From 66f9864a381673b9bacd94de242f97c522ebd54c Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Sat, 29 May 2010 22:59:31 -0700
Subject: [PATCH 001/307] Remove un-necessary memory initialization

The intra prediction needs one line above at the top edge.
---
 vp8/common/setupintrarecon.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index dcaafe6c6..38bfae822 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -16,21 +16,15 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
     int i;
 
     // set up frame new frame for intra coded blocks
-    vpx_memset(ybf->y_buffer - 1 - 2 * ybf->y_stride, 127, ybf->y_width + 5);
     vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
-
     for (i = 0; i < ybf->y_height; i++)
         ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
 
-    vpx_memset(ybf->u_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
     vpx_memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
     for (i = 0; i < ybf->uv_height; i++)
         ybf->u_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
 
-    vpx_memset(ybf->v_buffer - 1 - 2 * ybf->uv_stride, 127, ybf->uv_width + 5);
     vpx_memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5);
-
     for (i = 0; i < ybf->uv_height; i++)
         ybf->v_buffer[ybf->uv_stride *i - 1] = (unsigned char) 129;
 

From d33bf3d6649ce049a12fcf7d629e551066f41c47 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Fri, 28 May 2010 14:34:39 -0400
Subject: [PATCH 002/307] Remove costly memory reads/writes in
 vp8_reset_mb_tokens_context()

Tests on x86 showed this function costed 2.7% of total decoding time
because of all the memory reads/writes. After modification, it only
costs about 0.7% of decoding time, which gives a 2% gain.

Change-Id: I5003ee30b6dc6dea0bfa42a6ad7e7c22fcc7b215
---
 vp8/decoder/detokenize.c | 41 +++++++++++++++++++++++++++++-----------
 1 file changed, 30 insertions(+), 11 deletions(-)

diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index a42f18dd7..c081518fb 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -67,23 +67,42 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
     ENTROPY_CONTEXT *l;
     int i;
 
-    for (i = 0; i < 24; i++)
-    {
+    /* Clear entropy contexts for Y blocks */
+    a = A[Y1CONTEXT];
+    l = L[Y1CONTEXT];
+    *a = 0;
+    *(a+1) = 0;
+    *(a+2) = 0;
+    *(a+3) = 0;
+    *l = 0;
+    *(l+1) = 0;
+    *(l+2) = 0;
+    *(l+3) = 0;
 
-        a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-        l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+    /* Clear entropy contexts for U blocks */
+    a = A[UCONTEXT];
+    l = L[UCONTEXT];
+    *a = 0;
+    *(a+1) = 0;
+    *l = 0;
+    *(l+1) = 0;
 
-        *a = *l = 0;
-    }
+    /* Clear entropy contexts for V blocks */
+    a = A[VCONTEXT];
+    l = L[VCONTEXT];
+    *a = 0;
+    *(a+1) = 0;
+    *l = 0;
+    *(l+1) = 0;
 
+    /* Clear entropy contexts for Y2 blocks */
     if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
     {
-        a = A[Y2CONTEXT] + vp8_block2above[24];
-        l = L[Y2CONTEXT] + vp8_block2left[24];
-        *a = *l = 0;
+        a = A[Y2CONTEXT];
+        l = L[Y2CONTEXT];
+        *a = 0;
+        *l = 0;
     }
-
-
 }
 DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 #define NORMALIZE \

From 44d89495534b2975bb863a8ff534e5ff65c4c360 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 26 May 2010 18:27:51 -0400
Subject: [PATCH 003/307] Add support for reading YUV4MPEG2 files to ivfenc.

A large collection of example files may be found at
 http://media.xiph.org/video/derf/
This also fixes a bug in ivfenc for uncompressed IVF input, which previously
 appeared not to skip past the file header the second time it opened the file.
I don't actually have an IVF file with which to test this fix, however.

Change-Id: Id69a1e11a3fa16c4a4fa8944e880bcea090cd52b
---
 AUTHORS     |   3 +
 examples.mk |   5 +-
 ivfenc.c    | 148 ++++++---
 y4minput.c  | 880 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 y4minput.h  |  59 ++++
 5 files changed, 1054 insertions(+), 41 deletions(-)
 create mode 100644 y4minput.c
 create mode 100644 y4minput.h

diff --git a/AUTHORS b/AUTHORS
index 9686ac13e..4ab68811d 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -2,3 +2,6 @@
 # Name or Organization <email address>
 
 Google Inc.
+The Mozilla Foundation
+Timothy B. Terriberry <tterriberry@mozilla.com>
+The Xiph.Org Foundation
diff --git a/examples.mk b/examples.mk
index f05fd818e..0054f295d 100644
--- a/examples.mk
+++ b/examples.mk
@@ -19,8 +19,9 @@ ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
 ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
 ivfdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
-ivfenc.SRCS                 += args.c args.h vpx_ports/config.h
-ivfenc.SRCS                 += vpx_ports/mem_ops.h vpx_ports/mem_ops_aligned.h
+ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
+ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
 ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 ivfenc.DESCRIPTION           = Full featured encoder
 
diff --git a/ivfenc.c b/ivfenc.c
index 4dc708251..4d96d468a 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -32,6 +32,7 @@
 #include "vpx/vp8cx.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
+#include "y4minput.h"
 
 static const char *exec_name;
 
@@ -217,49 +218,65 @@ vpx_fixed_buf_t stats_get(stats_io_t *stats)
     return stats->buf;
 }
 
+enum video_file_type
+{
+    FILE_TYPE_RAW,
+    FILE_TYPE_IVF,
+    FILE_TYPE_Y4M
+};
+
 #define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */
-static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf)
+static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
+                      y4m_input *y4m)
 {
     int plane = 0;
 
-    if (is_ivf)
+    if (file_type == FILE_TYPE_Y4M)
     {
-        char junk[IVF_FRAME_HDR_SZ];
-
-        /* Skip the frame header. We know how big the frame should be. See
-         * write_ivf_frame_header() for documentation on the frame header
-         * layout.
-         */
-        fread(junk, 1, IVF_FRAME_HDR_SZ, f);
+        if (y4m_input_fetch_frame(y4m, f, img) < 0)
+           return 0;
     }
-
-    for (plane = 0; plane < 3; plane++)
+    else
     {
-        unsigned char *ptr;
-        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
-        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
-        int r;
-
-        /* Determine the correct plane based on the image format. The for-loop
-         * always counts in Y,U,V order, but this may not match the order of
-         * the data on disk.
-         */
-        switch (plane)
+        if (file_type == FILE_TYPE_IVF)
         {
-        case 1:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
-            break;
-        case 2:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
-            break;
-        default:
-            ptr = img->planes[plane];
+            char junk[IVF_FRAME_HDR_SZ];
+
+            /* Skip the frame header. We know how big the frame should be. See
+             * write_ivf_frame_header() for documentation on the frame header
+             * layout.
+             */
+            fread(junk, 1, IVF_FRAME_HDR_SZ, f);
         }
 
-        for (r = 0; r < h; r++)
+        for (plane = 0; plane < 3; plane++)
         {
-            fread(ptr, 1, w, f);
-            ptr += img->stride[plane];
+            unsigned char *ptr;
+            int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+            int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+            int r;
+
+            /* Determine the correct plane based on the image format. The for-loop
+             * always counts in Y,U,V order, but this may not match the order of
+             * the data on disk.
+             */
+            switch (plane)
+            {
+            case 1:
+                ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
+                break;
+            case 2:
+                ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
+                break;
+            default:
+                ptr = img->planes[plane];
+            }
+
+            for (r = 0; r < h; r++)
+            {
+                fread(ptr, 1, w, f);
+                ptr += img->stride[plane];
+            }
         }
     }
 
@@ -267,6 +284,20 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int is_ivf)
 }
 
 
+unsigned int file_is_y4m(FILE *infile,
+                         y4m_input *y4m)
+{
+    char raw_hdr[4];
+    if (fread(raw_hdr, 1, 4, infile) == 4 &&
+        memcmp(raw_hdr, "YUV4", 4) == 0 &&
+        y4m_input_open(y4m, infile, raw_hdr, 4) >= 0)
+    {
+        return 1;
+    }
+    rewind(infile);
+    return 0;
+}
+
 #define IVF_FILE_HDR_SZ (32)
 unsigned int file_is_ivf(FILE *infile,
                          unsigned int *fourcc,
@@ -568,8 +599,10 @@ int main(int argc, const char **argv_)
     static const int        *ctrl_args_map = NULL;
     int                      verbose = 0, show_psnr = 0;
     int                      arg_use_i420 = 1;
+    int                      arg_have_timebase = 0;
     unsigned long            cx_time = 0;
-    unsigned int             is_ivf, fourcc;
+    unsigned int             file_type, fourcc;
+    y4m_input                y4m;
 
     exec_name = argv_[0];
 
@@ -686,7 +719,10 @@ int main(int argc, const char **argv_)
         else if (arg_match(&arg, &height, argi))
             cfg.g_h = arg_parse_uint(&arg);
         else if (arg_match(&arg, &timebase, argi))
+        {
             cfg.g_timebase = arg_parse_rational(&arg);
+            arg_have_timebase = 1;
+        }
         else if (arg_match(&arg, &error_resilient, argi))
             cfg.g_error_resilient = arg_parse_uint(&arg);
         else if (arg_match(&arg, &lag_in_frames, argi))
@@ -808,10 +844,24 @@ int main(int argc, const char **argv_)
         return EXIT_FAILURE;
     }
 
-    is_ivf = file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h);
-
-    if (is_ivf)
+    if (file_is_y4m(infile, &y4m))
     {
+        file_type = FILE_TYPE_Y4M;
+        cfg.g_w = y4m.pic_w;
+        cfg.g_h = y4m.pic_h;
+        /* Use the frame rate from the file only if none was specified on the
+         *  command-line.
+         */
+        if (!arg_have_timebase)
+        {
+            cfg.g_timebase.num = y4m.fps_d;
+            cfg.g_timebase.den = y4m.fps_n;
+        }
+        arg_use_i420 = 0;
+    }
+    else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h))
+    {
+        file_type = FILE_TYPE_IVF;
         switch (fourcc)
         {
         case 0x32315659:
@@ -825,6 +875,8 @@ int main(int argc, const char **argv_)
             return EXIT_FAILURE;
         }
     }
+    else
+        file_type = FILE_TYPE_RAW;
 
     fclose(infile);
 
@@ -869,8 +921,14 @@ int main(int argc, const char **argv_)
         SHOW(kf_max_dist);
     }
 
-    vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
-                  cfg.g_w, cfg.g_h, 1);
+    if (file_type == FILE_TYPE_Y4M)
+        /*The Y4M reader does its own allocation.
+          Just initialize this here to avoid problems if we never read any
+           frames.*/
+        memset(&raw, 0, sizeof(raw));
+    else
+        vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
+                      cfg.g_w, cfg.g_h, 1);
 
     // This was added so that ivfenc will create monotically increasing
     // timestamps.  Since we create new timestamps for alt-reference frames
@@ -894,6 +952,18 @@ int main(int argc, const char **argv_)
             return EXIT_FAILURE;
         }
 
+        /*Skip the file header.*/
+        if (file_type == FILE_TYPE_IVF)
+        {
+            char raw_hdr[IVF_FILE_HDR_SZ];
+            (void)fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile);
+        }
+        else if(file_type == FILE_TYPE_Y4M)
+        {
+            char buffer[80];
+            (void)fgets(buffer, sizeof(buffer)/sizeof(*buffer) - 1, infile);
+        }
+
         outfile = fopen(out_fn, "wb");
 
         if (!outfile)
@@ -966,7 +1036,7 @@ int main(int argc, const char **argv_)
 
             if (!arg_limit || frames_in < arg_limit)
             {
-                frame_avail = read_frame(infile, &raw, is_ivf);
+                frame_avail = read_frame(infile, &raw, file_type, &y4m);
 
                 if (frame_avail)
                     frames_in++;
diff --git a/y4minput.c b/y4minput.c
new file mode 100644
index 000000000..f1f50bc79
--- /dev/null
+++ b/y4minput.c
@@ -0,0 +1,880 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license and patent
+ *  grant that can be found in the LICENSE file in the root of the source
+ *  tree. All contributing project authors may be found in the AUTHORS
+ *  file in the root of the source tree.
+ *
+ *  Based on code from the OggTheora software codec source code,
+ *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
+ */
+#include <stdlib.h>
+#include <string.h>
+#include "y4minput.h"
+
+static int y4m_parse_tags(y4m_input *_y4m,char *_tags){
+  int   got_w;
+  int   got_h;
+  int   got_fps;
+  int   got_interlace;
+  int   got_par;
+  int   got_chroma;
+  char *p;
+  char *q;
+  got_w=got_h=got_fps=got_interlace=got_par=got_chroma=0;
+  for(p=_tags;;p=q){
+    /*Skip any leading spaces.*/
+    while(*p==' ')p++;
+    /*If that's all we have, stop.*/
+    if(p[0]=='\0')break;
+    /*Find the end of this tag.*/
+    for(q=p+1;*q!='\0'&&*q!=' ';q++);
+    /*Process the tag.*/
+    switch(p[0]){
+      case 'W':{
+        if(sscanf(p+1,"%d",&_y4m->pic_w)!=1)return -1;
+        got_w=1;
+      }break;
+      case 'H':{
+        if(sscanf(p+1,"%d",&_y4m->pic_h)!=1)return -1;
+        got_h=1;
+      }break;
+      case 'F':{
+        if(sscanf(p+1,"%d:%d",&_y4m->fps_n,&_y4m->fps_d)!=2){
+          return -1;
+        }
+        got_fps=1;
+      }break;
+      case 'I':{
+        _y4m->interlace=p[1];
+        got_interlace=1;
+      }break;
+      case 'A':{
+        if(sscanf(p+1,"%d:%d",&_y4m->par_n,&_y4m->par_d)!=2){
+          return -1;
+        }
+        got_par=1;
+      }break;
+      case 'C':{
+        if(q-p>16)return -1;
+        memcpy(_y4m->chroma_type,p+1,q-p-1);
+        _y4m->chroma_type[q-p-1]='\0';
+        got_chroma=1;
+      }break;
+      /*Ignore unknown tags.*/
+    }
+  }
+  if(!got_w||!got_h||!got_fps)return -1;
+  if(!got_interlace)_y4m->interlace='?';
+  if(!got_par)_y4m->par_n=_y4m->par_d=0;
+  /*Chroma-type is not specified in older files, e.g., those generated by
+     mplayer.*/
+  if(!got_chroma)strcpy(_y4m->chroma_type,"420");
+  return 0;
+}
+
+
+
+/*All anti-aliasing filters in the following conversion functions are based on
+   one of two window functions:
+  The 6-tap Lanczos window (for down-sampling and shifts):
+   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
+   0,                         |t|>=3
+  The 4-tap Mitchell window (for up-sampling):
+   7|t|^3-12|t|^2+16/3,             |t|<1
+   -(7/3)|x|^3+12|x|^2-20|x|+32/3,  |t|<2
+   0,                               |t|>=2
+  The number of taps is intentionally kept small to reduce computational
+   overhead and limit ringing.
+
+  The taps from these filters are scaled so that their sum is 1, and the result
+   is scaled by 128 and rounded to integers to create a filter whose
+   intermediate values fit inside 16 bits.
+  Coefficients are rounded in such a way as to ensure their sum is still 128,
+   which is usually equivalent to normal rounding.
+
+  Conversions which require both horizontal and vertical filtering could
+   have these steps pipelined, for less memory consumption and better cache
+   performance, but we do them separately for simplicity.*/
+
+#define OC_MINI(_a,_b)      ((_a)>(_b)?(_b):(_a))
+#define OC_MAXI(_a,_b)      ((_a)<(_b)?(_b):(_a))
+#define OC_CLAMPI(_a,_b,_c) (OC_MAXI(_a,OC_MINI(_b,_c)))
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  420mpeg2 chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  BR      |       BR      |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  BR      |       BR      |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the site locations one quarter pixel (at
+   the chroma plane's resolution) to the right.
+  The 4:2:2 modes look exactly the same, except there are twice as many chroma
+   lines, and they are vertically co-sited with the luma samples in both the
+   mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
+static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
+ const unsigned char *_src,int _c_w,int _c_h){
+  int pli;
+  int y;
+  int x;
+  for(y=0;y<_c_h;y++){
+    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
+       window.*/
+    for(x=0;x<OC_MINI(_c_w,2);x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[0]-17*_src[OC_MAXI(x-1,0)]+
+       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
+       _src[OC_MINI(x+3,_c_w-1)]+64)>>7,255);
+    }
+    for(;x<_c_w-3;x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
+       114*_src[x]+35*_src[x+1]-9*_src[x+2]+_src[x+3]+64)>>7,255);
+    }
+    for(;x<_c_w;x++){
+      _dst[x]=(unsigned char)OC_CLAMPI(0,(4*_src[x-2]-17*_src[x-1]+
+       114*_src[x]+35*_src[OC_MINI(x+1,_c_w-1)]-9*_src[OC_MINI(x+2,_c_w-1)]+
+       _src[_c_w-1]+64)>>7,255);
+    }
+    _dst+=_c_w;
+    _src+=_c_w;
+  }
+}
+
+/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
+static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_w;
+  int c_h;
+  int c_sz;
+  int pli;
+  int y;
+  int x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  for(pli=1;pli<3;pli++){
+    y4m_42xmpeg2_42xjpeg_helper(_dst,_aux,c_w,c_h);
+    _dst+=c_sz;
+    _aux+=c_sz;
+  }
+}
+
+/*This format is only used for interlaced content, but is included for
+   completeness.
+
+  420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  420paldv chroma samples are sited like:
+  YR------Y-------YR------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YB------Y-------YB------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YR------Y-------YR------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YB------Y-------YB------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the site locations one quarter pixel (at
+   the chroma plane's resolution) to the right.
+  Then we use another filter to move the C_r location down one quarter pixel,
+   and the C_b location up one quarter pixel.*/
+static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+1)/2;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_sz=c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*First do the horizontal re-sampling.
+      This is the same as the mpeg2 case, except that after the horizontal
+       case, we need to apply a second vertical filter.*/
+    y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
+    _aux+=c_sz;
+    switch(pli){
+      case 1:{
+        /*Slide C_b up a quarter-pel.
+          This is the same filter used above, but in the other order.*/
+        for(x=0;x<c_w;x++){
+          for(y=0;y<OC_MINI(c_h,3);y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[0]
+             -9*tmp[OC_MAXI(y-2,0)*c_w]+35*tmp[OC_MAXI(y-1,0)*c_w]
+             +114*tmp[y*c_w]-17*tmp[OC_MINI(y+1,c_h-1)*c_w]
+             +4*tmp[OC_MINI(y+2,c_h-1)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h-2;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
+             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
+             -17*tmp[(y+1)*c_w]+4*tmp[(y+2)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(tmp[(y-3)*c_w]
+             -9*tmp[(y-2)*c_w]+35*tmp[(y-1)*c_w]+114*tmp[y*c_w]
+             -17*tmp[OC_MINI(y+1,c_h-1)*c_w]+4*tmp[(c_h-1)*c_w]+64)>>7,255);
+          }
+          _dst++;
+          tmp++;
+        }
+        _dst+=c_sz-c_w;
+        tmp-=c_w;
+      }break;
+      case 2:{
+        /*Slide C_r down a quarter-pel.
+          This is the same as the horizontal filter.*/
+        for(x=0;x<c_w;x++){
+          for(y=0;y<OC_MINI(c_h,2);y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[0]
+             -17*tmp[OC_MAXI(y-1,0)*c_w]+114*tmp[y*c_w]
+             +35*tmp[OC_MINI(y+1,c_h-1)*c_w]-9*tmp[OC_MINI(y+2,c_h-1)*c_w]
+             +tmp[OC_MINI(y+3,c_h-1)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h-3;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
+             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[(y+1)*c_w]
+             -9*tmp[(y+2)*c_w]+tmp[(y+3)*c_w]+64)>>7,255);
+          }
+          for(;y<c_h;y++){
+            _dst[y*c_w]=(unsigned char)OC_CLAMPI(0,(4*tmp[(y-2)*c_w]
+             -17*tmp[(y-1)*c_w]+114*tmp[y*c_w]+35*tmp[OC_MINI(y+1,c_h-1)*c_w]
+             -9*tmp[OC_MINI(y+2,c_h-1)*c_w]+tmp[(c_h-1)*c_w]+64)>>7,255);
+          }
+          _dst++;
+          tmp++;
+        }
+      }break;
+    }
+    /*For actual interlaced material, this would have to be done separately on
+       each field, and the shift amounts would be different.
+      C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8,
+       C_b up 1/8 in the bottom field.
+      The corresponding filters would be:
+       Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128
+       Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/
+  }
+}
+
+/*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0.
+  This is used as a helper by several converation routines.*/
+static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst,
+ const unsigned char *_src,int _c_w,int _c_h){
+  int y;
+  int x;
+  /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
+  for(x=0;x<_c_w;x++){
+    for(y=0;y<OC_MINI(_c_h,2);y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(64*_src[0]
+       +78*_src[OC_MINI(1,_c_h-1)*_c_w]
+       -17*_src[OC_MINI(2,_c_h-1)*_c_w]
+       +3*_src[OC_MINI(3,_c_h-1)*_c_w]+64)>>7,255);
+    }
+    for(;y<_c_h-3;y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]+_src[(y+3)*_c_w])
+       -17*(_src[(y-1)*_c_w]+_src[(y+2)*_c_w])
+       +78*(_src[y*_c_w]+_src[(y+1)*_c_w])+64)>>7,255);
+    }
+    for(;y<_c_h;y+=2){
+      _dst[(y>>1)*_c_w]=OC_CLAMPI(0,(3*(_src[(y-2)*_c_w]
+       +_src[(_c_h-1)*_c_w])-17*(_src[(y-1)*_c_w]
+       +_src[OC_MINI(y+2,_c_h-1)*_c_w])
+       +78*(_src[y*_c_w]+_src[OC_MINI(y+1,_c_h-1)*_c_w])+64)>>7,255);
+    }
+    _src++;
+    _dst++;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  422jpeg chroma samples are sited like:
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y---BR--Y-------Y---BR--Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to decimate the chroma planes by two in the
+   vertical direction.*/
+static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_w;
+  int c_h;
+  int c_sz;
+  int dst_c_w;
+  int dst_c_h;
+  int dst_c_sz;
+  int tmp_sz;
+  int pic_sz;
+  int pli;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  for(pli=1;pli<3;pli++){
+    y4m_422jpeg_420jpeg_helper(_dst,_aux,c_w,c_h);
+    _aux+=c_sz;
+    _dst+=dst_c_sz;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  422 chroma samples are sited like:
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------YBR-----Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a resampling filter to shift the original site locations one quarter
+   pixel (at the original chroma resolution) to the right.
+  Then we use a second resampling filter to decimate the chroma planes by two
+   in the vertical direction.*/
+static void y4m_convert_422_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=c_w*dst_c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*In reality, the horizontal and vertical steps could be pipelined, for
+       less memory consumption and better cache performance, but we do them
+       separately for simplicity.*/
+    /*First do horizontal filtering (convert to 422jpeg)*/
+    y4m_42xmpeg2_42xjpeg_helper(tmp,_aux,c_w,c_h);
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,c_w,c_h);
+    _aux+=c_sz;
+    _dst+=dst_c_sz;
+  }
+}
+
+/*420jpeg chroma samples are sited like:
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |   BR  |       |   BR  |
+  |       |       |       |
+  Y-------Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  411 chroma samples are sited like:
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+  YBR-----Y-------Y-------Y-------
+  |       |       |       |
+  |       |       |       |
+  |       |       |       |
+
+  We use a filter to resample at site locations one eighth pixel (at the source
+   chroma plane's horizontal resolution) and five eighths of a pixel to the
+   right.
+  Then we use another filter to decimate the planes by 2 in the vertical
+   direction.*/
+static void y4m_convert_411_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            tmp_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  tmp_sz=dst_c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*In reality, the horizontal and vertical steps could be pipelined, for
+       less memory consumption and better cache performance, but we do them
+       separately for simplicity.*/
+    /*First do horizontal filtering (convert to 422jpeg)*/
+    for(y=0;y<c_h;y++){
+      /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a
+         4-tap Mitchell window.*/
+      for(x=0;x<OC_MINI(c_w,1);x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(111*_aux[0]
+         +18*_aux[OC_MINI(1,c_w-1)]-_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
+        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(47*_aux[0]
+         +86*_aux[OC_MINI(1,c_w-1)]-5*_aux[OC_MINI(2,c_w-1)]+64)>>7,255);
+      }
+      for(;x<c_w-2;x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
+         +18*_aux[x+1]-_aux[x+2]+64)>>7,255);
+        tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
+         +86*_aux[x+1]-5*_aux[x+2]+64)>>7,255);
+      }
+      for(;x<c_w;x++){
+        tmp[x<<1]=(unsigned char)OC_CLAMPI(0,(_aux[x-1]+110*_aux[x]
+         +18*_aux[OC_MINI(x+1,c_w-1)]-_aux[c_w-1]+64)>>7,255);
+        if((x<<1|1)<dst_c_w){
+          tmp[x<<1|1]=(unsigned char)OC_CLAMPI(0,(-3*_aux[x-1]+50*_aux[x]
+           +86*_aux[OC_MINI(x+1,c_w-1)]-5*_aux[c_w-1]+64)>>7,255);
+        }
+      }
+      tmp+=dst_c_w;
+      _aux+=c_w;
+    }
+    tmp-=tmp_sz;
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
+    _dst+=dst_c_sz;
+  }
+}
+
+/*Convert 444 to 420jpeg.*/
+static void y4m_convert_444_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  unsigned char *tmp;
+  int            c_w;
+  int            c_h;
+  int            c_sz;
+  int            dst_c_w;
+  int            dst_c_h;
+  int            dst_c_sz;
+  int            tmp_sz;
+  int            pli;
+  int            y;
+  int            x;
+  /*Skip past the luma data.*/
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  /*Compute the size of each chroma plane.*/
+  c_w=(_y4m->pic_w+_y4m->src_c_dec_h-1)/_y4m->src_c_dec_h;
+  c_h=_y4m->pic_h;
+  dst_c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  dst_c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  dst_c_sz=dst_c_w*dst_c_h;
+  tmp_sz=dst_c_w*c_h;
+  tmp=_aux+2*c_sz;
+  for(pli=1;pli<3;pli++){
+    /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/
+    for(y=0;y<c_h;y++){
+      for(x=0;x<OC_MINI(c_w,2);x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(64*_aux[0]+78*_aux[OC_MINI(1,c_w-1)]
+         -17*_aux[OC_MINI(2,c_w-1)]
+         +3*_aux[OC_MINI(3,c_w-1)]+64)>>7,255);
+      }
+      for(;x<c_w-3;x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[x+3])
+         -17*(_aux[x-1]+_aux[x+2])+78*(_aux[x]+_aux[x+1])+64)>>7,255);
+      }
+      for(;x<c_w;x+=2){
+        tmp[x>>1]=OC_CLAMPI(0,(3*(_aux[x-2]+_aux[c_w-1])-
+         17*(_aux[x-1]+_aux[OC_MINI(x+2,c_w-1)])+
+         78*(_aux[x]+_aux[OC_MINI(x+1,c_w-1)])+64)>>7,255);
+      }
+      tmp+=dst_c_w;
+      _aux+=c_w;
+    }
+    tmp-=tmp_sz;
+    /*Now do the vertical filtering.*/
+    y4m_422jpeg_420jpeg_helper(_dst,tmp,dst_c_w,c_h);
+    _dst+=dst_c_sz;
+  }
+}
+
+/*The image is padded with empty chroma components at 4:2:0.*/
+static void y4m_convert_mono_420jpeg(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+  int c_sz;
+  _dst+=_y4m->pic_w*_y4m->pic_h;
+  c_sz=((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
+   ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
+  memset(_dst,128,c_sz*2);
+}
+
+/*No conversion function needed.*/
+static void y4m_convert_null(y4m_input *_y4m,unsigned char *_dst,
+ unsigned char *_aux){
+}
+
+int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip){
+  char buffer[80];
+  int  ret;
+  int  i;
+  /*Read until newline, or 80 cols, whichever happens first.*/
+  for(i=0;i<79;i++){
+    if(_nskip>0){
+      buffer[i]=*_skip++;
+      _nskip--;
+    }
+    else{
+      ret=fread(buffer+i,1,1,_fin);
+      if(ret<1)return -1;
+    }
+    if(buffer[i]=='\n')break;
+  }
+  /*We skipped too much header data.*/
+  if(_nskip>0)return -1;
+  if(i==79){
+    fprintf(stderr,"Error parsing header; not a YUV2MPEG2 file?\n");
+    return -1;
+  }
+  buffer[i]='\0';
+  if(memcmp(buffer,"YUV4MPEG",8)){
+    fprintf(stderr,"Incomplete magic for YUV4MPEG file.\n");
+    return -1;
+  }
+  if(buffer[8]!='2'){
+    fprintf(stderr,"Incorrect YUV input file version; YUV4MPEG2 required.\n");
+  }
+  ret=y4m_parse_tags(_y4m,buffer+5);
+  if(ret<0){
+    fprintf(stderr,"Error parsing YUV4MPEG2 header.\n");
+    return ret;
+  }
+  if(_y4m->interlace=='?'){
+    fprintf(stderr,"Warning: Input video interlacing format unknown; "
+     "assuming progressive scan.\n");
+  }
+  else if(_y4m->interlace!='p'){
+    fprintf(stderr,"Input video is interlaced; "
+     "Only progressive scan handled.\n");
+    return -1;
+  }
+  if(strcmp(_y4m->chroma_type,"420")==0||
+   strcmp(_y4m->chroma_type,"420jpeg")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h
+     +2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    /*Natively supported: no conversion required.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
+    _y4m->convert=y4m_convert_null;
+  }
+  else if(strcmp(_y4m->chroma_type,"420mpeg2")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=
+     2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->convert=y4m_convert_42xmpeg2_42xjpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"420paldv")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=_y4m->src_c_dec_v=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_sz=3*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*((_y4m->pic_h+1)/2);
+    _y4m->convert=y4m_convert_42xpaldv_42xjpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"422jpeg")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_422jpeg_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"422")==0){
+    _y4m->src_c_dec_h=_y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_422_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"411")==0){
+    _y4m->src_c_dec_h=4;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*((_y4m->pic_w+3)/4)*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_411_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"444")==0){
+    _y4m->src_c_dec_h=1;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.*/
+    _y4m->aux_buf_read_sz=2*_y4m->pic_w*_y4m->pic_h;
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz+((_y4m->pic_w+1)/2)*_y4m->pic_h;
+    _y4m->convert=y4m_convert_444_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"444alpha")==0){
+    _y4m->src_c_dec_h=1;
+    _y4m->dst_c_dec_h=2;
+    _y4m->src_c_dec_v=1;
+    _y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*Chroma filter required: read into the aux buf first.
+      We need to make two filter passes, so we need some extra space in the
+       aux buffer.
+      The extra plane also gets read into the aux buf.
+      It will be discarded.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=3*_y4m->pic_w*_y4m->pic_h;
+    _y4m->convert=y4m_convert_444_420jpeg;
+  }
+  else if(strcmp(_y4m->chroma_type,"mono")==0){
+    _y4m->src_c_dec_h=_y4m->src_c_dec_v=0;
+    _y4m->dst_c_dec_h=_y4m->dst_c_dec_v=2;
+    _y4m->dst_buf_read_sz=_y4m->pic_w*_y4m->pic_h;
+    /*No extra space required, but we need to clear the chroma planes.*/
+    _y4m->aux_buf_sz=_y4m->aux_buf_read_sz=0;
+    _y4m->convert=y4m_convert_mono_420jpeg;
+  }
+  else{
+    fprintf(stderr,"Unknown chroma sampling type: %s\n",_y4m->chroma_type);
+    return -1;
+  }
+  /*The size of the final frame buffers is always computed from the
+     destination chroma decimation type.*/
+  _y4m->dst_buf_sz=_y4m->pic_w*_y4m->pic_h
+   +2*((_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h)*
+   ((_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v);
+  _y4m->dst_buf=(unsigned char *)malloc(_y4m->dst_buf_sz);
+  _y4m->aux_buf=(unsigned char *)malloc(_y4m->aux_buf_sz);
+  return 0;
+}
+
+void y4m_input_close(y4m_input *_y4m){
+  free(_y4m->dst_buf);
+  free(_y4m->aux_buf);
+}
+
+int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
+  char frame[6];
+  int  pic_sz;
+  int  frame_c_w;
+  int  frame_c_h;
+  int  c_w;
+  int  c_h;
+  int  c_sz;
+  int  ret;
+  /*Read and skip the frame header.*/
+  ret=fread(frame,1,6,_fin);
+  if(ret<6)return 0;
+  if(memcmp(frame,"FRAME",5)){
+    fprintf(stderr,"Loss of framing in Y4M input data\n");
+    return -1;
+  }
+  if(frame[5]!='\n'){
+    char c;
+    int  j;
+    for(j=0;j<79&&fread(&c,1,1,_fin)&&c!='\n';j++);
+    if(j==79){
+      fprintf(stderr,"Error parsing Y4M frame header\n");
+      return -1;
+    }
+  }
+  /*Read the frame data that needs no conversion.*/
+  if(fread(_y4m->dst_buf,1,_y4m->dst_buf_read_sz,_fin)!=_y4m->dst_buf_read_sz){
+    fprintf(stderr,"Error reading Y4M frame data.\n");
+    return -1;
+  }
+  /*Read the frame data that does need conversion.*/
+  if(fread(_y4m->aux_buf,1,_y4m->aux_buf_read_sz,_fin)!=_y4m->aux_buf_read_sz){
+    fprintf(stderr,"Error reading Y4M frame data.\n");
+    return -1;
+  }
+  /*Now convert the just read frame.*/
+  (*_y4m->convert)(_y4m,_y4m->dst_buf,_y4m->aux_buf);
+  /*Fill in the frame buffer pointers.
+    We don't use vpx_img_wrap() because it forces padding for odd picture
+     sizes, which would require a separate fread call for every row.*/
+  memset(_img,0,sizeof(*_img));
+  /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/
+  _img->fmt=IMG_FMT_I420;
+  _img->w=_img->d_w=_y4m->pic_w;
+  _img->h=_img->d_h=_y4m->pic_h;
+  /*This is hard-coded to 4:2:0 for now, as that's all VP8 supports.*/
+  _img->x_chroma_shift=1;
+  _img->y_chroma_shift=1;
+  _img->bps=12;
+  /*Set up the buffer pointers.*/
+  pic_sz=_y4m->pic_w*_y4m->pic_h;
+  c_w=(_y4m->pic_w+_y4m->dst_c_dec_h-1)/_y4m->dst_c_dec_h;
+  c_h=(_y4m->pic_h+_y4m->dst_c_dec_v-1)/_y4m->dst_c_dec_v;
+  c_sz=c_w*c_h;
+  _img->stride[PLANE_Y]=_y4m->pic_w;
+  _img->stride[PLANE_U]=_img->stride[PLANE_V]=c_w;
+  _img->planes[PLANE_Y]=_y4m->dst_buf;
+  _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
+  _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
+  return 0;
+}
diff --git a/y4minput.h b/y4minput.h
new file mode 100644
index 000000000..153487504
--- /dev/null
+++ b/y4minput.h
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license and patent
+ *  grant that can be found in the LICENSE file in the root of the source
+ *  tree. All contributing project authors may be found in the AUTHORS
+ *  file in the root of the source tree.
+ *
+ *  Based on code from the OggTheora software codec source code,
+ *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
+ */
+#if !defined(_y4minput_H)
+# define _y4minput_H (1)
+# include <stdio.h>
+# include "vpx/vpx_image.h"
+
+
+
+typedef struct y4m_input y4m_input;
+
+
+
+/*The function used to perform chroma conversion.*/
+typedef void (*y4m_convert_func)(y4m_input *_y4m,
+ unsigned char *_dst,unsigned char *_src);
+
+
+
+struct y4m_input{
+  int               pic_w;
+  int               pic_h;
+  int               fps_n;
+  int               fps_d;
+  int               par_n;
+  int               par_d;
+  char              interlace;
+  int               src_c_dec_h;
+  int               src_c_dec_v;
+  int               dst_c_dec_h;
+  int               dst_c_dec_v;
+  char              chroma_type[16];
+  /*The size of each converted frame buffer.*/
+  size_t            dst_buf_sz;
+  /*The amount to read directly into the converted frame buffer.*/
+  size_t            dst_buf_read_sz;
+  /*The size of the auxilliary buffer.*/
+  size_t            aux_buf_sz;
+  /*The amount to read into the auxilliary buffer.*/
+  size_t            aux_buf_read_sz;
+  y4m_convert_func  convert;
+  unsigned char    *dst_buf;
+  unsigned char    *aux_buf;
+};
+
+int y4m_input_open(y4m_input *_y4m,FILE *_fin,char *_skip,int _nskip);
+void y4m_input_close(y4m_input *_y4m);
+int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *img);
+
+#endif

From 7f9db419798632a77b7ba10fab4dfc0631f3211a Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 26 May 2010 19:36:20 -0400
Subject: [PATCH 004/307] Add support for YUV4MPEG2 output.

These files can be played back directly in most major open-source media
 frameworks without the need for additional patches or parameters.

Change-Id: I59f98c1658298245c688f7d107cf393445e470d5
---
 ivfdec.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/ivfdec.c b/ivfdec.c
index 8ee60b6fa..c97b03f78 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -62,8 +62,10 @@ static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
                                      "Postprocess decoded frames");
 static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
                                     "Show timing summary");
-static const arg_def_t outputfile = ARG_DEF("o", "output-raw-file", 1,
+static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
                                     "Output raw yv12 file instead of images");
+static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
+                                    "Output file is YUV4MPEG2");
 static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
                                     "Max threads to use");
 static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
@@ -77,7 +79,7 @@ static const arg_def_t *all_args[] =
 {
     &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
     &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &threadsarg, &quietarg,
+    &usey4marg, &threadsarg, &quietarg,
 #if CONFIG_MD5
     &md5arg,
 #endif
@@ -288,7 +290,12 @@ void out_close(void *out, const char *out_fn, int do_md5)
     }
 }
 
-unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc)
+unsigned int file_is_ivf(FILE *infile,
+                         unsigned int *fourcc,
+                         unsigned int *width,
+                         unsigned int *height,
+                         unsigned int *timebase_num,
+                         unsigned int *timebase_den)
 {
     char raw_hdr[32];
     int is_ivf = 0;
@@ -305,6 +312,10 @@ unsigned int file_is_ivf(FILE *infile, unsigned int *fourcc)
                         " decode properly.");
 
             *fourcc = mem_get_le32(raw_hdr + 8);
+            *width = mem_get_le16(raw_hdr + 12);
+            *height = mem_get_le16(raw_hdr + 14);
+            *timebase_den = mem_get_le32(raw_hdr + 16);
+            *timebase_num = mem_get_le32(raw_hdr + 20);
         }
     }
 
@@ -330,6 +341,11 @@ int main(int argc, const char **argv_)
     struct arg               arg;
     char                   **argv, **argi, **argj;
     const char                   *fn2 = 0;
+    int                     use_y4m = 0;
+    unsigned int            width;
+    unsigned int            height;
+    unsigned int            timebase_num;
+    unsigned int            timebase_den;
     void                   *out = NULL;
     vpx_codec_dec_cfg_t     cfg = {0};
 #if CONFIG_VP8_DECODER
@@ -361,6 +377,8 @@ int main(int argc, const char **argv_)
         }
         else if (arg_match(&arg, &outputfile, argi))
             fn2 = arg.val;
+        else if (arg_match(&arg, &usey4marg, argi))
+            use_y4m = 1;
         else if (arg_match(&arg, &prefixarg, argi))
             prefix = strdup(arg.val);
         else if (arg_match(&arg, &use_yv12, argi))
@@ -446,10 +464,31 @@ int main(int argc, const char **argv_)
     if (fn2)
         out = out_open(fn2, do_md5);
 
-    is_ivf = file_is_ivf(infile, &fourcc);
+    is_ivf = file_is_ivf(infile, &fourcc, &width, &height,
+                         &timebase_num, &timebase_den);
 
     if (is_ivf)
     {
+        if (use_y4m)
+        {
+            char buffer[128];
+            if (!fn2)
+            {
+                fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
+                return EXIT_FAILURE;
+            }
+            /*Correct for the factor of 2 applied to the timebase in the
+               encoder.*/
+            if(timebase_den&1)timebase_num<<=1;
+            else timebase_den>>=1;
+            /*Note: We can't output an aspect ratio here because IVF doesn't
+               store one, and neither does VP8.
+              That will have to wait until these tools support WebM natively.*/
+            sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+                    "420jpeg", width, height, timebase_den, timebase_num, 'p');
+            out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
+        }
+
         /* Try to determine the codec from the fourcc. */
         for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
             if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
@@ -465,6 +504,11 @@ int main(int argc, const char **argv_)
                 break;
             }
     }
+    else if(use_y4m)
+    {
+        fprintf(stderr, "YUV4MPEG2 output only supported from IVF input.\n");
+        return EXIT_FAILURE;
+    }
 
     if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
                            postproc ? VPX_CODEC_USE_POSTPROC : 0))
@@ -533,6 +577,8 @@ int main(int argc, const char **argv_)
                             prefix, img->d_w, img->d_h, frame_in, sfx);
                     out = out_open(out_fn, do_md5);
                 }
+                else if(use_y4m)
+                    out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
 
                 buf = img->planes[VPX_PLANE_Y];
 

From 09202d80716ef7e2931de60c66b6fb2383f52613 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 4 Jun 2010 16:19:40 -0400
Subject: [PATCH 005/307] LICENSE: update with latest text

Change-Id: Ieebea089095d9073b3a94932791099f614ce120c
---
 LICENSE                                       |   42 +-
 PATENTS                                       |   22 +
 args.c                                        |    9 +-
 args.h                                        |    9 +-
 build/arm-wince-vs8/obj_int_extract.bat       |    9 +-
 build/make/Makefile                           |   11 +-
 build/make/ads2gas.pl                         |   11 +-
 build/make/ads2gas_apple.pl                   |   11 +-
 build/make/armlink_adapter.sh                 |    9 +-
 build/make/gen_asm_deps.sh                    |    9 +-
 build/make/gen_msvs_def.sh                    |    9 +-
 build/make/gen_msvs_proj.sh                   |    9 +-
 build/make/gen_msvs_sln.sh                    |    9 +-
 build/make/obj_int_extract.c                  |    9 +-
 build/make/version.sh                         |    9 +-
 docs.mk                                       |    9 +-
 example_xma.c                                 |    9 +-
 examples.mk                                   |    9 +-
 examples/decoder_tmpl.c                       |    9 +-
 examples/encoder_tmpl.c                       |    9 +-
 examples/gen_example_code.sh                  |    9 +-
 examples/gen_example_doxy.php                 |    9 +-
 examples/gen_example_text.sh                  |    9 +-
 .../markdown.php.orig                         | 2909 -----------------
 ivfdec.c                                      |    9 +-
 ivfenc.c                                      |    9 +-
 libs.doxy_template                            |   11 +-
 libs.mk                                       |    9 +-
 md5_utils.c                                   |    9 +-
 md5_utils.h                                   |    9 +-
 release.sh                                    |    9 +-
 solution.mk                                   |    9 +-
 vp8/common/alloccommon.c                      |    9 +-
 vp8/common/alloccommon.h                      |    9 +-
 vp8/common/arm/armv6/bilinearfilter_v6.asm    |    9 +-
 vp8/common/arm/armv6/copymem16x16_v6.asm      |    9 +-
 vp8/common/arm/armv6/copymem8x4_v6.asm        |    9 +-
 vp8/common/arm/armv6/copymem8x8_v6.asm        |    9 +-
 vp8/common/arm/armv6/filter_v6.asm            |    9 +-
 vp8/common/arm/armv6/idct_v6.asm              |    9 +-
 vp8/common/arm/armv6/iwalsh_v6.asm            |    9 +-
 vp8/common/arm/armv6/loopfilter_v6.asm        |    9 +-
 vp8/common/arm/armv6/recon_v6.asm             |    9 +-
 vp8/common/arm/armv6/simpleloopfilter_v6.asm  |    9 +-
 vp8/common/arm/armv6/sixtappredict8x4_v6.asm  |    9 +-
 vp8/common/arm/bilinearfilter_arm.c           |    9 +-
 vp8/common/arm/filter_arm.c                   |    9 +-
 vp8/common/arm/idct_arm.h                     |    9 +-
 vp8/common/arm/loopfilter_arm.c               |    9 +-
 vp8/common/arm/loopfilter_arm.h               |    9 +-
 .../arm/neon/bilinearpredict16x16_neon.asm    |    9 +-
 .../arm/neon/bilinearpredict4x4_neon.asm      |    9 +-
 .../arm/neon/bilinearpredict8x4_neon.asm      |    9 +-
 .../arm/neon/bilinearpredict8x8_neon.asm      |    9 +-
 .../arm/neon/buildintrapredictorsmby_neon.asm |    9 +-
 vp8/common/arm/neon/copymem16x16_neon.asm     |    9 +-
 vp8/common/arm/neon/copymem8x4_neon.asm       |    9 +-
 vp8/common/arm/neon/copymem8x8_neon.asm       |    9 +-
 vp8/common/arm/neon/iwalsh_neon.asm           |    9 +-
 .../neon/loopfilterhorizontaledge_uv_neon.asm |    9 +-
 .../neon/loopfilterhorizontaledge_y_neon.asm  |    9 +-
 .../loopfiltersimplehorizontaledge_neon.asm   |    9 +-
 .../loopfiltersimpleverticaledge_neon.asm     |    9 +-
 .../neon/loopfilterverticaledge_uv_neon.asm   |    9 +-
 .../neon/loopfilterverticaledge_y_neon.asm    |    9 +-
 .../mbloopfilterhorizontaledge_uv_neon.asm    |    9 +-
 .../mbloopfilterhorizontaledge_y_neon.asm     |    9 +-
 .../neon/mbloopfilterverticaledge_uv_neon.asm |    9 +-
 .../neon/mbloopfilterverticaledge_y_neon.asm  |    9 +-
 vp8/common/arm/neon/recon16x16mb_neon.asm     |    9 +-
 vp8/common/arm/neon/recon2b_neon.asm          |    9 +-
 vp8/common/arm/neon/recon4b_neon.asm          |    9 +-
 vp8/common/arm/neon/reconb_neon.asm           |    9 +-
 vp8/common/arm/neon/save_neon_reg.asm         |    9 +-
 .../arm/neon/shortidct4x4llm_1_neon.asm       |    9 +-
 vp8/common/arm/neon/shortidct4x4llm_neon.asm  |    9 +-
 .../arm/neon/sixtappredict16x16_neon.asm      |    9 +-
 vp8/common/arm/neon/sixtappredict4x4_neon.asm |    9 +-
 vp8/common/arm/neon/sixtappredict8x4_neon.asm |    9 +-
 vp8/common/arm/neon/sixtappredict8x8_neon.asm |    9 +-
 vp8/common/arm/recon_arm.c                    |    9 +-
 vp8/common/arm/recon_arm.h                    |    9 +-
 vp8/common/arm/reconintra4x4_arm.c            |    9 +-
 vp8/common/arm/reconintra_arm.c               |    9 +-
 vp8/common/arm/subpixel_arm.h                 |    9 +-
 vp8/common/arm/systemdependent.c              |    9 +-
 vp8/common/arm/vpx_asm_offsets.c              |    9 +-
 vp8/common/bigend.h                           |    9 +-
 vp8/common/blockd.c                           |    9 +-
 vp8/common/blockd.h                           |    9 +-
 vp8/common/boolcoder.h                        |    9 +-
 vp8/common/codec_common_interface.h           |    9 +-
 vp8/common/coefupdateprobs.h                  |    9 +-
 vp8/common/common.h                           |    9 +-
 vp8/common/common_types.h                     |    9 +-
 vp8/common/context.c                          |    9 +-
 vp8/common/debugmodes.c                       |    9 +-
 vp8/common/defaultcoefcounts.h                |    9 +-
 vp8/common/dma_desc.h                         |    9 +-
 vp8/common/duck_io.h                          |    9 +-
 vp8/common/entropy.c                          |    9 +-
 vp8/common/entropy.h                          |    9 +-
 vp8/common/entropymode.c                      |    9 +-
 vp8/common/entropymode.h                      |    9 +-
 vp8/common/entropymv.c                        |    9 +-
 vp8/common/entropymv.h                        |    9 +-
 vp8/common/extend.c                           |    9 +-
 vp8/common/extend.h                           |    9 +-
 vp8/common/filter_c.c                         |    9 +-
 vp8/common/findnearmv.c                       |    9 +-
 vp8/common/findnearmv.h                       |    9 +-
 vp8/common/fourcc.hpp                         |    9 +-
 vp8/common/g_common.h                         |    9 +-
 vp8/common/generic/systemdependent.c          |    9 +-
 vp8/common/header.h                           |    9 +-
 vp8/common/idct.h                             |    9 +-
 vp8/common/idctllm.c                          |    9 +-
 vp8/common/invtrans.c                         |    9 +-
 vp8/common/invtrans.h                         |    9 +-
 vp8/common/littlend.h                         |    9 +-
 vp8/common/loopfilter.c                       |    9 +-
 vp8/common/loopfilter.h                       |    9 +-
 vp8/common/loopfilter_filters.c               |    9 +-
 vp8/common/mac_specs.h                        |    9 +-
 vp8/common/mbpitch.c                          |    9 +-
 vp8/common/modecont.c                         |    9 +-
 vp8/common/modecont.h                         |    9 +-
 vp8/common/modecontext.c                      |    9 +-
 vp8/common/mv.h                               |    9 +-
 vp8/common/onyx.h                             |    9 +-
 vp8/common/onyxc_int.h                        |    9 +-
 vp8/common/onyxd.h                            |    9 +-
 vp8/common/partialgfupdate.h                  |    9 +-
 vp8/common/postproc.c                         |    9 +-
 vp8/common/postproc.h                         |    9 +-
 vp8/common/ppc/copy_altivec.asm               |    9 +-
 vp8/common/ppc/filter_altivec.asm             |    9 +-
 vp8/common/ppc/filter_bilinear_altivec.asm    |    9 +-
 vp8/common/ppc/idctllm_altivec.asm            |    9 +-
 vp8/common/ppc/loopfilter_altivec.c           |    9 +-
 vp8/common/ppc/loopfilter_filters_altivec.asm |    9 +-
 vp8/common/ppc/platform_altivec.asm           |    9 +-
 vp8/common/ppc/recon_altivec.asm              |    9 +-
 vp8/common/ppc/systemdependent.c              |    9 +-
 vp8/common/ppflags.h                          |    9 +-
 vp8/common/pragmas.h                          |    9 +-
 vp8/common/predictdc.c                        |    9 +-
 vp8/common/predictdc.h                        |    9 +-
 vp8/common/preproc.h                          |    9 +-
 vp8/common/preprocif.h                        |    9 +-
 vp8/common/proposed.h                         |    9 +-
 vp8/common/quant_common.c                     |    9 +-
 vp8/common/quant_common.h                     |    9 +-
 vp8/common/recon.c                            |    9 +-
 vp8/common/recon.h                            |    9 +-
 vp8/common/reconinter.c                       |    9 +-
 vp8/common/reconinter.h                       |    9 +-
 vp8/common/reconintra.c                       |    9 +-
 vp8/common/reconintra.h                       |    9 +-
 vp8/common/reconintra4x4.c                    |    9 +-
 vp8/common/reconintra4x4.h                    |    9 +-
 vp8/common/segmentation_common.c              |    9 +-
 vp8/common/segmentation_common.h              |    9 +-
 vp8/common/setupintrarecon.c                  |    9 +-
 vp8/common/setupintrarecon.h                  |    9 +-
 vp8/common/subpixel.h                         |    9 +-
 vp8/common/swapyv12buffer.c                   |    9 +-
 vp8/common/swapyv12buffer.h                   |    9 +-
 vp8/common/systemdependent.h                  |    9 +-
 vp8/common/textblit.c                         |    9 +-
 vp8/common/threading.h                        |    9 +-
 vp8/common/treecoder.c                        |    9 +-
 vp8/common/treecoder.h                        |    9 +-
 vp8/common/type_aliases.h                     |    9 +-
 vp8/common/vfwsetting.hpp                     |    9 +-
 vp8/common/vpx_ref_build_prefix.h             |    9 +-
 vp8/common/vpxblit.h                          |    9 +-
 vp8/common/vpxblit_c64.h                      |    9 +-
 vp8/common/vpxerrors.h                        |    9 +-
 vp8/common/x86/boolcoder.cxx                  |    9 +-
 vp8/common/x86/idct_x86.h                     |    9 +-
 vp8/common/x86/idctllm_mmx.asm                |    9 +-
 vp8/common/x86/iwalsh_mmx.asm                 |    9 +-
 vp8/common/x86/iwalsh_sse2.asm                |    9 +-
 vp8/common/x86/loopfilter_mmx.asm             |    9 +-
 vp8/common/x86/loopfilter_sse2.asm            |    9 +-
 vp8/common/x86/loopfilter_x86.c               |    9 +-
 vp8/common/x86/loopfilter_x86.h               |    9 +-
 vp8/common/x86/postproc_mmx.asm               |    9 +-
 vp8/common/x86/postproc_mmx.c                 |    9 +-
 vp8/common/x86/postproc_sse2.asm              |    9 +-
 vp8/common/x86/postproc_x86.h                 |    9 +-
 vp8/common/x86/recon_mmx.asm                  |    9 +-
 vp8/common/x86/recon_sse2.asm                 |    9 +-
 vp8/common/x86/recon_x86.h                    |    9 +-
 vp8/common/x86/subpixel_mmx.asm               |    9 +-
 vp8/common/x86/subpixel_sse2.asm              |    9 +-
 vp8/common/x86/subpixel_x86.h                 |    9 +-
 vp8/common/x86/vp8_asm_stubs.c                |    9 +-
 vp8/common/x86/x86_systemdependent.c          |    9 +-
 vp8/decoder/arm/armv5/dequantize_v5.asm       |    9 +-
 vp8/decoder/arm/armv6/dboolhuff_v6.asm        |    9 +-
 vp8/decoder/arm/armv6/dequantdcidct_v6.asm    |    9 +-
 vp8/decoder/arm/armv6/dequantidct_v6.asm      |    9 +-
 vp8/decoder/arm/armv6/dequantize_v6.asm       |    9 +-
 vp8/decoder/arm/dequantize_arm.c              |    9 +-
 vp8/decoder/arm/dequantize_arm.h              |    9 +-
 vp8/decoder/arm/detokenizearm_sjl.c           |    9 +-
 vp8/decoder/arm/detokenizearm_v6.asm          |    9 +-
 vp8/decoder/arm/dsystemdependent.c            |    9 +-
 vp8/decoder/arm/neon/dboolhuff_neon.asm       |    9 +-
 vp8/decoder/arm/neon/dequantdcidct_neon.asm   |    9 +-
 vp8/decoder/arm/neon/dequantidct_neon.asm     |    9 +-
 vp8/decoder/arm/neon/dequantizeb_neon.asm     |    9 +-
 vp8/decoder/dboolhuff.c                       |    9 +-
 vp8/decoder/dboolhuff.h                       |    9 +-
 vp8/decoder/decodemv.c                        |    9 +-
 vp8/decoder/decodemv.h                        |    9 +-
 vp8/decoder/decoderthreading.h                |    9 +-
 vp8/decoder/decodframe.c                      |    9 +-
 vp8/decoder/demode.c                          |    9 +-
 vp8/decoder/demode.h                          |    9 +-
 vp8/decoder/dequantize.c                      |    9 +-
 vp8/decoder/dequantize.h                      |    9 +-
 vp8/decoder/detokenize.c                      |    9 +-
 vp8/decoder/detokenize.h                      |    9 +-
 vp8/decoder/generic/dsystemdependent.c        |    9 +-
 vp8/decoder/onyxd_if.c                        |    9 +-
 vp8/decoder/onyxd_if_sjl.c                    |    9 +-
 vp8/decoder/onyxd_int.h                       |    9 +-
 vp8/decoder/threading.c                       |    9 +-
 vp8/decoder/treereader.h                      |    9 +-
 vp8/decoder/x86/dequantize_mmx.asm            |    9 +-
 vp8/decoder/x86/dequantize_x86.h              |    9 +-
 vp8/decoder/x86/onyxdxv.c                     |    9 +-
 vp8/decoder/x86/x86_dsystemdependent.c        |    9 +-
 vp8/decoder/xprintf.c                         |    9 +-
 vp8/decoder/xprintf.h                         |    9 +-
 vp8/encoder/arm/armv6/walsh_v6.asm            |    9 +-
 vp8/encoder/arm/boolhuff_arm.c                |    9 +-
 vp8/encoder/arm/csystemdependent.c            |    9 +-
 vp8/encoder/arm/dct_arm.h                     |    9 +-
 vp8/encoder/arm/encodemb_arm.c                |    9 +-
 vp8/encoder/arm/encodemb_arm.h                |    9 +-
 vp8/encoder/arm/mcomp_arm.c                   |    9 +-
 vp8/encoder/arm/neon/boolhuff_armv7.asm       |    9 +-
 vp8/encoder/arm/neon/fastfdct4x4_neon.asm     |    9 +-
 vp8/encoder/arm/neon/fastfdct8x4_neon.asm     |    9 +-
 vp8/encoder/arm/neon/fastquantizeb_neon.asm   |    9 +-
 vp8/encoder/arm/neon/sad16_neon.asm           |    9 +-
 vp8/encoder/arm/neon/sad8_neon.asm            |    9 +-
 vp8/encoder/arm/neon/shortfdct_neon.asm       |    9 +-
 vp8/encoder/arm/neon/subtract_neon.asm        |    9 +-
 vp8/encoder/arm/neon/variance_neon.asm        |    9 +-
 vp8/encoder/arm/neon/vp8_memcpy_neon.asm      |    9 +-
 vp8/encoder/arm/neon/vp8_mse16x16_neon.asm    |    9 +-
 vp8/encoder/arm/neon/vp8_packtokens_armv7.asm |    9 +-
 .../arm/neon/vp8_packtokens_mbrow_armv7.asm   |    9 +-
 .../neon/vp8_packtokens_partitions_armv7.asm  |    9 +-
 .../arm/neon/vp8_shortwalsh4x4_neon.asm       |    9 +-
 .../neon/vp8_subpixelvariance16x16_neon.asm   |    9 +-
 .../neon/vp8_subpixelvariance16x16s_neon.asm  |    9 +-
 .../arm/neon/vp8_subpixelvariance8x8_neon.asm |    9 +-
 vp8/encoder/arm/picklpf_arm.c                 |    9 +-
 vp8/encoder/arm/quantize_arm.c                |    9 +-
 vp8/encoder/arm/quantize_arm.h                |    9 +-
 vp8/encoder/arm/variance_arm.h                |    9 +-
 vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c     |    9 +-
 vp8/encoder/bitstream.c                       |    9 +-
 vp8/encoder/bitstream.h                       |    9 +-
 vp8/encoder/block.h                           |    9 +-
 vp8/encoder/boolhuff.c                        |    9 +-
 vp8/encoder/boolhuff.h                        |    9 +-
 vp8/encoder/dct.c                             |    9 +-
 vp8/encoder/dct.h                             |    9 +-
 vp8/encoder/encodeframe.c                     |    9 +-
 vp8/encoder/encodeintra.c                     |    9 +-
 vp8/encoder/encodeintra.h                     |    9 +-
 vp8/encoder/encodemb.c                        |    9 +-
 vp8/encoder/encodemb.h                        |    9 +-
 vp8/encoder/encodemv.c                        |    9 +-
 vp8/encoder/encodemv.h                        |    9 +-
 vp8/encoder/ethreading.c                      |    9 +-
 vp8/encoder/firstpass.c                       |    9 +-
 vp8/encoder/firstpass.h                       |    9 +-
 vp8/encoder/generic/csystemdependent.c        |    9 +-
 vp8/encoder/mcomp.c                           |    9 +-
 vp8/encoder/mcomp.h                           |    9 +-
 vp8/encoder/modecosts.c                       |    9 +-
 vp8/encoder/modecosts.h                       |    9 +-
 vp8/encoder/onyx_if.c                         |    9 +-
 vp8/encoder/onyx_int.h                        |    9 +-
 vp8/encoder/parms.cpp                         |    9 +-
 vp8/encoder/pickinter.c                       |    9 +-
 vp8/encoder/pickinter.h                       |    9 +-
 vp8/encoder/picklpf.c                         |    9 +-
 vp8/encoder/ppc/csystemdependent.c            |    9 +-
 vp8/encoder/ppc/encodemb_altivec.asm          |    9 +-
 vp8/encoder/ppc/fdct_altivec.asm              |    9 +-
 vp8/encoder/ppc/rdopt_altivec.asm             |    9 +-
 vp8/encoder/ppc/sad_altivec.asm               |    9 +-
 vp8/encoder/ppc/variance_altivec.asm          |    9 +-
 vp8/encoder/ppc/variance_subpixel_altivec.asm |    9 +-
 vp8/encoder/preproc.c                         |    9 +-
 vp8/encoder/psnr.c                            |    9 +-
 vp8/encoder/psnr.h                            |    9 +-
 vp8/encoder/quantize.c                        |    9 +-
 vp8/encoder/quantize.h                        |    9 +-
 vp8/encoder/ratectrl.c                        |    9 +-
 vp8/encoder/ratectrl.h                        |    9 +-
 vp8/encoder/rdopt.c                           |    9 +-
 vp8/encoder/rdopt.h                           |    9 +-
 vp8/encoder/sad_c.c                           |    9 +-
 vp8/encoder/ssim.c                            |    9 +-
 vp8/encoder/tokenize.c                        |    9 +-
 vp8/encoder/tokenize.h                        |    9 +-
 vp8/encoder/treewriter.c                      |    9 +-
 vp8/encoder/treewriter.h                      |    9 +-
 vp8/encoder/variance.h                        |    9 +-
 vp8/encoder/variance_c.c                      |    9 +-
 vp8/encoder/x86/csystemdependent.c            |    9 +-
 vp8/encoder/x86/dct_mmx.asm                   |    9 +-
 vp8/encoder/x86/dct_sse2.asm                  |    9 +-
 vp8/encoder/x86/dct_x86.h                     |    9 +-
 vp8/encoder/x86/encodemb_x86.h                |    9 +-
 vp8/encoder/x86/encodeopt.asm                 |    9 +-
 vp8/encoder/x86/fwalsh_sse2.asm               |    9 +-
 vp8/encoder/x86/mcomp_x86.h                   |    9 +-
 vp8/encoder/x86/preproc_mmx.c                 |    9 +-
 vp8/encoder/x86/quantize_mmx.asm              |    9 +-
 vp8/encoder/x86/sad_mmx.asm                   |    9 +-
 vp8/encoder/x86/sad_sse2.asm                  |    9 +-
 vp8/encoder/x86/sad_sse3.asm                  |    9 +-
 vp8/encoder/x86/sad_ssse3.asm                 |    9 +-
 vp8/encoder/x86/subtract_mmx.asm              |    9 +-
 vp8/encoder/x86/variance_impl_mmx.asm         |    9 +-
 vp8/encoder/x86/variance_impl_sse2.asm        |    9 +-
 vp8/encoder/x86/variance_mmx.c                |    9 +-
 vp8/encoder/x86/variance_sse2.c               |    9 +-
 vp8/encoder/x86/variance_x86.h                |    9 +-
 vp8/encoder/x86/x86_csystemdependent.c        |    9 +-
 vp8/vp8_common.mk                             |    9 +-
 vp8/vp8_cx_iface.c                            |    9 +-
 vp8/vp8_dx_iface.c                            |    9 +-
 vp8/vp8cx.mk                                  |    9 +-
 vp8/vp8cx_arm.mk                              |    9 +-
 vp8/vp8dx.mk                                  |    9 +-
 vp8/vp8dx_arm.mk                              |    9 +-
 vpx/internal/vpx_codec_internal.h             |    9 +-
 vpx/src/vpx_codec.c                           |    9 +-
 vpx/src/vpx_decoder.c                         |    9 +-
 vpx/src/vpx_decoder_compat.c                  |    9 +-
 vpx/src/vpx_encoder.c                         |    9 +-
 vpx/src/vpx_image.c                           |    9 +-
 vpx/vp8.h                                     |    9 +-
 vpx/vp8cx.h                                   |    9 +-
 vpx/vp8dx.h                                   |    9 +-
 vpx/vp8e.h                                    |    9 +-
 vpx/vpx_codec.h                               |    9 +-
 vpx/vpx_codec.mk                              |    9 +-
 vpx/vpx_codec_impl_bottom.h                   |    9 +-
 vpx/vpx_codec_impl_top.h                      |    9 +-
 vpx/vpx_decoder.h                             |    9 +-
 vpx/vpx_decoder_compat.h                      |    9 +-
 vpx/vpx_encoder.h                             |    9 +-
 vpx/vpx_image.h                               |    9 +-
 vpx/vpx_integer.h                             |    9 +-
 vpx_mem/include/nds/vpx_mem_nds.h             |    9 +-
 vpx_mem/include/vpx_mem_intrnl.h              |    9 +-
 vpx_mem/include/vpx_mem_tracker.h             |    9 +-
 vpx_mem/intel_linux/vpx_mem.c                 |    9 +-
 vpx_mem/intel_linux/vpx_mem_tracker.c         |    9 +-
 vpx_mem/memory_manager/hmm_alloc.c            |    9 +-
 vpx_mem/memory_manager/hmm_base.c             |    9 +-
 vpx_mem/memory_manager/hmm_dflt_abort.c       |    9 +-
 vpx_mem/memory_manager/hmm_grow.c             |    9 +-
 vpx_mem/memory_manager/hmm_largest.c          |    9 +-
 vpx_mem/memory_manager/hmm_resize.c           |    9 +-
 vpx_mem/memory_manager/hmm_shrink.c           |    9 +-
 vpx_mem/memory_manager/hmm_true.c             |    9 +-
 vpx_mem/memory_manager/include/cavl_if.h      |    9 +-
 vpx_mem/memory_manager/include/cavl_impl.h    |    9 +-
 vpx_mem/memory_manager/include/heapmm.h       |    9 +-
 vpx_mem/memory_manager/include/hmm_cnfg.h     |    9 +-
 vpx_mem/memory_manager/include/hmm_intrnl.h   |    9 +-
 vpx_mem/nds/vpx_mem_nds.c                     |    9 +-
 vpx_mem/ti_c6x/vpx_mem_ti_6cx.c               |    9 +-
 vpx_mem/vpx_mem.c                             |    9 +-
 vpx_mem/vpx_mem.h                             |    9 +-
 vpx_mem/vpx_mem_tracker.c                     |    9 +-
 vpx_ports/config.h                            |    9 +-
 vpx_ports/emms.asm                            |    9 +-
 vpx_ports/mem.h                               |    9 +-
 vpx_ports/mem_ops.h                           |    9 +-
 vpx_ports/mem_ops_aligned.h                   |    9 +-
 vpx_ports/vpx_timer.h                         |    9 +-
 vpx_ports/vpxtypes.h                          |    9 +-
 vpx_ports/x86.h                               |    9 +-
 vpx_ports/x86_abi_support.asm                 |    9 +-
 vpx_scale/arm/armv4/gen_scalers_armv4.asm     |    9 +-
 vpx_scale/arm/nds/yv12extend.c                |    9 +-
 .../neon/vp8_vpxyv12_copyframe_func_neon.asm  |    9 +-
 .../neon/vp8_vpxyv12_copyframeyonly_neon.asm  |    9 +-
 .../vp8_vpxyv12_copysrcframe_func_neon.asm    |    9 +-
 .../vp8_vpxyv12_extendframeborders_neon.asm   |    9 +-
 vpx_scale/arm/scalesystemdependant.c          |    9 +-
 vpx_scale/arm/yv12extend_arm.c                |    9 +-
 vpx_scale/blackfin/yv12config.c               |    9 +-
 vpx_scale/blackfin/yv12extend.c               |    9 +-
 vpx_scale/dm642/bicubic_scaler_c64.c          |    9 +-
 vpx_scale/dm642/gen_scalers_c64.c             |    9 +-
 vpx_scale/dm642/yv12extend.c                  |    9 +-
 vpx_scale/generic/bicubic_scaler.c            |    9 +-
 vpx_scale/generic/gen_scalers.c               |    9 +-
 vpx_scale/generic/scalesystemdependant.c      |    9 +-
 vpx_scale/generic/vpxscale.c                  |    9 +-
 vpx_scale/generic/yv12config.c                |    9 +-
 vpx_scale/generic/yv12extend.c                |    9 +-
 vpx_scale/include/arm/vpxscale_nofp.h         |    9 +-
 .../include/generic/vpxscale_arbitrary.h      |    9 +-
 .../include/generic/vpxscale_depricated.h     |    9 +-
 vpx_scale/include/generic/vpxscale_nofp.h     |    9 +-
 vpx_scale/include/leapster/vpxscale.h         |    9 +-
 vpx_scale/include/symbian/vpxscale_nofp.h     |    9 +-
 vpx_scale/include/vpxscale_nofp.h             |    9 +-
 vpx_scale/intel_linux/scaleopt.c              |    9 +-
 vpx_scale/intel_linux/scalesystemdependant.c  |    9 +-
 vpx_scale/leapster/doptsystemdependant_lf.c   |    9 +-
 vpx_scale/leapster/gen_scalers_lf.c           |    9 +-
 vpx_scale/leapster/vpxscale_lf.c              |    9 +-
 vpx_scale/leapster/yv12extend.c               |    9 +-
 vpx_scale/scale_mode.h                        |    9 +-
 vpx_scale/symbian/gen_scalers_armv4.asm       |    9 +-
 vpx_scale/symbian/scalesystemdependant.c      |    9 +-
 vpx_scale/vpxscale.h                          |    9 +-
 vpx_scale/wce/gen_scalers_armv4.asm           |    9 +-
 vpx_scale/wce/scalesystemdependant.c          |    9 +-
 vpx_scale/win32/scaleopt.c                    |    9 +-
 vpx_scale/win32/scalesystemdependant.c        |    9 +-
 vpx_scale/x86_64/scaleopt.c                   |    9 +-
 vpx_scale/x86_64/scalesystemdependant.c       |    9 +-
 vpx_scale/yv12config.h                        |    9 +-
 vpx_scale/yv12extend.h                        |    9 +-
 wince_wmain_adapter.cpp                       |    9 +-
 y4minput.c                                    |    9 +-
 y4minput.h                                    |    9 +-
 446 files changed, 2253 insertions(+), 4715 deletions(-)
 create mode 100644 PATENTS
 delete mode 100644 examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig

diff --git a/LICENSE b/LICENSE
index 6b0e86768..7a6f99547 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,22 +1,20 @@
-Copyright (c) 2010, Google, Inc.
-
-All rights reserved.
+Copyright (c) 2010, Google Inc. All rights reserved.
 
 Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
+modification, are permitted provided that the following conditions are
+met:
 
-- Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
+  * Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
 
-- Redistributions in binary form must reproduce the above
-  copyright notice, this list of conditions and the following
-  disclaimer in the documentation and/or other materials provided
-  with the distribution.
+  * Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in
+    the documentation and/or other materials provided with the
+    distribution.
 
-- Neither the name of Google nor the names of its contributors may
-  be used to endorse or promote products derived from this software
-  without specific prior written permission.
+  * Neither the name of Google nor the names of its contributors may
+    be used to endorse or promote products derived from this software
+    without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
@@ -30,19 +28,3 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-Subject to the terms and conditions of the above License, Google
-hereby grants to You a perpetual, worldwide, non-exclusive,
-no-charge, royalty-free, irrevocable (except as stated in this
-section) patent license to make, have made, use, offer to sell, sell,
-import, and otherwise transfer this implementation of VP8, where such
-license applies only to those patent claims, both currently owned by
-Google and acquired in the future, licensable by Google that are
-necessarily infringed by this implementation of VP8. If You or your
-agent or exclusive licensee institute or order or agree to the
-institution of patent litigation against any entity (including a
-cross-claim or counterclaim in a lawsuit) alleging that this
-implementation of VP8 or any code incorporated within this
-implementation of VP8 constitutes direct or contributory patent
-infringement, or inducement of patent infringement, then any rights
-granted to You under this License for this implementation of VP8
-shall terminate as of the date such litigation is filed.
diff --git a/PATENTS b/PATENTS
new file mode 100644
index 000000000..4414d8385
--- /dev/null
+++ b/PATENTS
@@ -0,0 +1,22 @@
+Additional IP Rights Grant (Patents)
+
+"This implementation" means the copyrightable works distributed by
+Google as part of the WebM Project.
+
+Google hereby grants to you a perpetual, worldwide, non-exclusive,
+no-charge, royalty-free, irrevocable (except as stated in this section)
+patent license to make, have made, use, offer to sell, sell, import,
+transfer, and otherwise run, modify and propagate the contents of this
+implementation of VP8, where such license applies only to those patent
+claims, both currently owned by Google and acquired in the future,
+licensable by Google that are necessarily infringed by this
+implementation of VP8. This grant does not include claims that would be
+infringed only as a consequence of further modification of this
+implementation. If you or your agent or exclusive licensee institute or
+order or agree to the institution of patent litigation against any
+entity (including a cross-claim or counterclaim in a lawsuit) alleging
+that this implementation of VP8 or any code incorporated within this
+implementation of VP8 constitutes direct or contributory patent
+infringement, or inducement of patent infringement, then any patent
+rights granted to you under this License for this implementation of VP8
+shall terminate as of the date such litigation is filed.
diff --git a/args.c b/args.c
index f2ad697e3..5fd046671 100644
--- a/args.c
+++ b/args.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/args.h b/args.h
index c063f5316..f1ec6c0f3 100644
--- a/args.h
+++ b/args.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/build/arm-wince-vs8/obj_int_extract.bat b/build/arm-wince-vs8/obj_int_extract.bat
index 9013dbd71..306916010 100644
--- a/build/arm-wince-vs8/obj_int_extract.bat
+++ b/build/arm-wince-vs8/obj_int_extract.bat
@@ -1,10 +1,11 @@
 @echo off
 REM   Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 REM 
-REM   Use of this source code is governed by a BSD-style license and patent
-REM   grant that can be found in the LICENSE file in the root of the source
-REM   tree. All contributing project authors may be found in the AUTHORS
-REM   file in the root of the source tree.
+REM   Use of this source code is governed by a BSD-style license 
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may 
+REM   be found in the AUTHORS file in the root of the source tree.    
 echo on
 
 
diff --git a/build/make/Makefile b/build/make/Makefile
index 5ea1f229e..6dd1279fc 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.    
 ##
 
 
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index 6fcba8462..bd5e7722f 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -1,11 +1,12 @@
 #!/usr/bin/perl
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.    
 ##
 
 
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 569c3e762..7a324452c 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -1,11 +1,12 @@
 #!/usr/bin/env perl
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.    
 ##
 
 
diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh
index dcaa82ca6..25fb6277e 100755
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh
index c1118e1a8..2c972e5a0 100755
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_msvs_def.sh b/build/make/gen_msvs_def.sh
index 68b240624..b5c54fda9 100755
--- a/build/make/gen_msvs_def.sh
+++ b/build/make/gen_msvs_def.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 477dec7fe..6288ee5c1 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index b670ec598..ef425f565 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index d9afb9d30..0f64e8921 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/build/make/version.sh b/build/make/version.sh
index 2bda70106..81682d614 100755
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/docs.mk b/docs.mk
index 868bcb995..f60a482b8 100644
--- a/docs.mk
+++ b/docs.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/example_xma.c b/example_xma.c
index 753ca3c40..e5eca7af2 100644
--- a/example_xma.c
+++ b/example_xma.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/examples.mk b/examples.mk
index 0054f295d..bca0d9c9d 100644
--- a/examples.mk
+++ b/examples.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index 59f0c11ed..826ad201c 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index 7b33e2b7c..9e262e9e5 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/examples/gen_example_code.sh b/examples/gen_example_code.sh
index f2e45c583..dc7ad7286 100755
--- a/examples/gen_example_code.sh
+++ b/examples/gen_example_code.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/examples/gen_example_doxy.php b/examples/gen_example_doxy.php
index 08beade5a..ba2617f28 100755
--- a/examples/gen_example_doxy.php
+++ b/examples/gen_example_doxy.php
@@ -2,10 +2,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/examples/gen_example_text.sh b/examples/gen_example_text.sh
index 0e1f796ea..fcc73d8c1 100755
--- a/examples/gen_example_text.sh
+++ b/examples/gen_example_text.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig b/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig
deleted file mode 100644
index b0ed7c99c..000000000
--- a/examples/includes/PHP-Markdown-Extra-1.2.3/markdown.php.orig
+++ /dev/null
@@ -1,2909 +0,0 @@
-<?php
-#
-# Markdown Extra  -  A text-to-HTML conversion tool for web writers
-#
-# PHP Markdown & Extra
-# Copyright (c) 2004-2008 Michel Fortin  
-# <http://www.michelf.com/projects/php-markdown/>
-#
-# Original Markdown
-# Copyright (c) 2004-2006 John Gruber  
-# <http://daringfireball.net/projects/markdown/>
-#
-
-
-define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008
-define( 'MARKDOWNEXTRA_VERSION',  "1.2.3" ); # Wed 31 Dec 2008
-
-
-#
-# Global default settings:
-#
-
-# Change to ">" for HTML output
-@define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX',  " />");
-
-# Define the width of a tab for code blocks.
-@define( 'MARKDOWN_TAB_WIDTH',     4 );
-
-# Optional title attribute for footnote links and backlinks.
-@define( 'MARKDOWN_FN_LINK_TITLE',         "" );
-@define( 'MARKDOWN_FN_BACKLINK_TITLE',     "" );
-
-# Optional class attribute for footnote links and backlinks.
-@define( 'MARKDOWN_FN_LINK_CLASS',         "" );
-@define( 'MARKDOWN_FN_BACKLINK_CLASS',     "" );
-
-
-#
-# WordPress settings:
-#
-
-# Change to false to remove Markdown from posts and/or comments.
-@define( 'MARKDOWN_WP_POSTS',      true );
-@define( 'MARKDOWN_WP_COMMENTS',   true );
-
-
-
-### Standard Function Interface ###
-
-@define( 'MARKDOWN_PARSER_CLASS',  'MarkdownExtra_Parser' );
-
-function Markdown($text) {
-#
-# Initialize the parser and return the result of its transform method.
-#
-  # Setup static parser variable.
-  static $parser;
-  if (!isset($parser)) {
-    $parser_class = MARKDOWN_PARSER_CLASS;
-    $parser = new $parser_class;
-  }
-
-  # Transform text using parser.
-  return $parser->transform($text);
-}
-
-
-### WordPress Plugin Interface ###
-
-/*
-Plugin Name: Markdown Extra
-Plugin URI: http://www.michelf.com/projects/php-markdown/
-Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
-Version: 1.2.2
-Author: Michel Fortin
-Author URI: http://www.michelf.com/
-*/
-
-if (isset($wp_version)) {
-  # More details about how it works here:
-  # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
-  
-  # Post content and excerpts
-  # - Remove WordPress paragraph generator.
-  # - Run Markdown on excerpt, then remove all tags.
-  # - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
-  if (MARKDOWN_WP_POSTS) {
-    remove_filter('the_content',     'wpautop');
-        remove_filter('the_content_rss', 'wpautop');
-    remove_filter('the_excerpt',     'wpautop');
-    add_filter('the_content',     'mdwp_MarkdownPost', 6);
-        add_filter('the_content_rss', 'mdwp_MarkdownPost', 6);
-    add_filter('get_the_excerpt', 'mdwp_MarkdownPost', 6);
-    add_filter('get_the_excerpt', 'trim', 7);
-    add_filter('the_excerpt',     'mdwp_add_p');
-    add_filter('the_excerpt_rss', 'mdwp_strip_p');
-    
-    remove_filter('content_save_pre',  'balanceTags', 50);
-    remove_filter('excerpt_save_pre',  'balanceTags', 50);
-    add_filter('the_content',     'balanceTags', 50);
-    add_filter('get_the_excerpt', 'balanceTags', 9);
-  }
-  
-  # Add a footnote id prefix to posts when inside a loop.
-  function mdwp_MarkdownPost($text) {
-    static $parser;
-    if (!$parser) {
-      $parser_class = MARKDOWN_PARSER_CLASS;
-      $parser = new $parser_class;
-    }
-    if (is_single() || is_page() || is_feed()) {
-      $parser->fn_id_prefix = "";
-    } else {
-      $parser->fn_id_prefix = get_the_ID() . ".";
-    }
-    return $parser->transform($text);
-  }
-  
-  # Comments
-  # - Remove WordPress paragraph generator.
-  # - Remove WordPress auto-link generator.
-  # - Scramble important tags before passing them to the kses filter.
-  # - Run Markdown on excerpt then remove paragraph tags.
-  if (MARKDOWN_WP_COMMENTS) {
-    remove_filter('comment_text', 'wpautop', 30);
-    remove_filter('comment_text', 'make_clickable');
-    add_filter('pre_comment_content', 'Markdown', 6);
-    add_filter('pre_comment_content', 'mdwp_hide_tags', 8);
-    add_filter('pre_comment_content', 'mdwp_show_tags', 12);
-    add_filter('get_comment_text',    'Markdown', 6);
-    add_filter('get_comment_excerpt', 'Markdown', 6);
-    add_filter('get_comment_excerpt', 'mdwp_strip_p', 7);
-  
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    $mdwp_hidden_tags = explode(' ',
-      '<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>');
-    $mdwp_placeholders = explode(' ', str_rot13(
-      'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR '.
-      'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli'));
-  }
-  
-  function mdwp_add_p($text) {
-    if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
-      $text = '<p>'.$text.'</p>';
-      $text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
-    }
-    return $text;
-  }
-  
-  function mdwp_strip_p($t) { return preg_replace('{</?p>}i', '', $t); }
-
-  function mdwp_hide_tags($text) {
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    return str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
-  }
-  function mdwp_show_tags($text) {
-    global $mdwp_hidden_tags, $mdwp_placeholders;
-    return str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
-  }
-}
-
-
-### bBlog Plugin Info ###
-
-function identify_modifier_markdown() {
-  return array(
-    'name' => 'markdown',
-    'type' => 'modifier',
-    'nicename' => 'PHP Markdown Extra',
-    'description' => 'A text-to-HTML conversion tool for web writers',
-    'authors' => 'Michel Fortin and John Gruber',
-    'licence' => 'GPL',
-    'version' => MARKDOWNEXTRA_VERSION,
-    'help' => '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>',
-    );
-}
-
-
-### Smarty Modifier Interface ###
-
-function smarty_modifier_markdown($text) {
-  return Markdown($text);
-}
-
-
-### Textile Compatibility Mode ###
-
-# Rename this file to "classTextile.php" and it can replace Textile everywhere.
-
-if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
-  # Try to include PHP SmartyPants. Should be in the same directory.
-  @include_once 'smartypants.php';
-  # Fake Textile class. It calls Markdown instead.
-  class Textile {
-    function TextileThis($text, $lite='', $encode='') {
-      if ($lite == '' && $encode == '')    $text = Markdown($text);
-      if (function_exists('SmartyPants'))  $text = SmartyPants($text);
-      return $text;
-    }
-    # Fake restricted version: restrictions are not supported for now.
-    function TextileRestricted($text, $lite='', $noimage='') {
-      return $this->TextileThis($text, $lite);
-    }
-    # Workaround to ensure compatibility with TextPattern 4.0.3.
-    function blockLite($text) { return $text; }
-  }
-}
-
-
-
-#
-# Markdown Parser Class
-#
-
-class Markdown_Parser {
-
-  # Regex to match balanced [brackets].
-  # Needed to insert a maximum bracked depth while converting to PHP.
-  var $nested_brackets_depth = 6;
-  var $nested_brackets_re;
-  
-  var $nested_url_parenthesis_depth = 4;
-  var $nested_url_parenthesis_re;
-
-  # Table of hash values for escaped characters:
-  var $escape_chars = '\`*_{}[]()>#+-.!';
-  var $escape_chars_re;
-
-  # Change to ">" for HTML output.
-  var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;
-  var $tab_width = MARKDOWN_TAB_WIDTH;
-  
-  # Change to `true` to disallow markup or entities.
-  var $no_markup = false;
-  var $no_entities = true;
-  
-  # Predefined urls and titles for reference links and images.
-  var $predef_urls = array();
-  var $predef_titles = array();
-
-
-  function Markdown_Parser() {
-  #
-  # Constructor function. Initialize appropriate member variables.
-  #
-    $this->_initDetab();
-    $this->prepareItalicsAndBold();
-  
-    $this->nested_brackets_re = 
-      str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
-      str_repeat('\])*', $this->nested_brackets_depth);
-  
-    $this->nested_url_parenthesis_re = 
-      str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
-      str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);
-    
-    $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';
-    
-    # Sort document, block, and span gamut in ascendent priority order.
-    asort($this->document_gamut);
-    asort($this->block_gamut);
-    asort($this->span_gamut);
-  }
-
-
-  # Internal hashes used during transformation.
-  var $urls = array();
-  var $titles = array();
-  var $html_hashes = array();
-  
-  # Status flag to avoid invalid nesting.
-  var $in_anchor = false;
-  
-  
-  function setup() {
-  #
-  # Called before the transformation process starts to setup parser 
-  # states.
-  #
-    # Clear global hashes.
-    $this->urls = $this->predef_urls;
-    $this->titles = $this->predef_titles;
-    $this->html_hashes = array();
-    
-    $in_anchor = false;
-  }
-  
-  function teardown() {
-  #
-  # Called after the transformation process to clear any variable 
-  # which may be taking up memory unnecessarly.
-  #
-    $this->urls = array();
-    $this->titles = array();
-    $this->html_hashes = array();
-  }
-
-
-  function transform($text) {
-  #
-  # Main function. Performs some preprocessing on the input text
-  # and pass it through the document gamut.
-  #
-    $this->setup();
-  
-    # Remove UTF-8 BOM and marker character in input, if present.
-    $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);
-
-    # Standardize line endings:
-    #   DOS to Unix and Mac to Unix
-    $text = preg_replace('{\r\n?}', "\n", $text);
-
-    # Make sure $text ends with a couple of newlines:
-    $text .= "\n\n";
-
-    # Convert all tabs to spaces.
-    $text = $this->detab($text);
-
-    # Turn block-level HTML blocks into hash entries
-    $text = $this->hashHTMLBlocks($text);
-
-    # Strip any lines consisting only of spaces and tabs.
-    # This makes subsequent regexen easier to write, because we can
-    # match consecutive blank lines with /\n+/ instead of something
-    # contorted like /[ ]*\n+/ .
-    $text = preg_replace('/^[ ]+$/m', '', $text);
-
-    # Run document gamut methods.
-    foreach ($this->document_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-    
-    $this->teardown();
-
-    return $text . "\n";
-  }
-  
-  var $document_gamut = array(
-    # Strip link definitions, store in hashes.
-    "stripLinkDefinitions" => 20,
-    
-    "runBasicBlockGamut"   => 30,
-    );
-
-
-  function stripLinkDefinitions($text) {
-  #
-  # Strips link definitions from text, stores the URLs and titles in
-  # hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: ^[id]: url "optional title"
-    $text = preg_replace_callback('{
-              ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?: # id = $1
-                [ ]*
-                \n?       # maybe *one* newline
-                [ ]*
-              <?(\S+?)>?      # url = $2
-                [ ]*
-                \n?       # maybe one newline
-                [ ]*
-              (?:
-                (?<=\s)     # lookbehind for whitespace
-                ["(]
-                (.*?)     # title = $3
-                [")]
-                [ ]*
-              )?  # title is optional
-              (?:\n+|\Z)
-      }xm',
-      array(&$this, '_stripLinkDefinitions_callback'),
-      $text);
-    return $text;
-  }
-  function _stripLinkDefinitions_callback($matches) {
-    $link_id = strtolower($matches[1]);
-    $this->urls[$link_id] = $matches[2];
-    $this->titles[$link_id] =& $matches[3];
-    return ''; # String that will replace the block
-  }
-
-
-  function hashHTMLBlocks($text) {
-    if ($this->no_markup)  return $text;
-
-    $less_than_tab = $this->tab_width - 1;
-
-    # Hashify HTML blocks:
-    # We only want to do this for block-level HTML tags, such as headers,
-    # lists, and tables. That's because we still want to wrap <p>s around
-    # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
-    # phrase emphasis, and spans. The list of tags we're looking for is
-    # hard-coded:
-    #
-    # *  List "a" is made of tags which can be both inline or block-level.
-    #    These will be treated block-level when the start tag is alone on 
-    #    its line, otherwise they're not matched here and will be taken as 
-    #    inline later.
-    # *  List "b" is made of tags which are always block-level;
-    #
-    $block_tags_a_re = 'ins|del';
-    $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
-               'script|noscript|form|fieldset|iframe|math';
-
-    # Regular expression for the content of a block tag.
-    $nested_tags_level = 4;
-    $attr = '
-      (?>       # optional tag attributes
-        \s      # starts with whitespace
-        (?>
-        [^>"/]+   # text outside quotes
-        |
-        /+(?!>)   # slash not followed by ">"
-        |
-        "[^"]*"   # text inside double quotes (tolerate ">")
-        |
-        \'[^\']*\'  # text inside single quotes (tolerate ">")
-        )*
-      )?  
-      ';
-    $content =
-      str_repeat('
-        (?>
-          [^<]+     # content without tag
-        |
-          <\2     # nested opening tag
-          '.$attr.' # attributes
-          (?>
-            />
-          |
-            >', $nested_tags_level).  # end of opening tag
-            '.*?'.          # last level nested tag content
-      str_repeat('
-            </\2\s*>  # closing nested tag
-          )
-          |       
-          <(?!/\2\s*> # other tags with a different name
-          )
-        )*',
-        $nested_tags_level);
-    $content2 = str_replace('\2', '\3', $content);
-
-    # First, look for nested blocks, e.g.:
-    #   <div>
-    #     <div>
-    #     tags for inner block must be indented.
-    #     </div>
-    #   </div>
-    #
-    # The outermost tags must start at the left margin for this to match, and
-    # the inner nested divs must be indented.
-    # We need to do this before the next, more liberal match, because the next
-    # match will start at the first `<div>` and stop at the first `</div>`.
-    $text = preg_replace_callback('{(?>
-      (?>
-        (?<=\n\n)   # Starting after a blank line
-        |       # or
-        \A\n?     # the beginning of the doc
-      )
-      (           # save in $1
-
-        # Match from `\n<tag>` to `</tag>\n`, handling nested tags 
-        # in between.
-          
-            [ ]{0,'.$less_than_tab.'}
-            <('.$block_tags_b_re.')# start tag = $2
-            '.$attr.'>      # attributes followed by > and \n
-            '.$content.'    # content, support nesting
-            </\2>       # the matching end tag
-            [ ]*        # trailing spaces/tabs
-            (?=\n+|\Z)  # followed by a newline or end of document
-
-      | # Special version for tags of group a.
-
-            [ ]{0,'.$less_than_tab.'}
-            <('.$block_tags_a_re.')# start tag = $3
-            '.$attr.'>[ ]*\n  # attributes followed by >
-            '.$content2.'   # content, support nesting
-            </\3>       # the matching end tag
-            [ ]*        # trailing spaces/tabs
-            (?=\n+|\Z)  # followed by a newline or end of document
-          
-      | # Special case just for <hr />. It was easier to make a special 
-        # case than to make the other regex more complicated.
-      
-            [ ]{0,'.$less_than_tab.'}
-            <(hr)       # start tag = $2
-            '.$attr.'     # attributes
-            /?>         # the matching end tag
-            [ ]*
-            (?=\n{2,}|\Z)   # followed by a blank line or end of document
-      
-      | # Special case for standalone HTML comments:
-      
-          [ ]{0,'.$less_than_tab.'}
-          (?s:
-            <!-- .*? -->
-          )
-          [ ]*
-          (?=\n{2,}|\Z)   # followed by a blank line or end of document
-      
-      | # PHP and ASP-style processor instructions (<? and <%)
-      
-          [ ]{0,'.$less_than_tab.'}
-          (?s:
-            <([?%])     # $2
-            .*?
-            \2>
-          )
-          [ ]*
-          (?=\n{2,}|\Z)   # followed by a blank line or end of document
-          
-      )
-      )}Sxmi',
-      array(&$this, '_hashHTMLBlocks_callback'),
-      $text);
-
-    return $text;
-  }
-  function _hashHTMLBlocks_callback($matches) {
-    $text = $matches[1];
-    $key  = $this->hashBlock($text);
-    return "\n\n$key\n\n";
-  }
-  
-  
-  function hashPart($text, $boundary = 'X') {
-  #
-  # Called whenever a tag must be hashed when a function insert an atomic 
-  # element in the text stream. Passing $text to through this function gives
-  # a unique text-token which will be reverted back when calling unhash.
-  #
-  # The $boundary argument specify what character should be used to surround
-  # the token. By convension, "B" is used for block elements that needs not
-  # to be wrapped into paragraph tags at the end, ":" is used for elements
-  # that are word separators and "X" is used in the general case.
-  #
-    # Swap back any tag hash found in $text so we do not have to `unhash`
-    # multiple times at the end.
-    $text = $this->unhash($text);
-    
-    # Then hash the block.
-    static $i = 0;
-    $key = "$boundary\x1A" . ++$i . $boundary;
-    $this->html_hashes[$key] = $text;
-    return $key; # String that will replace the tag.
-  }
-
-
-  function hashBlock($text) {
-  #
-  # Shortcut function for hashPart with block-level boundaries.
-  #
-    return $this->hashPart($text, 'B');
-  }
-
-
-  var $block_gamut = array(
-  #
-  # These are all the transformations that form block-level
-  # tags like paragraphs, headers, and list items.
-  #
-    "doHeaders"         => 10,
-    "doHorizontalRules" => 20,
-    
-    "doLists"           => 40,
-    "doCodeBlocks"      => 50,
-    "doBlockQuotes"     => 60,
-    );
-
-  function runBlockGamut($text) {
-  #
-  # Run block gamut tranformations.
-  #
-    # We need to escape raw HTML in Markdown source before doing anything 
-    # else. This need to be done for each block, and not only at the 
-    # begining in the Markdown function since hashed blocks can be part of
-    # list items and could have been indented. Indented blocks would have 
-    # been seen as a code block in a previous pass of hashHTMLBlocks.
-    $text = $this->hashHTMLBlocks($text);
-    
-    return $this->runBasicBlockGamut($text);
-  }
-  
-  function runBasicBlockGamut($text) {
-  #
-  # Run block gamut tranformations, without hashing HTML blocks. This is 
-  # useful when HTML blocks are known to be already hashed, like in the first
-  # whole-document pass.
-  #
-    foreach ($this->block_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-    
-    # Finally form paragraph and restore hashed blocks.
-    $text = $this->formParagraphs($text);
-
-    return $text;
-  }
-  
-  
-  function doHorizontalRules($text) {
-    # Do Horizontal Rules:
-    return preg_replace(
-      '{
-        ^[ ]{0,3} # Leading space
-        ([-*_])   # $1: First marker
-        (?>     # Repeated marker group
-          [ ]{0,2}  # Zero, one, or two spaces.
-          \1      # Marker character
-        ){2,}   # Group repeated at least twice
-        [ ]*    # Tailing spaces
-        $     # End of line.
-      }mx',
-      "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n", 
-      $text);
-  }
-
-
-  var $span_gamut = array(
-  #
-  # These are all the transformations that occur *within* block-level
-  # tags like paragraphs, headers, and list items.
-  #
-    # Process character escapes, code spans, and inline HTML
-    # in one shot.
-    "parseSpan"           => -30,
-
-    # Process anchor and image tags. Images must come first,
-    # because ![foo][f] looks like an anchor.
-    "doImages"            =>  10,
-    "doAnchors"           =>  20,
-    
-    # Make links out of things like `<http://example.com/>`
-    # Must come after doAnchors, because you can use < and >
-    # delimiters in inline links like [this](<url>).
-    "doAutoLinks"         =>  30,
-    "encodeAmpsAndAngles" =>  40,
-
-    "doItalicsAndBold"    =>  50,
-    "doHardBreaks"        =>  60,
-    );
-
-  function runSpanGamut($text) {
-  #
-  # Run span gamut tranformations.
-  #
-    foreach ($this->span_gamut as $method => $priority) {
-      $text = $this->$method($text);
-    }
-
-    return $text;
-  }
-  
-  
-  function doHardBreaks($text) {
-    # Do hard breaks:
-    return preg_replace_callback('/ {2,}\n/', 
-      array(&$this, '_doHardBreaks_callback'), $text);
-  }
-  function _doHardBreaks_callback($matches) {
-    return $this->hashPart("<br$this->empty_element_suffix\n");
-  }
-
-
-  function doAnchors($text) {
-  #
-  # Turn Markdown link shortcuts into XHTML <a> tags.
-  #
-    if ($this->in_anchor) return $text;
-    $this->in_anchor = true;
-    
-    #
-    # First, handle reference-style links: [link text] [id]
-    #
-    $text = preg_replace_callback('{
-      (         # wrap whole match in $1
-        \[
-        ('.$this->nested_brackets_re.') # link text = $2
-        \]
-
-        [ ]?        # one optional space
-        (?:\n[ ]*)?   # one optional newline followed by spaces
-
-        \[
-        (.*?)   # id = $3
-        \]
-      )
-      }xs',
-      array(&$this, '_doAnchors_reference_callback'), $text);
-
-    #
-    # Next, inline-style links: [link text](url "optional title")
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        \[
-        ('.$this->nested_brackets_re.') # link text = $2
-        \]
-        \(      # literal paren
-        [ ]*
-        (?:
-          <(\S*)> # href = $3
-        |
-          ('.$this->nested_url_parenthesis_re.')  # href = $4
-        )
-        [ ]*
-        (     # $5
-          ([\'"]) # quote char = $6
-          (.*?)   # Title = $7
-          \6    # matching quote
-          [ ]*  # ignore any spaces/tabs between closing quote and )
-        )?      # title is optional
-        \)
-      )
-      }xs',
-      array(&$this, '_DoAnchors_inline_callback'), $text);
-
-    #
-    # Last, handle reference-style shortcuts: [link text]
-    # These must come last in case you've also got [link test][1]
-    # or [link test](/foo)
-    #
-//    $text = preg_replace_callback('{
-//      (         # wrap whole match in $1
-//        \[
-//        ([^\[\]]+)    # link text = $2; can\'t contain [ or ]
-//        \]
-//      )
-//      }xs',
-//      array(&$this, '_doAnchors_reference_callback'), $text);
-
-    $this->in_anchor = false;
-    return $text;
-  }
-  function _doAnchors_reference_callback($matches) {
-    $whole_match =  $matches[1];
-    $link_text   =  $matches[2];
-    $link_id     =& $matches[3];
-
-    if ($link_id == "") {
-      # for shortcut links like [this][] or [this].
-      $link_id = $link_text;
-    }
-    
-    # lower-case and turn embedded newlines into spaces
-    $link_id = strtolower($link_id);
-    $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);
-
-    if (isset($this->urls[$link_id])) {
-      $url = $this->urls[$link_id];
-      $url = $this->encodeAttribute($url);
-      
-      $result = "<a href=\"$url\"";
-      if ( isset( $this->titles[$link_id] ) ) {
-        $title = $this->titles[$link_id];
-        $title = $this->encodeAttribute($title);
-        $result .=  " title=\"$title\"";
-      }
-    
-      $link_text = $this->runSpanGamut($link_text);
-      $result .= ">$link_text</a>";
-      $result = $this->hashPart($result);
-    }
-    else {
-      $result = $whole_match;
-    }
-    return $result;
-  }
-  function _doAnchors_inline_callback($matches) {
-    $whole_match  =  $matches[1];
-    $link_text    =  $this->runSpanGamut($matches[2]);
-    $url      =  $matches[3] == '' ? $matches[4] : $matches[3];
-    $title      =& $matches[7];
-
-    $url = $this->encodeAttribute($url);
-
-    $result = "<a href=\"$url\"";
-    if (isset($title)) {
-      $title = $this->encodeAttribute($title);
-      $result .=  " title=\"$title\"";
-    }
-    
-    $link_text = $this->runSpanGamut($link_text);
-    $result .= ">$link_text</a>";
-
-    return $this->hashPart($result);
-  }
-
-
-  function doImages($text) {
-  #
-  # Turn Markdown image shortcuts into <img> tags.
-  #
-    #
-    # First, handle reference-style labeled images: ![alt text][id]
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        !\[
-        ('.$this->nested_brackets_re.')   # alt text = $2
-        \]
-
-        [ ]?        # one optional space
-        (?:\n[ ]*)?   # one optional newline followed by spaces
-
-        \[
-        (.*?)   # id = $3
-        \]
-
-      )
-      }xs', 
-      array(&$this, '_doImages_reference_callback'), $text);
-
-    #
-    # Next, handle inline images:  ![alt text](url "optional title")
-    # Don't forget: encode * and _
-    #
-    $text = preg_replace_callback('{
-      (       # wrap whole match in $1
-        !\[
-        ('.$this->nested_brackets_re.')   # alt text = $2
-        \]
-        \s?     # One optional whitespace character
-        \(      # literal paren
-        [ ]*
-        (?:
-          <(\S*)> # src url = $3
-        |
-          ('.$this->nested_url_parenthesis_re.')  # src url = $4
-        )
-        [ ]*
-        (     # $5
-          ([\'"]) # quote char = $6
-          (.*?)   # title = $7
-          \6    # matching quote
-          [ ]*
-        )?      # title is optional
-        \)
-      )
-      }xs',
-      array(&$this, '_doImages_inline_callback'), $text);
-
-    return $text;
-  }
-  function _doImages_reference_callback($matches) {
-    $whole_match = $matches[1];
-    $alt_text    = $matches[2];
-    $link_id     = strtolower($matches[3]);
-
-    if ($link_id == "") {
-      $link_id = strtolower($alt_text); # for shortcut links like ![this][].
-    }
-
-    $alt_text = $this->encodeAttribute($alt_text);
-    if (isset($this->urls[$link_id])) {
-      $url = $this->encodeAttribute($this->urls[$link_id]);
-      $result = "<img src=\"$url\" alt=\"$alt_text\"";
-      if (isset($this->titles[$link_id])) {
-        $title = $this->titles[$link_id];
-        $title = $this->encodeAttribute($title);
-        $result .=  " title=\"$title\"";
-      }
-      $result .= $this->empty_element_suffix;
-      $result = $this->hashPart($result);
-    }
-    else {
-      # If there's no such link ID, leave intact:
-      $result = $whole_match;
-    }
-
-    return $result;
-  }
-  function _doImages_inline_callback($matches) {
-    $whole_match  = $matches[1];
-    $alt_text   = $matches[2];
-    $url      = $matches[3] == '' ? $matches[4] : $matches[3];
-    $title      =& $matches[7];
-
-    $alt_text = $this->encodeAttribute($alt_text);
-    $url = $this->encodeAttribute($url);
-    $result = "<img src=\"$url\" alt=\"$alt_text\"";
-    if (isset($title)) {
-      $title = $this->encodeAttribute($title);
-      $result .=  " title=\"$title\""; # $title already quoted
-    }
-    $result .= $this->empty_element_suffix;
-
-    return $this->hashPart($result);
-  }
-
-
-  function doHeaders($text) {
-    # Setext-style headers:
-    #   Header 1
-    #   ========
-    #  
-    #   Header 2
-    #   --------
-    #
-    $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
-      array(&$this, '_doHeaders_callback_setext'), $text);
-
-    # atx-style headers:
-    # # Header 1
-    # ## Header 2
-    # ## Header 2 with closing hashes ##
-    # ...
-    # ###### Header 6
-    #
-    $text = preg_replace_callback('{
-        ^(\#{1,6})  # $1 = string of #\'s
-        [ ]*
-        (.+?)   # $2 = Header text
-        [ ]*
-        \#*     # optional closing #\'s (not counted)
-        \n+
-      }xm',
-      array(&$this, '_doHeaders_callback_atx'), $text);
-
-    return $text;
-  }
-  function _doHeaders_callback_setext($matches) {
-    # Terrible hack to check we haven't found an empty list item.
-    if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
-      return $matches[0];
-    
-    $level = $matches[2]{0} == '=' ? 1 : 2;
-    $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-  function _doHeaders_callback_atx($matches) {
-    $level = strlen($matches[1]);
-    $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-
-
-  function doLists($text) {
-  #
-  # Form HTML ordered (numbered) and unordered (bulleted) lists.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Re-usable patterns to match list item bullets and number markers:
-    $marker_ul_re  = '[*+-]';
-    $marker_ol_re  = '\d+[.]';
-    $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
-
-    $markers_relist = array($marker_ul_re, $marker_ol_re);
-
-    foreach ($markers_relist as $marker_re) {
-      # Re-usable pattern to match any entirel ul or ol list:
-      $whole_list_re = '
-        (               # $1 = whole list
-          (               # $2
-          [ ]{0,'.$less_than_tab.'}
-          ('.$marker_re.')      # $3 = first list item marker
-          [ ]+
-          )
-          (?s:.+?)
-          (               # $4
-            \z
-          |
-            \n{2,}
-            (?=\S)
-            (?!           # Negative lookahead for another list item marker
-            [ ]*
-            '.$marker_re.'[ ]+
-            )
-          )
-        )
-      '; // mx
-      
-      # We use a different prefix before nested lists than top-level lists.
-      # See extended comment in _ProcessListItems().
-    
-      if ($this->list_level) {
-        $text = preg_replace_callback('{
-            ^
-            '.$whole_list_re.'
-          }mx',
-          array(&$this, '_doLists_callback'), $text);
-      }
-      else {
-        $text = preg_replace_callback('{
-            (?:(?<=\n)\n|\A\n?) # Must eat the newline
-            '.$whole_list_re.'
-          }mx',
-          array(&$this, '_doLists_callback'), $text);
-      }
-    }
-
-    return $text;
-  }
-  function _doLists_callback($matches) {
-    # Re-usable patterns to match list item bullets and number markers:
-    $marker_ul_re  = '[*+-]';
-    $marker_ol_re  = '\d+[.]';
-    $marker_any_re = "(?:$marker_ul_re|$marker_ol_re)";
-    
-    $list = $matches[1];
-    $list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";
-    
-    $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );
-    
-    $list .= "\n";
-    $result = $this->processListItems($list, $marker_any_re);
-    
-    $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
-    return "\n". $result ."\n\n";
-  }
-
-  var $list_level = 0;
-
-  function processListItems($list_str, $marker_any_re) {
-  #
-  # Process the contents of a single ordered or unordered list, splitting it
-  # into individual list items.
-  #
-    # The $this->list_level global keeps track of when we're inside a list.
-    # Each time we enter a list, we increment it; when we leave a list,
-    # we decrement. If it's zero, we're not in a list anymore.
-    #
-    # We do this because when we're not inside a list, we want to treat
-    # something like this:
-    #
-    #   I recommend upgrading to version
-    #   8. Oops, now this line is treated
-    #   as a sub-list.
-    #
-    # As a single paragraph, despite the fact that the second line starts
-    # with a digit-period-space sequence.
-    #
-    # Whereas when we're inside a list (or sub-list), that line will be
-    # treated as the start of a sub-list. What a kludge, huh? This is
-    # an aspect of Markdown's syntax that's hard to parse perfectly
-    # without resorting to mind-reading. Perhaps the solution is to
-    # change the syntax rules such that sub-lists must start with a
-    # starting cardinal number; e.g. "1." or "a.".
-    
-    $this->list_level++;
-
-    # trim trailing blank lines:
-    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
-
-    $list_str = preg_replace_callback('{
-      (\n)?             # leading line = $1
-      (^[ ]*)             # leading whitespace = $2
-      ('.$marker_any_re.'       # list marker and space = $3
-        (?:[ ]+|(?=\n)) # space only required if item is not empty
-      )
-      ((?s:.*?))            # list item text   = $4
-      (?:(\n+(?=\n))|\n)        # tailing blank line = $5
-      (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
-      }xm',
-      array(&$this, '_processListItems_callback'), $list_str);
-
-    $this->list_level--;
-    return $list_str;
-  }
-  function _processListItems_callback($matches) {
-    $item = $matches[4];
-    $leading_line =& $matches[1];
-    $leading_space =& $matches[2];
-    $marker_space = $matches[3];
-    $tailing_blank_line =& $matches[5];
-
-    if ($leading_line || $tailing_blank_line || 
-      preg_match('/\n{2,}/', $item))
-    {
-      # Replace marker with the appropriate whitespace indentation
-      $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
-      $item = $this->runBlockGamut($this->outdent($item)."\n");
-    }
-    else {
-      # Recursion for sub-lists:
-      $item = $this->doLists($this->outdent($item));
-      $item = preg_replace('/\n+$/', '', $item);
-      $item = $this->runSpanGamut($item);
-    }
-
-    return "<li>" . $item . "</li>\n";
-  }
-
-
-  function doCodeBlocks($text) {
-  #
-  # Process Markdown `<pre><code>` blocks.
-  #
-    $text = preg_replace_callback('{
-        (?:\n\n|\A\n?)
-        (             # $1 = the code block -- one or more lines, starting with a space/tab
-          (?>
-          [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
-          .*\n+
-          )+
-        )
-        ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
-      }xm',
-      array(&$this, '_doCodeBlocks_callback'), $text);
-
-    return $text;
-  }
-  function _doCodeBlocks_callback($matches) {
-    $codeblock = $matches[1];
-
-    $codeblock = $this->outdent($codeblock);
-    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
-
-    # trim leading newlines and trailing newlines
-    $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);
-
-    $codeblock = "<pre><code>$codeblock\n</code></pre>";
-    return "\n\n".$this->hashBlock($codeblock)."\n\n";
-  }
-
-
-  function makeCodeSpan($code) {
-  #
-  # Create a code span markup for $code. Called from handleSpanToken.
-  #
-    $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
-    return $this->hashPart("<code>$code</code>");
-  }
-
-
-  var $em_relist = array(
-    ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)',
-    '*' => '(?<=\S)(?<!\*)\*(?!\*)',
-    '_' => '(?<=\S)(?<!_)_(?!_)',
-    );
-  var $strong_relist = array(
-    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)',
-    '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
-    '__' => '(?<=\S)(?<!_)__(?!_)',
-    );
-  var $em_strong_relist = array(
-    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)',
-    '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
-    '___' => '(?<=\S)(?<!_)___(?!_)',
-    );
-  var $em_strong_prepared_relist;
-  
-  function prepareItalicsAndBold() {
-  #
-  # Prepare regular expressions for seraching emphasis tokens in any
-  # context.
-  #
-    foreach ($this->em_relist as $em => $em_re) {
-      foreach ($this->strong_relist as $strong => $strong_re) {
-        # Construct list of allowed token expressions.
-        $token_relist = array();
-        if (isset($this->em_strong_relist["$em$strong"])) {
-          $token_relist[] = $this->em_strong_relist["$em$strong"];
-        }
-        $token_relist[] = $em_re;
-        $token_relist[] = $strong_re;
-        
-        # Construct master expression from list.
-        $token_re = '{('. implode('|', $token_relist) .')}';
-        $this->em_strong_prepared_relist["$em$strong"] = $token_re;
-      }
-    }
-  }
-  
-  function doItalicsAndBold($text) {
-    $token_stack = array('');
-    $text_stack = array('');
-    $em = '';
-    $strong = '';
-    $tree_char_em = false;
-    
-    while (1) {
-      #
-      # Get prepared regular expression for seraching emphasis tokens
-      # in current context.
-      #
-      $token_re = $this->em_strong_prepared_relist["$em$strong"];
-      
-      #
-      # Each loop iteration seach for the next emphasis token. 
-      # Each token is then passed to handleSpanToken.
-      #
-      $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
-      $text_stack[0] .= $parts[0];
-      $token =& $parts[1];
-      $text =& $parts[2];
-      
-      if (empty($token)) {
-        # Reached end of text span: empty stack without emitting.
-        # any more emphasis.
-        while ($token_stack[0]) {
-          $text_stack[1] .= array_shift($token_stack);
-          $text_stack[0] .= array_shift($text_stack);
-        }
-        break;
-      }
-      
-      $token_len = strlen($token);
-      if ($tree_char_em) {
-        # Reached closing marker while inside a three-char emphasis.
-        if ($token_len == 3) {
-          # Three-char closing marker, close em and strong.
-          array_shift($token_stack);
-          $span = array_shift($text_stack);
-          $span = $this->runSpanGamut($span);
-          $span = "<strong><em>$span</em></strong>";
-          $text_stack[0] .= $this->hashPart($span);
-          $em = '';
-          $strong = '';
-        } else {
-          # Other closing marker: close one em or strong and
-          # change current token state to match the other
-          $token_stack[0] = str_repeat($token{0}, 3-$token_len);
-          $tag = $token_len == 2 ? "strong" : "em";
-          $span = $text_stack[0];
-          $span = $this->runSpanGamut($span);
-          $span = "<$tag>$span</$tag>";
-          $text_stack[0] = $this->hashPart($span);
-          $$tag = ''; # $$tag stands for $em or $strong
-        }
-        $tree_char_em = false;
-      } else if ($token_len == 3) {
-        if ($em) {
-          # Reached closing marker for both em and strong.
-          # Closing strong marker:
-          for ($i = 0; $i < 2; ++$i) {
-            $shifted_token = array_shift($token_stack);
-            $tag = strlen($shifted_token) == 2 ? "strong" : "em";
-            $span = array_shift($text_stack);
-            $span = $this->runSpanGamut($span);
-            $span = "<$tag>$span</$tag>";
-            $text_stack[0] .= $this->hashPart($span);
-            $$tag = ''; # $$tag stands for $em or $strong
-          }
-        } else {
-          # Reached opening three-char emphasis marker. Push on token 
-          # stack; will be handled by the special condition above.
-          $em = $token{0};
-          $strong = "$em$em";
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $tree_char_em = true;
-        }
-      } else if ($token_len == 2) {
-        if ($strong) {
-          # Unwind any dangling emphasis marker:
-          if (strlen($token_stack[0]) == 1) {
-            $text_stack[1] .= array_shift($token_stack);
-            $text_stack[0] .= array_shift($text_stack);
-          }
-          # Closing strong marker:
-          array_shift($token_stack);
-          $span = array_shift($text_stack);
-          $span = $this->runSpanGamut($span);
-          $span = "<strong>$span</strong>";
-          $text_stack[0] .= $this->hashPart($span);
-          $strong = '';
-        } else {
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $strong = $token;
-        }
-      } else {
-        # Here $token_len == 1
-        if ($em) {
-          if (strlen($token_stack[0]) == 1) {
-            # Closing emphasis marker:
-            array_shift($token_stack);
-            $span = array_shift($text_stack);
-            $span = $this->runSpanGamut($span);
-            $span = "<em>$span</em>";
-            $text_stack[0] .= $this->hashPart($span);
-            $em = '';
-          } else {
-            $text_stack[0] .= $token;
-          }
-        } else {
-          array_unshift($token_stack, $token);
-          array_unshift($text_stack, '');
-          $em = $token;
-        }
-      }
-    }
-    return $text_stack[0];
-  }
-
-
-  function doBlockQuotes($text) {
-    $text = preg_replace_callback('/
-        (               # Wrap whole match in $1
-        (?>
-          ^[ ]*>[ ]?      # ">" at the start of a line
-          .+\n          # rest of the first line
-          (.+\n)*         # subsequent consecutive lines
-          \n*           # blanks
-        )+
-        )
-      /xm',
-      array(&$this, '_doBlockQuotes_callback'), $text);
-
-    return $text;
-  }
-  function _doBlockQuotes_callback($matches) {
-    $bq = $matches[1];
-    # trim one level of quoting - trim whitespace-only lines
-    $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
-    $bq = $this->runBlockGamut($bq);    # recurse
-
-    $bq = preg_replace('/^/m', "  ", $bq);
-    # These leading spaces cause problem with <pre> content, 
-    # so we need to fix that:
-    $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx', 
-      array(&$this, '_DoBlockQuotes_callback2'), $bq);
-
-    return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
-  }
-  function _doBlockQuotes_callback2($matches) {
-    $pre = $matches[1];
-    $pre = preg_replace('/^  /m', '', $pre);
-    return $pre;
-  }
-
-
-  function formParagraphs($text) {
-  #
-  # Params:
-  #   $text - string to process with html <p> tags
-  #
-    # Strip leading and trailing lines:
-    $text = preg_replace('/\A\n+|\n+\z/', '', $text);
-
-    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
-
-    #
-    # Wrap <p> tags and unhashify HTML blocks
-    #
-    foreach ($grafs as $key => $value) {
-      if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
-        # Is a paragraph.
-        $value = $this->runSpanGamut($value);
-        $value = preg_replace('/^([ ]*)/', "<p>", $value);
-        $value .= "</p>";
-        $grafs[$key] = $this->unhash($value);
-      }
-      else {
-        # Is a block.
-        # Modify elements of @grafs in-place...
-        $graf = $value;
-        $block = $this->html_hashes[$graf];
-        $graf = $block;
-//        if (preg_match('{
-//          \A
-//          (             # $1 = <div> tag
-//            <div  \s+
-//            [^>]*
-//            \b
-//            markdown\s*=\s*  ([\'"])  # $2 = attr quote char
-//            1
-//            \2
-//            [^>]*
-//            >
-//          )
-//          (             # $3 = contents
-//          .*
-//          )
-//          (</div>)          # $4 = closing tag
-//          \z
-//          }xs', $block, $matches))
-//        {
-//          list(, $div_open, , $div_content, $div_close) = $matches;
-//
-//          # We can't call Markdown(), because that resets the hash;
-//          # that initialization code should be pulled into its own sub, though.
-//          $div_content = $this->hashHTMLBlocks($div_content);
-//          
-//          # Run document gamut methods on the content.
-//          foreach ($this->document_gamut as $method => $priority) {
-//            $div_content = $this->$method($div_content);
-//          }
-//
-//          $div_open = preg_replace(
-//            '{\smarkdown\s*=\s*([\'"]).+?\1}', '', $div_open);
-//
-//          $graf = $div_open . "\n" . $div_content . "\n" . $div_close;
-//        }
-        $grafs[$key] = $graf;
-      }
-    }
-
-    return implode("\n\n", $grafs);
-  }
-
-
-  function encodeAttribute($text) {
-  #
-  # Encode text for a double-quoted HTML attribute. This function
-  # is *not* suitable for attributes enclosed in single quotes.
-  #
-    $text = $this->encodeAmpsAndAngles($text);
-    $text = str_replace('"', '&quot;', $text);
-    return $text;
-  }
-  
-  
-  function encodeAmpsAndAngles($text) {
-  #
-  # Smart processing for ampersands and angle brackets that need to 
-  # be encoded. Valid character entities are left alone unless the
-  # no-entities mode is set.
-  #
-    if ($this->no_entities) {
-      $text = str_replace('&', '&amp;', $text);
-    } else {
-      # Ampersand-encoding based entirely on Nat Irons's Amputator
-      # MT plugin: <http://bumppo.net/projects/amputator/>
-      $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/', 
-                '&amp;', $text);;
-    }
-    # Encode remaining <'s
-    $text = str_replace('<', '&lt;', $text);
-
-    return $text;
-  }
-
-
-  function doAutoLinks($text) {
-    $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i', 
-      array(&$this, '_doAutoLinks_url_callback'), $text);
-
-    # Email addresses: <address@domain.foo>
-    $text = preg_replace_callback('{
-      <
-      (?:mailto:)?
-      (
-        [-.\w\x80-\xFF]+
-        \@
-        [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
-      )
-      >
-      }xi',
-      array(&$this, '_doAutoLinks_email_callback'), $text);
-
-    return $text;
-  }
-  function _doAutoLinks_url_callback($matches) {
-    $url = $this->encodeAttribute($matches[1]);
-    $link = "<a href=\"$url\">$url</a>";
-    return $this->hashPart($link);
-  }
-  function _doAutoLinks_email_callback($matches) {
-    $address = $matches[1];
-    $link = $this->encodeEmailAddress($address);
-    return $this->hashPart($link);
-  }
-
-
-  function encodeEmailAddress($addr) {
-  #
-  # Input: an email address, e.g. "foo@example.com"
-  #
-  # Output: the email address as a mailto link, with each character
-  #   of the address encoded as either a decimal or hex entity, in
-  #   the hopes of foiling most address harvesting spam bots. E.g.:
-  #
-  #   <p><a href="&#109;&#x61;&#105;&#x6c;&#116;&#x6f;&#58;&#x66;o&#111;
-  #        &#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;&#101;&#46;&#x63;&#111;
-  #        &#x6d;">&#x66;o&#111;&#x40;&#101;&#x78;&#97;&#x6d;&#112;&#x6c;
-  #        &#101;&#46;&#x63;&#111;&#x6d;</a></p>
-  #
-  # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
-  #   With some optimizations by Milian Wolff.
-  #
-    $addr = "mailto:" . $addr;
-    $chars = preg_split('/(?<!^)(?!$)/', $addr);
-    $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.
-    
-    foreach ($chars as $key => $char) {
-      $ord = ord($char);
-      # Ignore non-ascii chars.
-      if ($ord < 128) {
-        $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
-        # roughly 10% raw, 45% hex, 45% dec
-        # '@' *must* be encoded. I insist.
-        if ($r > 90 && $char != '@') /* do nothing */;
-        else if ($r < 45) $chars[$key] = '&#x'.dechex($ord).';';
-        else              $chars[$key] = '&#'.$ord.';';
-      }
-    }
-    
-    $addr = implode('', $chars);
-    $text = implode('', array_slice($chars, 7)); # text without `mailto:`
-    $addr = "<a href=\"$addr\">$text</a>";
-
-    return $addr;
-  }
-
-
-  function parseSpan($str) {
-  #
-  # Take the string $str and parse it into tokens, hashing embeded HTML,
-  # escaped characters and handling code spans.
-  #
-    $output = '';
-    
-    $span_re = '{
-        (
-          \\\\'.$this->escape_chars_re.'
-        |
-          (?<![`\\\\])
-          `+            # code span marker
-      '.( $this->no_markup ? '' : '
-        |
-          <!--    .*?     -->   # comment
-        |
-          <\?.*?\?> | <%.*?%>   # processing instruction
-        |
-          <[/!$]?[-a-zA-Z0-9:]+ # regular tags
-          (?>
-            \s
-            (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
-          )?
-          >
-      ').'
-        )
-        }xs';
-
-    while (1) {
-      #
-      # Each loop iteration seach for either the next tag, the next 
-      # openning code span marker, or the next escaped character. 
-      # Each token is then passed to handleSpanToken.
-      #
-      $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);
-      
-      # Create token from text preceding tag.
-      if ($parts[0] != "") {
-        $output .= $parts[0];
-      }
-      
-      # Check if we reach the end.
-      if (isset($parts[1])) {
-        $output .= $this->handleSpanToken($parts[1], $parts[2]);
-        $str = $parts[2];
-      }
-      else {
-        break;
-      }
-    }
-    
-    return $output;
-  }
-  
-  
-  function handleSpanToken($token, &$str) {
-  #
-  # Handle $token provided by parseSpan by determining its nature and 
-  # returning the corresponding value that should replace it.
-  #
-    switch ($token{0}) {
-      case "\\":
-        return $this->hashPart("&#". ord($token{1}). ";");
-      case "`":
-        # Search for end marker in remaining text.
-        if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm', 
-          $str, $matches))
-        {
-          $str = $matches[2];
-          $codespan = $this->makeCodeSpan($matches[1]);
-          return $this->hashPart($codespan);
-        }
-        return $token; // return as text since no ending marker found.
-      default:
-        return $this->hashPart($token);
-    }
-  }
-
-
-  function outdent($text) {
-  #
-  # Remove one level of line-leading tabs or spaces
-  #
-    return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
-  }
-
-
-  # String length function for detab. `_initDetab` will create a function to 
-  # hanlde UTF-8 if the default function does not exist.
-  var $utf8_strlen = 'mb_strlen';
-  
-  function detab($text) {
-  #
-  # Replace tabs with the appropriate amount of space.
-  #
-    # For each line we separate the line in blocks delemited by
-    # tab characters. Then we reconstruct every line by adding the 
-    # appropriate number of space between each blocks.
-    
-    $text = preg_replace_callback('/^.*\t.*$/m',
-      array(&$this, '_detab_callback'), $text);
-
-    return $text;
-  }
-  function _detab_callback($matches) {
-    $line = $matches[0];
-    $strlen = $this->utf8_strlen; # strlen function for UTF-8.
-    
-    # Split in blocks.
-    $blocks = explode("\t", $line);
-    # Add each blocks to the line.
-    $line = $blocks[0];
-    unset($blocks[0]); # Do not add first block twice.
-    foreach ($blocks as $block) {
-      # Calculate amount of space, insert spaces, insert block.
-      $amount = $this->tab_width - 
-        $strlen($line, 'UTF-8') % $this->tab_width;
-      $line .= str_repeat(" ", $amount) . $block;
-    }
-    return $line;
-  }
-  function _initDetab() {
-  #
-  # Check for the availability of the function in the `utf8_strlen` property
-  # (initially `mb_strlen`). If the function is not available, create a 
-  # function that will loosely count the number of UTF-8 characters with a
-  # regular expression.
-  #
-    if (function_exists($this->utf8_strlen)) return;
-    $this->utf8_strlen = create_function('$text', 'return preg_match_all(
-      "/[\\\\x00-\\\\xBF]|[\\\\xC0-\\\\xFF][\\\\x80-\\\\xBF]*/", 
-      $text, $m);');
-  }
-
-
-  function unhash($text) {
-  #
-  # Swap back in all the tags hashed by _HashHTMLBlocks.
-  #
-    return preg_replace_callback('/(.)\x1A[0-9]+\1/', 
-      array(&$this, '_unhash_callback'), $text);
-  }
-  function _unhash_callback($matches) {
-    return $this->html_hashes[$matches[0]];
-  }
-
-}
-
-
-#
-# Markdown Extra Parser Class
-#
-
-class MarkdownExtra_Parser extends Markdown_Parser {
-
-  # Prefix for footnote ids.
-  var $fn_id_prefix = "";
-  
-  # Optional title attribute for footnote links and backlinks.
-  var $fn_link_title = MARKDOWN_FN_LINK_TITLE;
-  var $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE;
-  
-  # Optional class attribute for footnote links and backlinks.
-  var $fn_link_class = MARKDOWN_FN_LINK_CLASS;
-  var $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS;
-  
-  # Predefined abbreviations.
-  var $predef_abbr = array();
-
-
-  function MarkdownExtra_Parser() {
-  #
-  # Constructor function. Initialize the parser object.
-  #
-    # Add extra escapable characters before parent constructor 
-    # initialize the table.
-    $this->escape_chars .= ':|';
-    
-    # Insert extra document, block, and span transformations. 
-    # Parent constructor will do the sorting.
-    $this->document_gamut += array(
-      "doFencedCodeBlocks" => 5,
-      "stripFootnotes"     => 15,
-      "stripAbbreviations" => 25,
-      "appendFootnotes"    => 50,
-      );
-    $this->block_gamut += array(
-      "doFencedCodeBlocks" => 5,
-      "doTables"           => 15,
-      "doDefLists"         => 45,
-      );
-    $this->span_gamut += array(
-      "doFootnotes"        => 5,
-      "doAbbreviations"    => 70,
-      );
-    
-    parent::Markdown_Parser();
-  }
-  
-  
-  # Extra variables used during extra transformations.
-  var $footnotes = array();
-  var $footnotes_ordered = array();
-  var $abbr_desciptions = array();
-  var $abbr_word_re = '';
-  
-  # Give the current footnote number.
-  var $footnote_counter = 1;
-  
-  
-  function setup() {
-  #
-  # Setting up Extra-specific variables.
-  #
-    parent::setup();
-    
-    $this->footnotes = array();
-    $this->footnotes_ordered = array();
-    $this->abbr_desciptions = array();
-    $this->abbr_word_re = '';
-    $this->footnote_counter = 1;
-    
-    foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
-      if ($this->abbr_word_re)
-        $this->abbr_word_re .= '|';
-      $this->abbr_word_re .= preg_quote($abbr_word);
-      $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
-    }
-  }
-  
-  function teardown() {
-  #
-  # Clearing Extra-specific variables.
-  #
-    $this->footnotes = array();
-    $this->footnotes_ordered = array();
-    $this->abbr_desciptions = array();
-    $this->abbr_word_re = '';
-    
-    parent::teardown();
-  }
-  
-  
-  ### HTML Block Parser ###
-  
-  # Tags that are always treated as block tags:
-  var $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend';
-  
-  # Tags treated as block tags only if the opening tag is alone on it's line:
-  var $context_block_tags_re = 'script|noscript|math|ins|del';
-  
-  # Tags where markdown="1" default to span mode:
-  var $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address';
-  
-  # Tags which must not have their contents modified, no matter where 
-  # they appear:
-  var $clean_tags_re = 'script|math';
-  
-  # Tags that do not need to be closed.
-  var $auto_close_tags_re = 'hr|img';
-  
-
-  function hashHTMLBlocks($text) {
-  #
-  # Hashify HTML Blocks and "clean tags".
-  #
-  # We only want to do this for block-level HTML tags, such as headers,
-  # lists, and tables. That's because we still want to wrap <p>s around
-  # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
-  # phrase emphasis, and spans. The list of tags we're looking for is
-  # hard-coded.
-  #
-  # This works by calling _HashHTMLBlocks_InMarkdown, which then calls
-  # _HashHTMLBlocks_InHTML when it encounter block tags. When the markdown="1" 
-  # attribute is found whitin a tag, _HashHTMLBlocks_InHTML calls back
-  #  _HashHTMLBlocks_InMarkdown to handle the Markdown syntax within the tag.
-  # These two functions are calling each other. It's recursive!
-  #
-    #
-    # Call the HTML-in-Markdown hasher.
-    #
-    list($text, ) = $this->_hashHTMLBlocks_inMarkdown($text);
-    
-    return $text;
-  }
-  function _hashHTMLBlocks_inMarkdown($text, $indent = 0, 
-                    $enclosing_tag_re = '', $span = false)
-  {
-  #
-  # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags.
-  #
-  # *   $indent is the number of space to be ignored when checking for code 
-  #     blocks. This is important because if we don't take the indent into 
-  #     account, something like this (which looks right) won't work as expected:
-  #
-  #     <div>
-  #         <div markdown="1">
-  #         Hello World.  <-- Is this a Markdown code block or text?
-  #         </div>  <-- Is this a Markdown code block or a real tag?
-  #     <div>
-  #
-  #     If you don't like this, just don't indent the tag on which
-  #     you apply the markdown="1" attribute.
-  #
-  # *   If $enclosing_tag_re is not empty, stops at the first unmatched closing 
-  #     tag with that name. Nested tags supported.
-  #
-  # *   If $span is true, text inside must treated as span. So any double 
-  #     newline will be replaced by a single newline so that it does not create 
-  #     paragraphs.
-  #
-  # Returns an array of that form: ( processed text , remaining text )
-  #
-    if ($text === '') return array('', '');
-
-    # Regex to check for the presense of newlines around a block tag.
-    $newline_before_re = '/(?:^\n?|\n\n)*$/';
-    $newline_after_re = 
-      '{
-        ^           # Start of text following the tag.
-        (?>[ ]*<!--.*?-->)?   # Optional comment.
-        [ ]*\n          # Must be followed by newline.
-      }xs';
-    
-    # Regex to match any tag.
-    $block_tag_re =
-      '{
-        (         # $2: Capture hole tag.
-          </?         # Any opening or closing tag.
-            (?>       # Tag name.
-              '.$this->block_tags_re.'      |
-              '.$this->context_block_tags_re.'  |
-              '.$this->clean_tags_re.'          |
-              (?!\s)'.$enclosing_tag_re.'
-            )
-            (?:
-              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
-              (?>
-                ".*?"   | # Double quotes (can contain `>`)
-                \'.*?\'     | # Single quotes (can contain `>`)
-                .+?       # Anything but quotes and `>`.
-              )*?
-            )?
-          >         # End of tag.
-        |
-          <!--    .*?     --> # HTML Comment
-        |
-          <\?.*?\?> | <%.*?%> # Processing instruction
-        |
-          <!\[CDATA\[.*?\]\]> # CData Block
-        |
-          # Code span marker
-          `+
-        '. ( !$span ? ' # If not in span.
-        |
-          # Indented code block
-          (?> ^[ ]*\n? | \n[ ]*\n )
-          [ ]{'.($indent+4).'}[^\n]* \n
-          (?>
-            (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n
-          )*
-        |
-          # Fenced code block marker
-          (?> ^ | \n )
-          [ ]{'.($indent).'}~~~+[ ]*\n
-        ' : '' ). ' # End (if not is span).
-        )
-      }xs';
-
-    
-    $depth = 0;   # Current depth inside the tag tree.
-    $parsed = ""; # Parsed text that will be returned.
-
-    #
-    # Loop through every tag until we find the closing tag of the parent
-    # or loop until reaching the end of text if no parent tag specified.
-    #
-    do {
-      #
-      # Split the text using the first $tag_match pattern found.
-      # Text before  pattern will be first in the array, text after
-      # pattern will be at the end, and between will be any catches made 
-      # by the pattern.
-      #
-      $parts = preg_split($block_tag_re, $text, 2, 
-                PREG_SPLIT_DELIM_CAPTURE);
-      
-      # If in Markdown span mode, add a empty-string span-level hash 
-      # after each newline to prevent triggering any block element.
-      if ($span) {
-        $void = $this->hashPart("", ':');
-        $newline = "$void\n";
-        $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void;
-      }
-      
-      $parsed .= $parts[0]; # Text before current tag.
-      
-      # If end of $text has been reached. Stop loop.
-      if (count($parts) < 3) {
-        $text = "";
-        break;
-      }
-      
-      $tag  = $parts[1]; # Tag to handle.
-      $text = $parts[2]; # Remaining text after current tag.
-      $tag_re = preg_quote($tag); # For use in a regular expression.
-      
-      #
-      # Check for: Code span marker
-      #
-      if ($tag{0} == "`") {
-        # Find corresponding end marker.
-        $tag_re = preg_quote($tag);
-        if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',
-          $text, $matches))
-        {
-          # End marker found: pass text unchanged until marker.
-          $parsed .= $tag . $matches[0];
-          $text = substr($text, strlen($matches[0]));
-        }
-        else {
-          # Unmatched marker: just skip it.
-          $parsed .= $tag;
-        }
-      }
-      #
-      # Check for: Indented code block or fenced code block marker.
-      #
-      else if ($tag{0} == "\n" || $tag{0} == "~") {
-        if ($tag{1} == "\n" || $tag{1} == " ") {
-          # Indented code block: pass it unchanged, will be handled 
-          # later.
-          $parsed .= $tag;
-        }
-        else {
-          # Fenced code block marker: find matching end marker.
-          $tag_re = preg_quote(trim($tag));
-          if (preg_match('{^(?>.*\n)+?'.$tag_re.' *\n}', $text, 
-            $matches)) 
-          {
-            # End marker found: pass text unchanged until marker.
-            $parsed .= $tag . $matches[0];
-            $text = substr($text, strlen($matches[0]));
-          }
-          else {
-            # No end marker: just skip it.
-            $parsed .= $tag;
-          }
-        }
-      }
-      #
-      # Check for: Opening Block level tag or
-      #            Opening Context Block tag (like ins and del) 
-      #               used as a block tag (tag is alone on it's line).
-      #
-      else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) ||
-        ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) &&
-          preg_match($newline_before_re, $parsed) &&
-          preg_match($newline_after_re, $text)  )
-        )
-      {
-        # Need to parse tag and following text using the HTML parser.
-        list($block_text, $text) = 
-          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true);
-        
-        # Make sure it stays outside of any paragraph by adding newlines.
-        $parsed .= "\n\n$block_text\n\n";
-      }
-      #
-      # Check for: Clean tag (like script, math)
-      #            HTML Comments, processing instructions.
-      #
-      else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) ||
-        $tag{1} == '!' || $tag{1} == '?')
-      {
-        # Need to parse tag and following text using the HTML parser.
-        # (don't check for markdown attribute)
-        list($block_text, $text) = 
-          $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false);
-        
-        $parsed .= $block_text;
-      }
-      #
-      # Check for: Tag with same name as enclosing tag.
-      #
-      else if ($enclosing_tag_re !== '' &&
-        # Same name as enclosing tag.
-        preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag))
-      {
-        #
-        # Increase/decrease nested tag count.
-        #
-        if ($tag{1} == '/')           $depth--;
-        else if ($tag{strlen($tag)-2} != '/') $depth++;
-
-        if ($depth < 0) {
-          #
-          # Going out of parent element. Clean up and break so we
-          # return to the calling function.
-          #
-          $text = $tag . $text;
-          break;
-        }
-        
-        $parsed .= $tag;
-      }
-      else {
-        $parsed .= $tag;
-      }
-    } while ($depth >= 0);
-    
-    return array($parsed, $text);
-  }
-  function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) {
-  #
-  # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags.
-  #
-  # *   Calls $hash_method to convert any blocks.
-  # *   Stops when the first opening tag closes.
-  # *   $md_attr indicate if the use of the `markdown="1"` attribute is allowed.
-  #     (it is not inside clean tags)
-  #
-  # Returns an array of that form: ( processed text , remaining text )
-  #
-    if ($text === '') return array('', '');
-    
-    # Regex to match `markdown` attribute inside of a tag.
-    $markdown_attr_re = '
-      {
-        \s*     # Eat whitespace before the `markdown` attribute
-        markdown
-        \s*=\s*
-        (?>
-          (["\'])   # $1: quote delimiter   
-          (.*?)   # $2: attribute value
-          \1      # matching delimiter  
-        |
-          ([^\s>]*) # $3: unquoted attribute value
-        )
-        ()        # $4: make $3 always defined (avoid warnings)
-      }xs';
-    
-    # Regex to match any tag.
-    $tag_re = '{
-        (         # $2: Capture hole tag.
-          </?         # Any opening or closing tag.
-            [\w:$]+     # Tag name.
-            (?:
-              (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name.
-              (?>
-                ".*?"   | # Double quotes (can contain `>`)
-                \'.*?\'     | # Single quotes (can contain `>`)
-                .+?       # Anything but quotes and `>`.
-              )*?
-            )?
-          >         # End of tag.
-        |
-          <!--    .*?     --> # HTML Comment
-        |
-          <\?.*?\?> | <%.*?%> # Processing instruction
-        |
-          <!\[CDATA\[.*?\]\]> # CData Block
-        )
-      }xs';
-    
-    $original_text = $text;   # Save original text in case of faliure.
-    
-    $depth    = 0;  # Current depth inside the tag tree.
-    $block_text = ""; # Temporary text holder for current text.
-    $parsed   = ""; # Parsed text that will be returned.
-
-    #
-    # Get the name of the starting tag.
-    # (This pattern makes $base_tag_name_re safe without quoting.)
-    #
-    if (preg_match('/^<([\w:$]*)\b/', $text, $matches))
-      $base_tag_name_re = $matches[1];
-
-    #
-    # Loop through every tag until we find the corresponding closing tag.
-    #
-    do {
-      #
-      # Split the text using the first $tag_match pattern found.
-      # Text before  pattern will be first in the array, text after
-      # pattern will be at the end, and between will be any catches made 
-      # by the pattern.
-      #
-      $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
-      
-      if (count($parts) < 3) {
-        #
-        # End of $text reached with unbalenced tag(s).
-        # In that case, we return original text unchanged and pass the
-        # first character as filtered to prevent an infinite loop in the 
-        # parent function.
-        #
-        return array($original_text{0}, substr($original_text, 1));
-      }
-      
-      $block_text .= $parts[0]; # Text before current tag.
-      $tag         = $parts[1]; # Tag to handle.
-      $text        = $parts[2]; # Remaining text after current tag.
-      
-      #
-      # Check for: Auto-close tag (like <hr/>)
-      #      Comments and Processing Instructions.
-      #
-      if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) ||
-        $tag{1} == '!' || $tag{1} == '?')
-      {
-        # Just add the tag to the block as if it was text.
-        $block_text .= $tag;
-      }
-      else {
-        #
-        # Increase/decrease nested tag count. Only do so if
-        # the tag's name match base tag's.
-        #
-        if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) {
-          if ($tag{1} == '/')           $depth--;
-          else if ($tag{strlen($tag)-2} != '/') $depth++;
-        }
-        
-        #
-        # Check for `markdown="1"` attribute and handle it.
-        #
-        if ($md_attr && 
-          preg_match($markdown_attr_re, $tag, $attr_m) &&
-          preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3]))
-        {
-          # Remove `markdown` attribute from opening tag.
-          $tag = preg_replace($markdown_attr_re, '', $tag);
-          
-          # Check if text inside this tag must be parsed in span mode.
-          $this->mode = $attr_m[2] . $attr_m[3];
-          $span_mode = $this->mode == 'span' || $this->mode != 'block' &&
-            preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag);
-          
-          # Calculate indent before tag.
-          if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) {
-            $strlen = $this->utf8_strlen;
-            $indent = $strlen($matches[1], 'UTF-8');
-          } else {
-            $indent = 0;
-          }
-          
-          # End preceding block with this tag.
-          $block_text .= $tag;
-          $parsed .= $this->$hash_method($block_text);
-          
-          # Get enclosing tag name for the ParseMarkdown function.
-          # (This pattern makes $tag_name_re safe without quoting.)
-          preg_match('/^<([\w:$]*)\b/', $tag, $matches);
-          $tag_name_re = $matches[1];
-          
-          # Parse the content using the HTML-in-Markdown parser.
-          list ($block_text, $text)
-            = $this->_hashHTMLBlocks_inMarkdown($text, $indent, 
-              $tag_name_re, $span_mode);
-          
-          # Outdent markdown text.
-          if ($indent > 0) {
-            $block_text = preg_replace("/^[ ]{1,$indent}/m", "", 
-                          $block_text);
-          }
-          
-          # Append tag content to parsed text.
-          if (!$span_mode)  $parsed .= "\n\n$block_text\n\n";
-          else        $parsed .= "$block_text";
-          
-          # Start over a new block.
-          $block_text = "";
-        }
-        else $block_text .= $tag;
-      }
-      
-    } while ($depth > 0);
-    
-    #
-    # Hash last block text that wasn't processed inside the loop.
-    #
-    $parsed .= $this->$hash_method($block_text);
-    
-    return array($parsed, $text);
-  }
-
-
-  function hashClean($text) {
-  #
-  # Called whenever a tag must be hashed when a function insert a "clean" tag
-  # in $text, it pass through this function and is automaticaly escaped, 
-  # blocking invalid nested overlap.
-  #
-    return $this->hashPart($text, 'C');
-  }
-
-
-  function doHeaders($text) {
-  #
-  # Redefined to add id attribute support.
-  #
-    # Setext-style headers:
-    #   Header 1  {#header1}
-    #   ========
-    #  
-    #   Header 2  {#header2}
-    #   --------
-    #
-    $text = preg_replace_callback(
-      '{
-        (^.+?)                # $1: Header text
-        (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})?  # $2: Id attribute
-        [ ]*\n(=+|-+)[ ]*\n+        # $3: Header footer
-      }mx',
-      array(&$this, '_doHeaders_callback_setext'), $text);
-
-    # atx-style headers:
-    # # Header 1        {#header1}
-    # ## Header 2       {#header2}
-    # ## Header 2 with closing hashes ##  {#header3}
-    # ...
-    # ###### Header 6   {#header2}
-    #
-    $text = preg_replace_callback('{
-        ^(\#{1,6})  # $1 = string of #\'s
-        [ ]*
-        (.+?)   # $2 = Header text
-        [ ]*
-        \#*     # optional closing #\'s (not counted)
-        (?:[ ]+\{\#([-_:a-zA-Z0-9]+)\})? # id attribute
-        [ ]*
-        \n+
-      }xm',
-      array(&$this, '_doHeaders_callback_atx'), $text);
-
-    return $text;
-  }
-  function _doHeaders_attr($attr) {
-    if (empty($attr))  return "";
-    return " id=\"$attr\"";
-  }
-  function _doHeaders_callback_setext($matches) {
-    if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
-      return $matches[0];
-    $level = $matches[3]{0} == '=' ? 1 : 2;
-    $attr  = $this->_doHeaders_attr($id =& $matches[2]);
-    $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-  function _doHeaders_callback_atx($matches) {
-    $level = strlen($matches[1]);
-    $attr  = $this->_doHeaders_attr($id =& $matches[3]);
-    $block = "<h$level$attr>".$this->runSpanGamut($matches[2])."</h$level>";
-    return "\n" . $this->hashBlock($block) . "\n\n";
-  }
-
-
-  function doTables($text) {
-  #
-  # Form HTML tables.
-  #
-    $less_than_tab = $this->tab_width - 1;
-    #
-    # Find tables with leading pipe.
-    #
-    # | Header 1 | Header 2
-    # | -------- | --------
-    # | Cell 1   | Cell 2
-    # | Cell 3   | Cell 4
-    #
-    $text = preg_replace_callback('
-      {
-        ^             # Start of a line
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        [|]             # Optional leading pipe (present)
-        (.+) \n           # $1: Header row (at least one pipe)
-        
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        [|] ([ ]*[-:]+[-| :]*) \n # $2: Header underline
-        
-        (             # $3: Cells
-          (?>
-            [ ]*        # Allowed whitespace.
-            [|] .* \n     # Row content.
-          )*
-        )
-        (?=\n|\Z)         # Stop at final double newline.
-      }xm',
-      array(&$this, '_doTable_leadingPipe_callback'), $text);
-    
-    #
-    # Find tables without leading pipe.
-    #
-    # Header 1 | Header 2
-    # -------- | --------
-    # Cell 1   | Cell 2
-    # Cell 3   | Cell 4
-    #
-    $text = preg_replace_callback('
-      {
-        ^             # Start of a line
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        (\S.*[|].*) \n        # $1: Header row (at least one pipe)
-        
-        [ ]{0,'.$less_than_tab.'} # Allowed whitespace.
-        ([-:]+[ ]*[|][-| :]*) \n  # $2: Header underline
-        
-        (             # $3: Cells
-          (?>
-            .* [|] .* \n    # Row content
-          )*
-        )
-        (?=\n|\Z)         # Stop at final double newline.
-      }xm',
-      array(&$this, '_DoTable_callback'), $text);
-
-    return $text;
-  }
-  function _doTable_leadingPipe_callback($matches) {
-    $head   = $matches[1];
-    $underline  = $matches[2];
-    $content  = $matches[3];
-    
-    # Remove leading pipe for each row.
-    $content  = preg_replace('/^ *[|]/m', '', $content);
-    
-    return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
-  }
-  function _doTable_callback($matches) {
-    $head   = $matches[1];
-    $underline  = $matches[2];
-    $content  = $matches[3];
-
-    # Remove any tailing pipes for each line.
-    $head   = preg_replace('/[|] *$/m', '', $head);
-    $underline  = preg_replace('/[|] *$/m', '', $underline);
-    $content  = preg_replace('/[|] *$/m', '', $content);
-    
-    # Reading alignement from header underline.
-    $separators = preg_split('/ *[|] */', $underline);
-    foreach ($separators as $n => $s) {
-      if (preg_match('/^ *-+: *$/', $s))    $attr[$n] = ' align="right"';
-      else if (preg_match('/^ *:-+: *$/', $s))$attr[$n] = ' align="center"';
-      else if (preg_match('/^ *:-+ *$/', $s)) $attr[$n] = ' align="left"';
-      else                  $attr[$n] = '';
-    }
-    
-    # Parsing span elements, including code spans, character escapes, 
-    # and inline HTML tags, so that pipes inside those gets ignored.
-    $head   = $this->parseSpan($head);
-    $headers  = preg_split('/ *[|] */', $head);
-    $col_count  = count($headers);
-    
-    # Write column headers.
-    $text = "<table>\n";
-    $text .= "<thead>\n";
-    $text .= "<tr>\n";
-    foreach ($headers as $n => $header)
-      $text .= "  <th$attr[$n]>".$this->runSpanGamut(trim($header))."</th>\n";
-    $text .= "</tr>\n";
-    $text .= "</thead>\n";
-    
-    # Split content by row.
-    $rows = explode("\n", trim($content, "\n"));
-    
-    $text .= "<tbody>\n";
-    foreach ($rows as $row) {
-      # Parsing span elements, including code spans, character escapes, 
-      # and inline HTML tags, so that pipes inside those gets ignored.
-      $row = $this->parseSpan($row);
-      
-      # Split row by cell.
-      $row_cells = preg_split('/ *[|] */', $row, $col_count);
-      $row_cells = array_pad($row_cells, $col_count, '');
-      
-      $text .= "<tr>\n";
-      foreach ($row_cells as $n => $cell)
-        $text .= "  <td$attr[$n]>".$this->runSpanGamut(trim($cell))."</td>\n";
-      $text .= "</tr>\n";
-    }
-    $text .= "</tbody>\n";
-    $text .= "</table>";
-    
-    return $this->hashBlock($text) . "\n";
-  }
-
-  
-  function doDefLists($text) {
-  #
-  # Form HTML definition lists.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Re-usable pattern to match any entire dl list:
-    $whole_list_re = '(?>
-      (               # $1 = whole list
-        (               # $2
-        [ ]{0,'.$less_than_tab.'}
-        ((?>.*\S.*\n)+)       # $3 = defined term
-        \n?
-        [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-        )
-        (?s:.+?)
-        (               # $4
-          \z
-        |
-          \n{2,}
-          (?=\S)
-          (?!           # Negative lookahead for another term
-          [ ]{0,'.$less_than_tab.'}
-          (?: \S.*\n )+?      # defined term
-          \n?
-          [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-          )
-          (?!           # Negative lookahead for another definition
-          [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
-          )
-        )
-      )
-    )'; // mx
-
-    $text = preg_replace_callback('{
-        (?>\A\n?|(?<=\n\n))
-        '.$whole_list_re.'
-      }mx',
-      array(&$this, '_doDefLists_callback'), $text);
-
-    return $text;
-  }
-  function _doDefLists_callback($matches) {
-    # Re-usable patterns to match list item bullets and number markers:
-    $list = $matches[1];
-    
-    # Turn double returns into triple returns, so that we can make a
-    # paragraph for the last item in a list, if necessary:
-    $result = trim($this->processDefListItems($list));
-    $result = "<dl>\n" . $result . "\n</dl>";
-    return $this->hashBlock($result) . "\n\n";
-  }
-
-
-  function processDefListItems($list_str) {
-  #
-  # Process the contents of a single definition list, splitting it
-  # into individual term and definition list items.
-  #
-    $less_than_tab = $this->tab_width - 1;
-    
-    # trim trailing blank lines:
-    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);
-
-    # Process definition terms.
-    $list_str = preg_replace_callback('{
-      (?>\A\n?|\n\n+)         # leading line
-      (               # definition terms = $1
-        [ ]{0,'.$less_than_tab.'} # leading whitespace
-        (?![:][ ]|[ ])        # negative lookahead for a definition 
-                      #   mark (colon) or more whitespace.
-        (?> \S.* \n)+?        # actual term (not whitespace). 
-      )     
-      (?=\n?[ ]{0,3}:[ ])       # lookahead for following line feed 
-                      #   with a definition mark.
-      }xm',
-      array(&$this, '_processDefListItems_callback_dt'), $list_str);
-
-    # Process actual definitions.
-    $list_str = preg_replace_callback('{
-      \n(\n+)?            # leading line = $1
-      (               # marker space = $2
-        [ ]{0,'.$less_than_tab.'} # whitespace before colon
-        [:][ ]+           # definition mark (colon)
-      )
-      ((?s:.+?))            # definition text = $3
-      (?= \n+             # stop at next definition mark,
-        (?:             # next term or end of text
-          [ ]{0,'.$less_than_tab.'} [:][ ]  |
-          <dt> | \z
-        )           
-      )         
-      }xm',
-      array(&$this, '_processDefListItems_callback_dd'), $list_str);
-
-    return $list_str;
-  }
-  function _processDefListItems_callback_dt($matches) {
-    $terms = explode("\n", trim($matches[1]));
-    $text = '';
-    foreach ($terms as $term) {
-      $term = $this->runSpanGamut(trim($term));
-      $text .= "\n<dt>" . $term . "</dt>";
-    }
-    return $text . "\n";
-  }
-  function _processDefListItems_callback_dd($matches) {
-    $leading_line = $matches[1];
-    $marker_space = $matches[2];
-    $def      = $matches[3];
-
-    if ($leading_line || preg_match('/\n{2,}/', $def)) {
-      # Replace marker with the appropriate whitespace indentation
-      $def = str_repeat(' ', strlen($marker_space)) . $def;
-      $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
-      $def = "\n". $def ."\n";
-    }
-    else {
-      $def = rtrim($def);
-      $def = $this->runSpanGamut($this->outdent($def));
-    }
-
-    return "\n<dd>" . $def . "</dd>\n";
-  }
-
-
-  function doFencedCodeBlocks($text) {
-  #
-  # Adding the fenced code block syntax to regular Markdown:
-  #
-  # ~~~
-  # Code block
-  # ~~~
-  #
-    $less_than_tab = $this->tab_width;
-    
-    $text = preg_replace_callback('{
-        (?:\n|\A)
-        # 1: Opening marker
-        (
-          ~{3,} # Marker: three tilde or more.
-        )
-        [ ]* \n # Whitespace and newline following marker.
-        
-        # 2: Content
-        (
-          (?>
-            (?!\1 [ ]* \n)  # Not a closing marker.
-            .*\n+
-          )+
-        )
-        
-        # Closing marker.
-        \1 [ ]* \n
-      }xm',
-      array(&$this, '_doFencedCodeBlocks_callback'), $text);
-
-    return $text;
-  }
-  function _doFencedCodeBlocks_callback($matches) {
-    $codeblock = $matches[2];
-    $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
-    $codeblock = preg_replace_callback('/^\n+/',
-      array(&$this, '_doFencedCodeBlocks_newlines'), $codeblock);
-    $codeblock = "<pre><code>$codeblock</code></pre>";
-    return "\n\n".$this->hashBlock($codeblock)."\n\n";
-  }
-  function _doFencedCodeBlocks_newlines($matches) {
-    return str_repeat("<br$this->empty_element_suffix", 
-      strlen($matches[0]));
-  }
-
-
-  #
-  # Redefining emphasis markers so that emphasis by underscore does not
-  # work in the middle of a word.
-  #
-  var $em_relist = array(
-    ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S)(?![.,:;]\s)',
-    '*' => '(?<=\S)(?<!\*)\*(?!\*)',
-    '_' => '(?<=\S)(?<!_)_(?![a-zA-Z0-9_])',
-    );
-  var $strong_relist = array(
-    ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S)(?![.,:;]\s)',
-    '**' => '(?<=\S)(?<!\*)\*\*(?!\*)',
-    '__' => '(?<=\S)(?<!_)__(?![a-zA-Z0-9_])',
-    );
-  var $em_strong_relist = array(
-    ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S)(?![.,:;]\s)',
-    '***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)',
-    '___' => '(?<=\S)(?<!_)___(?![a-zA-Z0-9_])',
-    );
-
-
-  function formParagraphs($text) {
-  #
-  # Params:
-  #   $text - string to process with html <p> tags
-  #
-    # Strip leading and trailing lines:
-    $text = preg_replace('/\A\n+|\n+\z/', '', $text);
-    
-    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);
-
-    #
-    # Wrap <p> tags and unhashify HTML blocks
-    #
-    foreach ($grafs as $key => $value) {
-      $value = trim($this->runSpanGamut($value));
-      
-      # Check if this should be enclosed in a paragraph.
-      # Clean tag hashes & block tag hashes are left alone.
-      $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);
-      
-      if ($is_p) {
-        $value = "<p>$value</p>";
-      }
-      $grafs[$key] = $value;
-    }
-    
-    # Join grafs in one text, then unhash HTML tags. 
-    $text = implode("\n\n", $grafs);
-    
-    # Finish by removing any tag hashes still present in $text.
-    $text = $this->unhash($text);
-    
-    return $text;
-  }
-  
-  
-  ### Footnotes
-  
-  function stripFootnotes($text) {
-  #
-  # Strips link definitions from text, stores the URLs and titles in
-  # hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: [^id]: url "optional title"
-    $text = preg_replace_callback('{
-      ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
-        [ ]*
-        \n?         # maybe *one* newline
-      (           # text = $2 (no blank lines allowed)
-        (?:         
-          .+        # actual text
-        |
-          \n        # newlines but 
-          (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
-          (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed 
-                  # by non-indented content
-        )*
-      )   
-      }xm',
-      array(&$this, '_stripFootnotes_callback'),
-      $text);
-    return $text;
-  }
-  function _stripFootnotes_callback($matches) {
-    $note_id = $this->fn_id_prefix . $matches[1];
-    $this->footnotes[$note_id] = $this->outdent($matches[2]);
-    return ''; # String that will replace the block
-  }
-
-
-  function doFootnotes($text) {
-  #
-  # Replace footnote references in $text [^id] with a special text-token 
-  # which will be replaced by the actual footnote marker in appendFootnotes.
-  #
-    if (!$this->in_anchor) {
-      $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
-    }
-    return $text;
-  }
-
-  
-  function appendFootnotes($text) {
-  #
-  # Append footnote list to text.
-  #
-    $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
-      array(&$this, '_appendFootnotes_callback'), $text);
-  
-    if (!empty($this->footnotes_ordered)) {
-      $text .= "\n\n";
-      $text .= "<div class=\"footnotes\">\n";
-      $text .= "<hr". MARKDOWN_EMPTY_ELEMENT_SUFFIX ."\n";
-      $text .= "<ol>\n\n";
-      
-      $attr = " rev=\"footnote\"";
-      if ($this->fn_backlink_class != "") {
-        $class = $this->fn_backlink_class;
-        $class = $this->encodeAttribute($class);
-        $attr .= " class=\"$class\"";
-      }
-      if ($this->fn_backlink_title != "") {
-        $title = $this->fn_backlink_title;
-        $title = $this->encodeAttribute($title);
-        $attr .= " title=\"$title\"";
-      }
-      $num = 0;
-      
-      while (!empty($this->footnotes_ordered)) {
-        $footnote = reset($this->footnotes_ordered);
-        $note_id = key($this->footnotes_ordered);
-        unset($this->footnotes_ordered[$note_id]);
-        
-        $footnote .= "\n"; # Need to append newline before parsing.
-        $footnote = $this->runBlockGamut("$footnote\n");        
-        $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}', 
-          array(&$this, '_appendFootnotes_callback'), $footnote);
-        
-        $attr = str_replace("%%", ++$num, $attr);
-        $note_id = $this->encodeAttribute($note_id);
-        
-        # Add backlink to last paragraph; create new paragraph if needed.
-        $backlink = "<a href=\"#fnref:$note_id\"$attr>&#8617;</a>";
-        if (preg_match('{</p>$}', $footnote)) {
-          $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
-        } else {
-          $footnote .= "\n\n<p>$backlink</p>";
-        }
-        
-        $text .= "<li id=\"fn:$note_id\">\n";
-        $text .= $footnote . "\n";
-        $text .= "</li>\n\n";
-      }
-      
-      $text .= "</ol>\n";
-      $text .= "</div>";
-    }
-    return $text;
-  }
-  function _appendFootnotes_callback($matches) {
-    $node_id = $this->fn_id_prefix . $matches[1];
-    
-    # Create footnote marker only if it has a corresponding footnote *and*
-    # the footnote hasn't been used by another marker.
-    if (isset($this->footnotes[$node_id])) {
-      # Transfert footnote content to the ordered list.
-      $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
-      unset($this->footnotes[$node_id]);
-      
-      $num = $this->footnote_counter++;
-      $attr = " rel=\"footnote\"";
-      if ($this->fn_link_class != "") {
-        $class = $this->fn_link_class;
-        $class = $this->encodeAttribute($class);
-        $attr .= " class=\"$class\"";
-      }
-      if ($this->fn_link_title != "") {
-        $title = $this->fn_link_title;
-        $title = $this->encodeAttribute($title);
-        $attr .= " title=\"$title\"";
-      }
-      
-      $attr = str_replace("%%", $num, $attr);
-      $node_id = $this->encodeAttribute($node_id);
-      
-      return
-        "<sup id=\"fnref:$node_id\">".
-        "<a href=\"#fn:$node_id\"$attr>$num</a>".
-        "</sup>";
-    }
-    
-    return "[^".$matches[1]."]";
-  }
-    
-  
-  ### Abbreviations ###
-  
-  function stripAbbreviations($text) {
-  #
-  # Strips abbreviations from text, stores titles in hash references.
-  #
-    $less_than_tab = $this->tab_width - 1;
-
-    # Link defs are in the form: [id]*: url "optional title"
-    $text = preg_replace_callback('{
-      ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
-      (.*)          # text = $2 (no blank lines allowed)  
-      }xm',
-      array(&$this, '_stripAbbreviations_callback'),
-      $text);
-    return $text;
-  }
-  function _stripAbbreviations_callback($matches) {
-    $abbr_word = $matches[1];
-    $abbr_desc = $matches[2];
-    if ($this->abbr_word_re)
-      $this->abbr_word_re .= '|';
-    $this->abbr_word_re .= preg_quote($abbr_word);
-    $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
-    return ''; # String that will replace the block
-  }
-  
-  
-  function doAbbreviations($text) {
-  #
-  # Find defined abbreviations in text and wrap them in <abbr> elements.
-  #
-    if ($this->abbr_word_re) {
-      // cannot use the /x modifier because abbr_word_re may 
-      // contain significant spaces:
-      $text = preg_replace_callback('{'.
-        '(?<![\w\x1A])'.
-        '(?:'.$this->abbr_word_re.')'.
-        '(?![\w\x1A])'.
-        '}', 
-        array(&$this, '_doAbbreviations_callback'), $text);
-    }
-    return $text;
-  }
-  function _doAbbreviations_callback($matches) {
-    $abbr = $matches[0];
-    if (isset($this->abbr_desciptions[$abbr])) {
-      $desc = $this->abbr_desciptions[$abbr];
-      if (empty($desc)) {
-        return $this->hashPart("<abbr>$abbr</abbr>");
-      } else {
-        $desc = $this->encodeAttribute($desc);
-        return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
-      }
-    } else {
-      return $matches[0];
-    }
-  }
-
-}
-
-
-/*
-
-PHP Markdown Extra
-==================
-
-Description
------------
-
-This is a PHP port of the original Markdown formatter written in Perl 
-by John Gruber. This special "Extra" version of PHP Markdown features 
-further enhancements to the syntax for making additional constructs 
-such as tables and definition list.
-
-Markdown is a text-to-HTML filter; it translates an easy-to-read /
-easy-to-write structured text format into HTML. Markdown's text format
-is most similar to that of plain text email, and supports features such
-as headers, *emphasis*, code blocks, blockquotes, and links.
-
-Markdown's syntax is designed not as a generic markup language, but
-specifically to serve as a front-end to (X)HTML. You can use span-level
-HTML tags anywhere in a Markdown document, and you can use block level
-HTML tags (like <div> and <table> as well).
-
-For more information about Markdown's syntax, see:
-
-<http://daringfireball.net/projects/markdown/>
-
-
-Bugs
-----
-
-To file bug reports please send email to:
-
-<michel.fortin@michelf.com>
-
-Please include with your report: (1) the example input; (2) the output you
-expected; (3) the output Markdown actually produced.
-
-
-Version History
---------------- 
-
-See the readme file for detailed release notes for this version.
-
-
-Copyright and License
----------------------
-
-PHP Markdown & Extra
-Copyright (c) 2004-2008 Michel Fortin  
-<http://www.michelf.com/>  
-All rights reserved.
-
-Based on Markdown  
-Copyright (c) 2003-2006 John Gruber   
-<http://daringfireball.net/>   
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-* Redistributions of source code must retain the above copyright notice,
-  this list of conditions and the following disclaimer.
-
-* Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
-* Neither the name "Markdown" nor the names of its contributors may
-  be used to endorse or promote products derived from this software
-  without specific prior written permission.
-
-This software is provided by the copyright holders and contributors "as
-is" and any express or implied warranties, including, but not limited
-to, the implied warranties of merchantability and fitness for a
-particular purpose are disclaimed. In no event shall the copyright owner
-or contributors be liable for any direct, indirect, incidental, special,
-exemplary, or consequential damages (including, but not limited to,
-procurement of substitute goods or services; loss of use, data, or
-profits; or business interruption) however caused and on any theory of
-liability, whether in contract, strict liability, or tort (including
-negligence or otherwise) arising in any way out of the use of this
-software, even if advised of the possibility of such damage.
-
-*/
-?>
\ No newline at end of file
diff --git a/ivfdec.c b/ivfdec.c
index c97b03f78..e826f6e1f 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/ivfenc.c b/ivfenc.c
index 4d96d468a..57c04dd02 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/libs.doxy_template b/libs.doxy_template
index eb37dfc18..ce8fde637 100644
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.    
 ##
 
 
diff --git a/libs.mk b/libs.mk
index 544e71a2a..be237a137 100644
--- a/libs.mk
+++ b/libs.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/md5_utils.c b/md5_utils.c
index 16c6f7e68..190d95570 100644
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/md5_utils.h b/md5_utils.h
index 6c0e93e52..d1a981bb7 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 /*
diff --git a/release.sh b/release.sh
index 3b77dad72..827bbd0a6 100755
--- a/release.sh
+++ b/release.sh
@@ -2,10 +2,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/solution.mk b/solution.mk
index 783c6f805..21bf065fb 100644
--- a/solution.mk
+++ b/solution.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 6384fac8e..9f6397e48 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index 73c7383c7..b87741281 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index 4428cf8ff..ac0d3330f 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm
index 00e97397c..344c4535b 100644
--- a/vp8/common/arm/armv6/copymem16x16_v6.asm
+++ b/vp8/common/arm/armv6/copymem16x16_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm
index 94473ca65..3556b3a98 100644
--- a/vp8/common/arm/armv6/copymem8x4_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x4_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm
index 7cfa53389..1da0ff5b6 100644
--- a/vp8/common/arm/armv6/copymem8x8_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x8_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index a7863fc94..cdc74bab3 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index 25c5165ec..9e932fa06 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index 87475681f..460678330 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
     EXPORT |vp8_short_inv_walsh4x4_armv6|
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index c2b02dc0a..eeeacd330 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
index 085ff80c9..6f3ccbef9 100644
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ b/vp8/common/arm/armv6/recon_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 15c6c7d16..b820cedb1 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 551d863e9..641546390 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index bf972a3bc..b93539ca6 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 2a4640cae..233be24dd 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f9ed21e0d..cfd9d76d8 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index fa7c62617..d98c90867 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 4bb49456d..b59e2b58f 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
index a2fea2bd6..076a3d33c 100644
--- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
index 74d2db5dc..f199ba3f3 100644
--- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
index 46ebb0e0b..9a3a03938 100644
--- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
index 80728d4f8..10a636690 100644
--- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index f42ac63c9..7cd9d7562 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
index 89d5e1018..b25bfdcbc 100644
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ b/vp8/common/arm/neon/copymem16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
index 302f734ff..0c62ee251 100644
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ b/vp8/common/arm/neon/copymem8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
index 50d39ef66..84e0afda3 100644
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ b/vp8/common/arm/neon/copymem8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index 4fc744c96..b8199ce9a 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
     EXPORT  |vp8_short_inv_walsh4x4_neon|
     EXPORT  |vp8_short_inv_walsh4x4_1_neon|
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
index e3e8e8a72..5d25e3d89 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
index f11055d42..ebe52fc99 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 6d74fab52..dbbdd74c8 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index 2bb6222b9..480e3184b 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
index d79cc68a3..a402282e0 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
index 3a230a953..18eba9f50 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
index 86eddaa2e..21b85dadd 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
index 2ab0fc240..64d98c67d 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index ad5afba34..0e72e8046 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index 60e517519..91396a180 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm
index b9ba1cbc3..7c06c0308 100644
--- a/vp8/common/arm/neon/recon16x16mb_neon.asm
+++ b/vp8/common/arm/neon/recon16x16mb_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm
index 25aaf8c8e..3d87e2dac 100644
--- a/vp8/common/arm/neon/recon2b_neon.asm
+++ b/vp8/common/arm/neon/recon2b_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm
index a4f5b806b..63cd98715 100644
--- a/vp8/common/arm/neon/recon4b_neon.asm
+++ b/vp8/common/arm/neon/recon4b_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
index 16d85a0d5..0ecdc1423 100644
--- a/vp8/common/arm/neon/reconb_neon.asm
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm
index 4873e447f..f5db2a8e7 100644
--- a/vp8/common/arm/neon/save_neon_reg.asm
+++ b/vp8/common/arm/neon/save_neon_reg.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
index 7d06ff908..24e5fed12 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index ffecfbfbc..c566c67fc 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 9f5f0d2ce..6f3716db0 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index c23a9dbd1..6fe9eadcc 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index 18e19f958..a6ff4f776 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index d27485e6c..bb35ae4ce 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
index 130059e64..2cc9ee77e 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index fd9f85eea..392297be4 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
index 334d35236..65fb1f027 100644
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index d7ee1ddfa..29f4a2c47 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 56aec55b9..0eb2c58d8 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index ecc6929c0..27d3deec0 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 68634bf55..ff4d7527c 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
index 6a91ba1ae..cd6b9886c 100644
--- a/vp8/common/bigend.h
+++ b/vp8/common/bigend.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index 53f5e72d2..e0ed56129 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 84ed53ad2..9f8a00fe7 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h
index 0659d4873..66f67c284 100644
--- a/vp8/common/boolcoder.h
+++ b/vp8/common/boolcoder.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h
index 7881b0a41..d836564d8 100644
--- a/vp8/common/codec_common_interface.h
+++ b/vp8/common/codec_common_interface.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 #ifndef CODEC_COMMON_INTERFACE_H
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 99affd618..6131d1269 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/common.h b/vp8/common/common.h
index 29f6d371b..bfa8a9c41 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
index deb5ed8e5..a307ed6f1 100644
--- a/vp8/common/common_types.h
+++ b/vp8/common/common_types.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/context.c b/vp8/common/context.c
index 17ee8c338..f0cb83845 100644
--- a/vp8/common/context.c
+++ b/vp8/common/context.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index e2d2d2c0f..e66981413 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index ccdf326e6..f9247d290 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
index 5e6fa0ca9..765405d4a 100644
--- a/vp8/common/dma_desc.h
+++ b/vp8/common/dma_desc.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
index f63a5cdc1..02f6895a1 100644
--- a/vp8/common/duck_io.h
+++ b/vp8/common/duck_io.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index e524c2acc..8d01bf8f4 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 1415832d5..29be82c9d 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 7dc1acde0..72cbd6411 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index ff630a477..bd44b83ff 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 2b00c17a9..176fecd82 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index d940c599b..395984c44 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 74079527c..43d7aed85 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index 6809ae756..bb8a01650 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 38991cb28..f24f8a287 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index fcb1f202c..550681ece 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 2c02033e6..3e0718a10 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp
index 5f1faed2f..9823b5611 100644
--- a/vp8/common/fourcc.hpp
+++ b/vp8/common/fourcc.hpp
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
index e68c53e1c..3f434010a 100644
--- a/vp8/common/g_common.h
+++ b/vp8/common/g_common.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0011ae0dc..6e64885c7 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/header.h b/vp8/common/header.h
index 8b2b0094a..b8b905973 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 47b5f0576..2185bd357 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 57cf8584e..4261d241e 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 1ff596ead..00502c62e 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 93a40f956..be30ca002 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
index 08c525c5d..0961163a9 100644
--- a/vp8/common/littlend.h
+++ b/vp8/common/littlend.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 79e617754..4937195d3 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index c6ce508cc..a9a976eb8 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 7d16e4843..eaf7327b4 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h
index 97bffc776..a12b8d59c 100644
--- a/vp8/common/mac_specs.h
+++ b/vp8/common/mac_specs.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index a7e0ce99a..b183b8e30 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index 9301a2567..c008eef63 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 0c57651ed..4b79722e5 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index ceee74c70..a4b2f7629 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index 3d8418108..c3db5f03e 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 428721996..3ed6f2db2 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 39eab24ca..d1cb76618 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 644c0ec77..ea04c14ae 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h
index 32a55ee6c..355aa795b 100644
--- a/vp8/common/partialgfupdate.h
+++ b/vp8/common/partialgfupdate.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 0979185d6..1f36d4ee9 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index cd99056b0..e148f2a7a 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm
index e87eb2112..5ca2d170c 100644
--- a/vp8/common/ppc/copy_altivec.asm
+++ b/vp8/common/ppc/copy_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm
index 2a3550773..1a7ebf7b9 100644
--- a/vp8/common/ppc/filter_altivec.asm
+++ b/vp8/common/ppc/filter_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm
index 27e02a87f..73e758e13 100644
--- a/vp8/common/ppc/filter_bilinear_altivec.asm
+++ b/vp8/common/ppc/filter_bilinear_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm
index e88af8d7d..9ebe6af4f 100644
--- a/vp8/common/ppc/idctllm_altivec.asm
+++ b/vp8/common/ppc/idctllm_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c
index 586eed477..8bf5e5760 100644
--- a/vp8/common/ppc/loopfilter_altivec.c
+++ b/vp8/common/ppc/loopfilter_altivec.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm
index 78a5cf9b3..26c51a6c2 100644
--- a/vp8/common/ppc/loopfilter_filters_altivec.asm
+++ b/vp8/common/ppc/loopfilter_filters_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm
index 227ef2a94..23680c94e 100644
--- a/vp8/common/ppc/platform_altivec.asm
+++ b/vp8/common/ppc/platform_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm
index f478b954c..212664dc5 100644
--- a/vp8/common/ppc/recon_altivec.asm
+++ b/vp8/common/ppc/recon_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 284731085..4ccf69061 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index c66397682..57aeb1d7c 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index 25a4b776f..523c8b70c 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c
index df4c96e4a..18d7da86d 100644
--- a/vp8/common/predictdc.c
+++ b/vp8/common/predictdc.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h
index b8871e452..69036eea4 100644
--- a/vp8/common/predictdc.h
+++ b/vp8/common/predictdc.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h
index 00ec9a8d7..a02745c72 100644
--- a/vp8/common/preproc.h
+++ b/vp8/common/preproc.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h
index 986c45b10..f700f7688 100644
--- a/vp8/common/preprocif.h
+++ b/vp8/common/preprocif.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h
index 1171ede43..65b783494 100644
--- a/vp8/common/proposed.h
+++ b/vp8/common/proposed.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index 09fe31fe5..6fd3bc01e 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 0c92ce8b9..49d11bc5b 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index d1268ea22..b09ef37ca 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index f65a90f7e..607895ca5 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index c48886deb..91ec76be8 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index b2d1ae97a..9df480637 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index e33bce348..23d87ee1f 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index d63aa15cb..b7c4d1d66 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index d92d5c96a..3b22423a5 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index 788c8c40a..881d091b6 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/segmentation_common.c b/vp8/common/segmentation_common.c
index 72b8c874b..2568c7cea 100644
--- a/vp8/common/segmentation_common.c
+++ b/vp8/common/segmentation_common.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h
index bb93533a3..7b36b49e0 100644
--- a/vp8/common/segmentation_common.h
+++ b/vp8/common/segmentation_common.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index 38bfae822..e796d4203 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 6ec79b29c..ea4e34205 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h
index fbd5f4daf..446697c30 100644
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c
index afe6a885e..5bdf431a2 100644
--- a/vp8/common/swapyv12buffer.c
+++ b/vp8/common/swapyv12buffer.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index caf9499d9..f2c3b745f 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 1829b649c..56218e603 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index a45937b12..5d117f1b0 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index a02cb244b..7c94645a0 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 4ad018d49..0ccd64dae 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 0356d2b02..908dbcb41 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index addd26469..a0d871746 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp
index e352e7a19..c01a0ddd5 100644
--- a/vp8/common/vfwsetting.hpp
+++ b/vp8/common/vfwsetting.hpp
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h
index 40608c6dd..cded66c17 100644
--- a/vp8/common/vpx_ref_build_prefix.h
+++ b/vp8/common/vpx_ref_build_prefix.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h
index d03e0bd02..2c7f673e1 100644
--- a/vp8/common/vpxblit.h
+++ b/vp8/common/vpxblit.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h
index a8e28f59a..7659b5cb3 100644
--- a/vp8/common/vpxblit_c64.h
+++ b/vp8/common/vpxblit_c64.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h
index e4c9f3ef3..f0ec707ba 100644
--- a/vp8/common/vpxerrors.h
+++ b/vp8/common/vpxerrors.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx
index 06faca69c..cd9c495bf 100644
--- a/vp8/common/x86/boolcoder.cxx
+++ b/vp8/common/x86/boolcoder.cxx
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index 5dfb212e1..1f2cb631b 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 2751c6934..5ec01e9c4 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 562e5908f..6cb897910 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 96943dfb8..cb61691fd 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 6e4d2b651..6e6efabe0 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 5275dfa3b..1c0a3881c 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 143ee7469..f5af7cffe 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index c87f38a31..503bf5b3a 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 721c8d612..070765109 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
index 095797b1e..f3b29234a 100644
--- a/vp8/common/x86/postproc_mmx.c
+++ b/vp8/common/x86/postproc_mmx.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index bfa36fa70..5097b2a30 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h
index 49a190793..f93942733 100644
--- a/vp8/common/x86/postproc_x86.h
+++ b/vp8/common/x86/postproc_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index ba60c5db7..95c308d7d 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index f2685a76f..2ce028cdb 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index c46977842..fcd429c1c 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index c50211813..b3e4ad52c 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index dee04f2d9..c8821614d 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index efa7b2e09..bd6859cf4 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 68454f709..80389429d 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 5312e06da..09ff3c545 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index eb3f0307c..80b2e0cc7 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index 143e33e46..eca8eeb72 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
index 3daa9b34f..c5b0b7b9f 100644
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
index 61bb48d04..0d1c6b448 100644
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantidct_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index 95e38594f..c35e7c630 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 54006a921..913267dc4 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index c8a61a4a7..ae7cf8e45 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
index c714452a6..a126a0555 100644
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ b/vp8/decoder/arm/detokenizearm_sjl.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
index 4d87ee5bd..439c9abdc 100644
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ b/vp8/decoder/arm/detokenizearm_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 455c83a9c..f146d604e 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 7ec62a3d8..01315a40e 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
index 3392f2c2b..482f02dee 100644
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequantidct_neon.asm
index bba4d5dfb..3d00dbf15 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequantidct_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 1bde94607..14698f8f4 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 442054ed3..7027c0f54 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 772dbdb2e..c6d69e776 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 6035f3e6a..32925fe9c 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 403007183..8b7fb684f 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index ebc5c27b2..6c0363b5f 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 4edf4f60d..0abe4962c 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
index fd05e6db5..881b49ebc 100644
--- a/vp8/decoder/demode.c
+++ b/vp8/decoder/demode.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
index 51bbc5e7a..8d2fbee8e 100644
--- a/vp8/decoder/demode.h
+++ b/vp8/decoder/demode.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 14798d9af..2c286ec77 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index d16b02e58..7fd7cbbc7 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index c081518fb..e35f77a84 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 6a9a47607..6cfb66bb2 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 302b64bf8..ad64a38af 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 6875585f0..76387f564 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
index 363ad5d72..12d28a5b9 100644
--- a/vp8/decoder/onyxd_if_sjl.c
+++ b/vp8/decoder/onyxd_if_sjl.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index fa4fa48e4..2eea614a2 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index e35d1757f..470d1aeb1 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index eb10e2460..f1893df31 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 02be4872e..1611e034d 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 5def406d3..4c91633aa 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 75a676a07..22d0548cb 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 6d7cc3666..2dfb469b7 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
index cb2221c15..7465010ae 100644
--- a/vp8/decoder/xprintf.c
+++ b/vp8/decoder/xprintf.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
index 2f175e943..765607556 100644
--- a/vp8/decoder/xprintf.h
+++ b/vp8/decoder/xprintf.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm
index 608c9ae65..461e49290 100644
--- a/vp8/encoder/arm/armv6/walsh_v6.asm
+++ b/vp8/encoder/arm/armv6/walsh_v6.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
     EXPORT |vp8_short_walsh4x4_armv6|
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index e70b3ad47..8c0faffd4 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 003979680..7fba99589 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index a671862fb..bb60c9d24 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index 3f1d05391..7d58f9438 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index 28f9e5c5f..525135a41 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 07f218605..9418a60bb 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/neon/boolhuff_armv7.asm
index 9a5f36661..674eea960 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/neon/boolhuff_armv7.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
index d5dec440d..44e6dfc73 100644
--- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
index de1c25469..6a286f6ab 100644
--- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 11070377b..e3a94a6dd 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm
index 6169f10da..7e2ab13de 100644
--- a/vp8/encoder/arm/neon/sad16_neon.asm
+++ b/vp8/encoder/arm/neon/sad16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm
index 28604ddeb..fc8c4e132 100644
--- a/vp8/encoder/arm/neon/sad8_neon.asm
+++ b/vp8/encoder/arm/neon/sad8_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 26bc0d06c..4399c9700 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index 8781ca0cc..d4803ff6e 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index 64b83ca43..b90169313 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index f26b4d7ae..0a372c39d 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index f53596727..087ea1716 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
index 9c52c52f6..bfa97d720 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
index 92b098909..334c88feb 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
index 6d5f882ed..267e21649 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
index 5269c0af8..ebd5dc1bc 100644
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index aec716e3b..185277f23 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 3d02d7c40..611b1e447 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index bd56761fa..614d48fc0 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index 0586e55d8..fb0b3bdc9 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 46906d3a2..e8bd44b44 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index e93f0fef1..14bc923cd 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index d9fc9b3e0..1c160202c 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
index 8cdf0791f..28aac70cb 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index e468f40f0..ce9d2fd66 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index ee69f66e4..de4b94ded 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index cc4cbe067..b765cc069 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index c101384d9..2e95c7579 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 0d929f067..95635e3ed 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 5207e39c4..4f5f9f056 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index fb307cfb3..2aaf731cf 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index a4e377220..46b697e8c 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 403d0204a..813efb990 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index 4a43ab275..49b3257a5 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index bb43d3d5b..e7c2610c5 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 91ca8f552..5285a385a 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 2320b413a..3b58a80da 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index 1c1f450a0..bf6d7af32 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index a0b50d2a1..d7a81f2f5 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index d5d430906..90f95e083 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index d7b52f3f3..48257ce66 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 52aab6642..96028b313 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 2a2de3d0a..3c1507ff6 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 921206fec..40cbb073c 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index 73170cf52..6632a3629 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index 5ade26566..0c46acd12 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 56516fcab..bb0e406fa 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 55076b091..b934d98c7 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index 66fdafb1a..d7d30a8f1 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index d61e2ceda..3746bebc9 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index fb28837ed..76fdb99f1 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index bbd7840b8..0527b80a6 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index f99277f99..66fca8ff5 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
index e0e976d71..14a36a17e 100644
--- a/vp8/encoder/ppc/encodemb_altivec.asm
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm
index eaab14c79..01f336407 100644
--- a/vp8/encoder/ppc/fdct_altivec.asm
+++ b/vp8/encoder/ppc/fdct_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm
index 917bfe036..4f9b050a7 100644
--- a/vp8/encoder/ppc/rdopt_altivec.asm
+++ b/vp8/encoder/ppc/rdopt_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm
index 1102ccf17..6d927280e 100644
--- a/vp8/encoder/ppc/sad_altivec.asm
+++ b/vp8/encoder/ppc/sad_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index 952bf7286..4e3fd5984 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 148a8d25b..4dcf7e44f 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c
index d2a13dced..e9cc0752c 100644
--- a/vp8/encoder/preproc.c
+++ b/vp8/encoder/preproc.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index 0e34cecb1..c5e9dad8c 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index 9f6ca0bbf..dd0b4e587 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 6028ebf56..5aa99161d 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 868e8e3a8..32720708a 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 23a2d1abd..d136bd2f4 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 588c7a823..ff5778fd5 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 601c52978..e1cfc4c05 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index c6eae4b92..617241dfb 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 74c6bd76a..1914c60b7 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index df214a89f..35dd10c88 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 33ddd64e7..819f6a58b 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 02aacc222..51f912b06 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c
index e398044db..942442b09 100644
--- a/vp8/encoder/treewriter.c
+++ b/vp8/encoder/treewriter.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 05ac74cb7..075df50ca 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index b3b55c319..6610e7d68 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 85269b9d3..efcf2b7a3 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index 186ee6856..8bc687785 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index e13423796..3dfc47b61 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 3e5e9a70c..8ddc5d72b 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index bc80e64ef..fec1a2edd 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index 9397a6cca..d1ba7d94d 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 194047155..cdc17a525 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 7d8620178..196669758 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index 5661491ad..c2b4b369b 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c
index 69617ca47..8b23bb516 100644
--- a/vp8/encoder/x86/preproc_mmx.c
+++ b/vp8/encoder/x86/preproc_mmx.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 847fc6e37..25adca0ed 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index a825698e7..4b3574922 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 53240bbf1..f4ef5518a 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 38cc02957..edfe82f26 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 1bb956121..79c4b4433 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index ce3e61066..d9babd37c 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index d0da82ad4..31f66ecf2 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 7e5ee284b..1ccc6c567 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 4a5b25b0d..788b8331f 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index ea80753bd..78ecd7b22 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 35fc90c48..f00ad321d 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index f1391ba8c..ad10a9e42 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index a9efbd753..5ed53ba2c 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 16ad678b6..d04e4767c 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 19c59cd80..6a27ee0f0 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 651ee7767..a512ff910 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index f0753d93e..16009f9b7 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 76368eb53..44b4b495d 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 1b4a7ecf7..58ccac573 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index e95d603e8..f525a6013 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c
index 14f4be32e..0ef3a7b68 100644
--- a/vpx/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index d0de8b4bc..9939aa28a 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/src/vpx_decoder_compat.c b/vpx/src/vpx_decoder_compat.c
index 96594fe2f..d5cc16e1c 100644
--- a/vpx/src/vpx_decoder_compat.c
+++ b/vpx/src/vpx_decoder_compat.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index a9a40de71..55fd5bf84 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index 55ee391fc..e8a2959e8 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 6778b7e9c..a493ef620 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 0773edc16..110406411 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 8203557ba..d602e8067 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vp8e.h b/vpx/vp8e.h
index 85ca39f3a..f72fd2466 100644
--- a/vpx/vp8e.h
+++ b/vpx/vp8e.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 145ca29fa..f821559c0 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 223f9ad36..4e09b5fab 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -1,10 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license and patent
-##  grant that can be found in the LICENSE file in the root of the source
-##  tree. All contributing project authors may be found in the AUTHORS
-##  file in the root of the source tree.
+##  Use of this source code is governed by a BSD-style license 
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may 
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/vpx/vpx_codec_impl_bottom.h b/vpx/vpx_codec_impl_bottom.h
index c52654cec..02bf8b319 100644
--- a/vpx/vpx_codec_impl_bottom.h
+++ b/vpx/vpx_codec_impl_bottom.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_codec_impl_top.h b/vpx/vpx_codec_impl_top.h
index f73809a8e..fce14c1e2 100644
--- a/vpx/vpx_codec_impl_top.h
+++ b/vpx/vpx_codec_impl_top.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index ab0818f04..865f5dfd9 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
index 25bb5eb36..e66a096e0 100644
--- a/vpx/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 67393be5a..8ad7055e4 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 7b235a4c5..7e4a03a30 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h
index e250422b0..f06c64181 100644
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h
index c33240398..361c29b12 100644
--- a/vpx_mem/include/nds/vpx_mem_nds.h
+++ b/vpx_mem/include/nds/vpx_mem_nds.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 3b68d8615..4605b345d 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
index ab85d19c4..49f783e83 100644
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ b/vpx_mem/include/vpx_mem_tracker.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/intel_linux/vpx_mem.c b/vpx_mem/intel_linux/vpx_mem.c
index 002e407ef..7bce794e8 100644
--- a/vpx_mem/intel_linux/vpx_mem.c
+++ b/vpx_mem/intel_linux/vpx_mem.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/intel_linux/vpx_mem_tracker.c b/vpx_mem/intel_linux/vpx_mem_tracker.c
index fa023e348..fcbebc932 100644
--- a/vpx_mem/intel_linux/vpx_mem_tracker.c
+++ b/vpx_mem/intel_linux/vpx_mem_tracker.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
index 9abd81ee4..3f10097e0 100644
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ b/vpx_mem/memory_manager/hmm_alloc.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
index 0cacc3f8f..fbc36de14 100644
--- a/vpx_mem/memory_manager/hmm_base.c
+++ b/vpx_mem/memory_manager/hmm_base.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
index dc59f5507..71b41d924 100644
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ b/vpx_mem/memory_manager/hmm_dflt_abort.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
index 79d75a74b..a979c7a6e 100644
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ b/vpx_mem/memory_manager/hmm_grow.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
index 5ebe398e0..82a6a36f2 100644
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ b/vpx_mem/memory_manager/hmm_largest.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
index 6e3f2f041..cb93bb1d3 100644
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ b/vpx_mem/memory_manager/hmm_resize.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
index 5ef9b233f..d84513625 100644
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ b/vpx_mem/memory_manager/hmm_shrink.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
index 41103c89e..586fc2248 100644
--- a/vpx_mem/memory_manager/hmm_true.c
+++ b/vpx_mem/memory_manager/hmm_true.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
index e2733ef2f..da1b148dd 100644
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ b/vpx_mem/memory_manager/include/cavl_if.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
index 267bc7312..e67cc8a9b 100644
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ b/vpx_mem/memory_manager/include/cavl_impl.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
index 933e30dd7..4e46c41fb 100644
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ b/vpx_mem/memory_manager/include/heapmm.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
index 86e4e9fa8..715163c1b 100644
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ b/vpx_mem/memory_manager/include/hmm_cnfg.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
index 6e2be08fc..1dddb8ac7 100644
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ b/vpx_mem/memory_manager/include/hmm_intrnl.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/nds/vpx_mem_nds.c b/vpx_mem/nds/vpx_mem_nds.c
index f2a3043b2..88d474af8 100644
--- a/vpx_mem/nds/vpx_mem_nds.c
+++ b/vpx_mem/nds/vpx_mem_nds.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
index 6501855c0..c2a1c2813 100644
--- a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
+++ b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index f6b1a3550..ba92024bf 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index 6ccb9be55..a1239c127 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index 4427e27fc..92152a6fd 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
index b87669ec1..9d32393ae 100644
--- a/vpx_ports/config.h
+++ b/vpx_ports/config.h
@@ -1,9 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 #include "vpx_config.h"
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 03e34992c..096176dc8 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 1078169ee..ade2e3024 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index 869d583f8..109c2707f 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 1d0db2ccb..7c4d95ec3 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index 5c045387f..11fee84b5 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index 14244bd68..b9c8b8c0f 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 935d03762..8c23abd38 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index dd6acf16a..6fdbf8add 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
index 1c904edae..e317fe9af 100644
--- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm
+++ b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/nds/yv12extend.c b/vpx_scale/arm/nds/yv12extend.c
index 56959cb18..9ec346662 100644
--- a/vpx_scale/arm/nds/yv12extend.c
+++ b/vpx_scale/arm/nds/yv12extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index 26384c42c..64e7bfe66 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
index a50ae60d7..f79de3c0d 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index c8923d5a5..2e24e24a8 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index 8c9ce1962..418578fd0 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index b270a5f06..67954b2c5 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/arm/yv12extend_arm.c b/vpx_scale/arm/yv12extend_arm.c
index 7c3f7cd07..5069030d5 100644
--- a/vpx_scale/arm/yv12extend_arm.c
+++ b/vpx_scale/arm/yv12extend_arm.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 7cb083fb9..950a5d2ce 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/blackfin/yv12extend.c b/vpx_scale/blackfin/yv12extend.c
index d5be4950d..80504ef9d 100644
--- a/vpx_scale/blackfin/yv12extend.c
+++ b/vpx_scale/blackfin/yv12extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/bicubic_scaler_c64.c b/vpx_scale/dm642/bicubic_scaler_c64.c
index 9bd379725..e026be598 100644
--- a/vpx_scale/dm642/bicubic_scaler_c64.c
+++ b/vpx_scale/dm642/bicubic_scaler_c64.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/gen_scalers_c64.c b/vpx_scale/dm642/gen_scalers_c64.c
index 2126a7534..34f95f71a 100644
--- a/vpx_scale/dm642/gen_scalers_c64.c
+++ b/vpx_scale/dm642/gen_scalers_c64.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/dm642/yv12extend.c b/vpx_scale/dm642/yv12extend.c
index ca25a5fce..2557a3af0 100644
--- a/vpx_scale/dm642/yv12extend.c
+++ b/vpx_scale/dm642/yv12extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index e3c2b4a80..48f909671 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index a5e545f70..948e3d7ae 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/scalesystemdependant.c b/vpx_scale/generic/scalesystemdependant.c
index 542e5daa9..7a24a26b7 100644
--- a/vpx_scale/generic/scalesystemdependant.c
+++ b/vpx_scale/generic/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 206cd5512..8f8cfb504 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 04617be51..008130b98 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 4906625c8..907fa6820 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h
index d6181d207..39e37428f 100644
--- a/vpx_scale/include/arm/vpxscale_nofp.h
+++ b/vpx_scale/include/arm/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
index 2b50f24cf..68b8fc0e6 100644
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h
index 015eed0fc..dbe778655 100644
--- a/vpx_scale/include/generic/vpxscale_depricated.h
+++ b/vpx_scale/include/generic/vpxscale_depricated.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h
index c4d5f4c6f..97ea60e99 100644
--- a/vpx_scale/include/generic/vpxscale_nofp.h
+++ b/vpx_scale/include/generic/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/leapster/vpxscale.h b/vpx_scale/include/leapster/vpxscale.h
index f70029cae..a40f2ebea 100644
--- a/vpx_scale/include/leapster/vpxscale.h
+++ b/vpx_scale/include/leapster/vpxscale.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h
index d6181d207..39e37428f 100644
--- a/vpx_scale/include/symbian/vpxscale_nofp.h
+++ b/vpx_scale/include/symbian/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h
index f6482f944..f39a1c6c8 100644
--- a/vpx_scale/include/vpxscale_nofp.h
+++ b/vpx_scale/include/vpxscale_nofp.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/intel_linux/scaleopt.c b/vpx_scale/intel_linux/scaleopt.c
index 6555600e9..1bdb488af 100644
--- a/vpx_scale/intel_linux/scaleopt.c
+++ b/vpx_scale/intel_linux/scaleopt.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/intel_linux/scalesystemdependant.c b/vpx_scale/intel_linux/scalesystemdependant.c
index 9ed48bfc6..82b90c910 100644
--- a/vpx_scale/intel_linux/scalesystemdependant.c
+++ b/vpx_scale/intel_linux/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/doptsystemdependant_lf.c b/vpx_scale/leapster/doptsystemdependant_lf.c
index ca1316730..f4ccb163b 100644
--- a/vpx_scale/leapster/doptsystemdependant_lf.c
+++ b/vpx_scale/leapster/doptsystemdependant_lf.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/gen_scalers_lf.c b/vpx_scale/leapster/gen_scalers_lf.c
index 1b9c7c745..f9a11fecd 100644
--- a/vpx_scale/leapster/gen_scalers_lf.c
+++ b/vpx_scale/leapster/gen_scalers_lf.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/vpxscale_lf.c b/vpx_scale/leapster/vpxscale_lf.c
index 5f05e5de0..deabd989f 100644
--- a/vpx_scale/leapster/vpxscale_lf.c
+++ b/vpx_scale/leapster/vpxscale_lf.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/leapster/yv12extend.c b/vpx_scale/leapster/yv12extend.c
index 480d971b4..5a89c3164 100644
--- a/vpx_scale/leapster/yv12extend.c
+++ b/vpx_scale/leapster/yv12extend.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
index 2a9ab7612..41aefa27c 100644
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/symbian/gen_scalers_armv4.asm b/vpx_scale/symbian/gen_scalers_armv4.asm
index 1c904edae..e317fe9af 100644
--- a/vpx_scale/symbian/gen_scalers_armv4.asm
+++ b/vpx_scale/symbian/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/symbian/scalesystemdependant.c b/vpx_scale/symbian/scalesystemdependant.c
index a2acc3e9d..75ef26dfd 100644
--- a/vpx_scale/symbian/scalesystemdependant.c
+++ b/vpx_scale/symbian/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index 9a86b75de..f3057fe54 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/wce/gen_scalers_armv4.asm b/vpx_scale/wce/gen_scalers_armv4.asm
index 1c904edae..e317fe9af 100644
--- a/vpx_scale/wce/gen_scalers_armv4.asm
+++ b/vpx_scale/wce/gen_scalers_armv4.asm
@@ -1,10 +1,11 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
+;  Use of this source code is governed by a BSD-style license 
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may 
+;  be found in the AUTHORS file in the root of the source tree.
 ;
 
 
diff --git a/vpx_scale/wce/scalesystemdependant.c b/vpx_scale/wce/scalesystemdependant.c
index a5a6a5275..1d62eb598 100644
--- a/vpx_scale/wce/scalesystemdependant.c
+++ b/vpx_scale/wce/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index da0533e6b..59d49e625 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/win32/scalesystemdependant.c b/vpx_scale/win32/scalesystemdependant.c
index 9ed48bfc6..82b90c910 100644
--- a/vpx_scale/win32/scalesystemdependant.c
+++ b/vpx_scale/win32/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/x86_64/scaleopt.c b/vpx_scale/x86_64/scaleopt.c
index 3d2d5f237..de4c478fc 100644
--- a/vpx_scale/x86_64/scaleopt.c
+++ b/vpx_scale/x86_64/scaleopt.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/x86_64/scalesystemdependant.c b/vpx_scale/x86_64/scalesystemdependant.c
index 43f05a68c..032444965 100644
--- a/vpx_scale/x86_64/scalesystemdependant.c
+++ b/vpx_scale/x86_64/scalesystemdependant.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index a8d0ce45b..f01bd78ec 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/vpx_scale/yv12extend.h b/vpx_scale/yv12extend.h
index 9968feae8..b310ef5fd 100644
--- a/vpx_scale/yv12extend.h
+++ b/vpx_scale/yv12extend.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/wince_wmain_adapter.cpp b/wince_wmain_adapter.cpp
index db2119cb5..48d3ab336 100644
--- a/wince_wmain_adapter.cpp
+++ b/wince_wmain_adapter.cpp
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  */
 
 
diff --git a/y4minput.c b/y4minput.c
index f1f50bc79..895226d4f 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  *
  *  Based on code from the OggTheora software codec source code,
  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
diff --git a/y4minput.h b/y4minput.h
index 153487504..e3f930433 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -1,10 +1,11 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license and patent
- *  grant that can be found in the LICENSE file in the root of the source
- *  tree. All contributing project authors may be found in the AUTHORS
- *  file in the root of the source tree.
+ *  Use of this source code is governed by a BSD-style license 
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may 
+ *  be found in the AUTHORS file in the root of the source tree.
  *
  *  Based on code from the OggTheora software codec source code,
  *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.

From 9a27722b98ba58282efbf668864259139d4faa18 Mon Sep 17 00:00:00 2001
From: Alex Converse <alex.converse@gmail.com>
Date: Sat, 5 Jun 2010 12:19:40 -0400
Subject: [PATCH 006/307] Remove some bashism from the shell scripts.

Note that configure.sh still uses the bashism $(RANDOM).
---
 build/make/configure.sh | 6 +++---
 release.sh              | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 1f4f24db9..a70a84eec 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -120,8 +120,8 @@ EOF
 
 show_targets() {
     while [ -n "$*" ]; do
-        if [ "${1%%-*}" == "${2%%-*}" ]; then
-            if [ "${2%%-*}" == "${3%%-*}" ]; then
+        if [ "${1%%-*}" = "${2%%-*}" ]; then
+            if [ "${2%%-*}" = "${3%%-*}" ]; then
                 printf "    %-24s %-24s %-24s\n" "$1" "$2" "$3"
                 shift; shift; shift
             else
@@ -475,7 +475,7 @@ post_process_common_cmdline() {
     prefix="${prefix%/}"
     libdir="${libdir:-${prefix}/lib}"
     libdir="${libdir%/}"
-    if [ "${libdir#${prefix}}" == "${libdir}" ]; then
+    if [ "${libdir#${prefix}}" = "${libdir}" ]; then
         die "Libdir ${libdir} must be a subdirectory of ${prefix}"
     fi
 }
diff --git a/release.sh b/release.sh
index 827bbd0a6..880ad0f59 100755
--- a/release.sh
+++ b/release.sh
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/bin/sh
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
@@ -21,7 +21,7 @@ for opt; do
     esac
 done
 
-TAB=$'\t'
+TAB="$(printf '\t')"
 cat > release.mk << EOF
 %\$(BUILD_SFX).tar.bz2: %/.done
 ${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball"
@@ -186,7 +186,7 @@ for cfg in $CONFIGS; do
     esac
     opts="$opts --enable-postproc"
 
-    [ "x${clean}" == "xyes" ] \
+    [ "x${clean}" = "xyes" ] \
         && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \
         && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2
 

From 7aa97a35b515bfb7d7bbcdee4db376f815343e44 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 3 Jun 2010 10:29:04 -0400
Subject: [PATCH 007/307] shared library support (.so)

This patch adds support for building shared libraries when configured
with the --enable-shared switch.

Building DLLs would require more invasive changes to the sample
utilities than I want to make in this patch, since on Windows you can't
use the address of an imported symbol in a static initializer. The best
way to work around this is proably to build the codec interface mapping
table with an init() function, but dll support is of questionable value
anyway, since most windows users will probably use a media framework
lib like webmdshow, which links this library in staticly.

Change-Id: Iafb48900549b0c6b67f4a05d3b790b2643d026f4
---
 build/make/Makefile | 15 +++++++++++++++
 configure           | 20 ++++++++++++++++++++
 libs.mk             | 29 ++++++++++++++++++++++++++++-
 vp8/exports_dec     |  1 +
 vp8/exports_enc     |  1 +
 vp8/vp8cx.mk        |  3 +++
 vp8/vp8dx.mk        |  3 +++
 vpx/exports         | 17 -----------------
 vpx/exports_com     | 16 ++++++++++++++++
 vpx/exports_dec     |  9 +++++++++
 vpx/exports_enc     |  8 ++++++++
 11 files changed, 104 insertions(+), 18 deletions(-)
 create mode 100644 vp8/exports_dec
 create mode 100644 vp8/exports_enc
 delete mode 100644 vpx/exports
 create mode 100644 vpx/exports_com
 create mode 100644 vpx/exports_dec
 create mode 100644 vpx/exports_enc

diff --git a/build/make/Makefile b/build/make/Makefile
index 6dd1279fc..ba2578e93 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -220,6 +220,20 @@ $(1):
 	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
 endef
 
+define so_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+#
+# This needs further abstraction for dealing with non-GNU linkers.
+$(1):
+	$(if $(quiet),@echo "    [LD] $$@")
+	$(qexec)$$(LD) -shared $$(LDFLAGS) \
+            -Wl,--no-undefined -Wl,-soname,$$(SONAME) \
+            -Wl,--version-script,$$(SO_VERSION_SCRIPT) -o $$@ \
+            $$(filter %.o,$$?)
+endef
+
 define lipo_lib_template
 $(1): $(addsuffix /$(1),$(FAT_ARCHS))
 	$(if $(quiet),@echo "    [LIPO] $$@")
@@ -283,6 +297,7 @@ LIBS=$(call enabled,LIBS)
 .libs: $(LIBS)
 	@touch $@
 $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
+$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
 
 INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
 ifeq ($(MAKECMDGOALS),dist)
diff --git a/configure b/configure
index d7d041c27..f2b42bef4 100755
--- a/configure
+++ b/configure
@@ -39,6 +39,7 @@ Advanced options:
   ${toggle_spatial_resampling}    spatial sampling (scaling) support
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_runtime_cpu_detect}    runtime cpu detection
+  ${toggle_shared}                shared library support
 
 Codecs:
   Codecs can be selectively enabled or disabled individually, or by family:
@@ -242,6 +243,7 @@ CONFIG_LIST="
     static_msvcrt
     spatial_resampling
     realtime_only
+    shared
 "
 CMDLINE_SELECT="
     extra_warnings
@@ -280,6 +282,7 @@ CMDLINE_SELECT="
     mem_tracker
     spatial_resampling
     realtime_only
+    shared
 "
 
 process_cmdline() {
@@ -369,6 +372,12 @@ process_targets() {
     if [ -f "${source_path}/build/make/version.sh" ]; then
         local ver=`"$source_path/build/make/version.sh" --bare $source_path`
         DIST_DIR="${DIST_DIR}-${ver}"
+        ver=${ver%%-*}
+        VERSION_PATCH=${ver##*.}
+        ver=${ver%.*}
+        VERSION_MINOR=${ver##*.}
+        ver=${ver#v}
+        VERSION_MAJOR=${ver%.*}
     fi
     enabled child || cat <<EOF >> config.mk
 ifeq (\$(MAKECMDGOALS),dist)
@@ -377,6 +386,11 @@ else
 DIST_DIR?=\$(DESTDIR)${prefix}
 endif
 LIBSUBDIR=${libdir##${prefix}/}
+
+VERSION_MAJOR=${VERSION_MAJOR}
+VERSION_MINOR=${VERSION_MINOR}
+VERSION_PATCH=${VERSION_PATCH}
+
 EOF
     enabled child || echo "CONFIGURE_ARGS?=${CONFIGURE_ARGS}" >> config.mk
 
@@ -396,6 +410,12 @@ EOF
 }
 
 process_detect() {
+    if enabled shared; then
+        # Can only build shared libs on a subset of platforms. Doing this check
+        # here rather than at option parse time because the target auto-detect
+        # magic happens after the command line has been parsed.
+	enabled linux || die "--enable-shared only supported on ELF for now"
+    fi
     if [ -z "$CC" ]; then
         echo "Bypassing toolchain for environment detection."
         enable external_build
diff --git a/libs.mk b/libs.mk
index be237a137..c6b08d21b 100644
--- a/libs.mk
+++ b/libs.mk
@@ -92,7 +92,9 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
 endif
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
-CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports
+CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
+CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
+CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec
 
 INSTALL-LIBS-yes += include/vpx/vpx_codec.h
 INSTALL-LIBS-yes += include/vpx/vpx_image.h
@@ -175,6 +177,31 @@ LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
 OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
 LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)
+
+BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
+LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
+LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)
+$(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
+$(BUILD_PFX)$(LIBVPX_SO): LDFLAGS += -lm -pthread
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver
+LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
+                             libvpx.so libvpx.so.$(VERSION_MAJOR) \
+                             libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
+
+libvpx.ver: $(call enabled,CODEC_EXPORTS)
+	@echo "    [CREATE] $@"
+	$(qexec)echo "{ global:" > $@
+	$(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done
+	$(qexec)echo "local: *; };" >> $@
+CLEAN-OBJS += libvpx.ver
+
+$(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)):
+	@echo "    [LN]      $@"
+	$(qexec)ln -sf $(LIBVPX_SO) $@
+
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
 endif
 
 LIBS-$(LIPO_LIBVPX) += libvpx.a
diff --git a/vp8/exports_dec b/vp8/exports_dec
new file mode 100644
index 000000000..f9b985c86
--- /dev/null
+++ b/vp8/exports_dec
@@ -0,0 +1 @@
+data vpx_codec_vp8_dx_algo
diff --git a/vp8/exports_enc b/vp8/exports_enc
new file mode 100644
index 000000000..996701113
--- /dev/null
+++ b/vp8/exports_enc
@@ -0,0 +1 @@
+data vpx_codec_vp8_cx_algo
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index a512ff910..544a3b3fa 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -10,6 +10,9 @@
 
 
 include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_CX_EXPORTS += exports_enc
+
 VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
 VP8_CX_SRCS-no  += $(VP8_COMMON_SRCS-no)
 VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 44b4b495d..24f18b7cd 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -10,6 +10,9 @@
 
 
 include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
+
+VP8_DX_EXPORTS += exports_dec
+
 VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes)
 VP8_DX_SRCS-no  += $(VP8_COMMON_SRCS-no)
 VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes)
diff --git a/vpx/exports b/vpx/exports
deleted file mode 100644
index f5e7473bc..000000000
--- a/vpx/exports
+++ /dev/null
@@ -1,17 +0,0 @@
-text vpx_dec_control
-text vpx_dec_decode
-text vpx_dec_destroy
-text vpx_dec_err_to_string
-text vpx_dec_error
-text vpx_dec_error_detail
-text vpx_dec_get_caps
-text vpx_dec_get_frame
-text vpx_dec_get_mem_map
-text vpx_dec_get_stream_info
-text vpx_dec_iface_name
-text vpx_dec_init_ver
-text vpx_dec_peek_stream_info
-text vpx_dec_register_put_frame_cb
-text vpx_dec_register_put_slice_cb
-text vpx_dec_set_mem_map
-text vpx_dec_xma_init_ver
diff --git a/vpx/exports_com b/vpx/exports_com
new file mode 100644
index 000000000..2ab05099f
--- /dev/null
+++ b/vpx/exports_com
@@ -0,0 +1,16 @@
+text vpx_codec_build_config
+text vpx_codec_control_
+text vpx_codec_destroy
+text vpx_codec_err_to_string
+text vpx_codec_error
+text vpx_codec_error_detail
+text vpx_codec_get_caps
+text vpx_codec_iface_name
+text vpx_codec_version
+text vpx_codec_version_extra_str
+text vpx_codec_version_str
+text vpx_img_alloc
+text vpx_img_flip
+text vpx_img_free
+text vpx_img_set_rect
+text vpx_img_wrap
diff --git a/vpx/exports_dec b/vpx/exports_dec
new file mode 100644
index 000000000..ed121f7ec
--- /dev/null
+++ b/vpx/exports_dec
@@ -0,0 +1,9 @@
+text vpx_codec_dec_init_ver
+text vpx_codec_decode
+text vpx_codec_get_frame
+text vpx_codec_get_mem_map
+text vpx_codec_get_stream_info
+text vpx_codec_peek_stream_info
+text vpx_codec_register_put_frame_cb
+text vpx_codec_register_put_slice_cb
+text vpx_codec_set_mem_map
diff --git a/vpx/exports_enc b/vpx/exports_enc
new file mode 100644
index 000000000..3d5674926
--- /dev/null
+++ b/vpx/exports_enc
@@ -0,0 +1,8 @@
+text vpx_codec_enc_config_default
+text vpx_codec_enc_config_set
+text vpx_codec_enc_init_ver
+text vpx_codec_encode
+text vpx_codec_get_cx_data
+text vpx_codec_get_global_headers
+text vpx_codec_get_preview_frame
+text vpx_codec_set_cx_data_buf

From 8916fa2c3ef54956b60ded340cb8d974fb0ea709 Mon Sep 17 00:00:00 2001
From: Luca Barbato <lu_zero@gentoo.org>
Date: Sun, 6 Jun 2010 18:51:49 +0200
Subject: [PATCH 008/307] Make shared object use extralibs

this way -lm doesn't get ignored if additional LDFLAGS get passed from
env

Change-Id: Ie630369ae6ed2780377c35aa2726e759d527bb50
---
 build/make/Makefile | 2 +-
 libs.mk             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/make/Makefile b/build/make/Makefile
index ba2578e93..4f7df439a 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -231,7 +231,7 @@ $(1):
 	$(qexec)$$(LD) -shared $$(LDFLAGS) \
             -Wl,--no-undefined -Wl,-soname,$$(SONAME) \
             -Wl,--version-script,$$(SO_VERSION_SCRIPT) -o $$@ \
-            $$(filter %.o,$$?)
+            $$(filter %.o,$$?) $$(extralibs)
 endef
 
 define lipo_lib_template
diff --git a/libs.mk b/libs.mk
index c6b08d21b..fd4543b07 100644
--- a/libs.mk
+++ b/libs.mk
@@ -182,7 +182,7 @@ BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
 LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
 LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)
 $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) libvpx.ver
-$(BUILD_PFX)$(LIBVPX_SO): LDFLAGS += -lm -pthread
+$(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm -pthread
 $(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
 $(BUILD_PFX)$(LIBVPX_SO): SO_VERSION_SCRIPT = libvpx.ver
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \

From 854c007a77b0e86b14caaf510f2e23f04e80fe1b Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Thu, 3 Jun 2010 15:08:44 -0700
Subject: [PATCH 009/307] Remove duplicate and unused functions

Change-Id: I944035e720ef834561a9da0d723879a4f787312c
---
 vp8/encoder/block.h       |   1 -
 vp8/encoder/encodeintra.c |   9 +-
 vp8/encoder/encodemb.c    | 349 +-------------------------------------
 vp8/encoder/ethreading.c  |   1 -
 vp8/encoder/onyx_if.c     |   2 -
 vp8/encoder/quantize.c    |  68 --------
 vp8/encoder/quantize.h    |   3 -
 vp8/encoder/rdopt.c       |   6 +-
 8 files changed, 8 insertions(+), 431 deletions(-)

diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index b765cc069..36648cddc 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -106,7 +106,6 @@ typedef struct
     void (*short_walsh4x4)(short *input, short *output, int pitch);
 
     void (*quantize_b)(BLOCK *b, BLOCKD *d);
-    void (*quantize_brd)(BLOCK *b, BLOCKD *d);
 
 
 
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 813efb990..d632bd85d 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -68,7 +68,7 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL
 
     x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
 
-    x->quantize_brd(be, b);
+    x->quantize_b(be, b);
 
     x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
 
@@ -160,8 +160,7 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     x->e_mbd.mbmi.mb_skip_coeff = 1;
 
-    vp8_quantize_mbyrd(x);
-
+    vp8_quantize_mby(x);
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
@@ -227,9 +226,7 @@ void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_mbuvrd(x);
 
-    vp8_quantize_mbuvrd(x);
-
-
+    vp8_quantize_mbuv(x);
 
     vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index e7c2610c5..9fef75d86 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -494,140 +494,6 @@ void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_
     return;
 }
 
-void vp8_optimize_bplus(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-    int nzpos[16] = {0};
-    short saved_qcoefs[16];
-    short saved_dqcoefs[16];
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    // count potential coefficient to be optimized
-    for (k = !type; k < 16; k++)
-    {
-        int qcoef = abs(bd->qcoeff[k]);
-        int coef = abs(b->coeff[k]);
-        int dq   = dequant_ptr[k];
-
-        if (qcoef && (qcoef * dq < coef) && (coef < (qcoef * dq + dq)))
-        {
-            nzpos[nzcoefcount] = k;
-            nzcoefcount++;
-        }
-    }
-
-    // if nothing here, do nothing for this block.
-    if (!nzcoefcount)
-    {
-        //do not update context, we need do the other half.
-        //*a = *l = (bd->eob != !type);
-        return;
-    }
-
-    // save a copy of quantized coefficients
-    vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
-    vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
-    besteob   = bd->eob;
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-    baserd    = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(bd->qcoeff, saved_qcoefs, 32);
-        vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  = cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(bd->qcoeff,  saved_qcoefs, 32);
-    vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if (bestnc & (1 << k))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-    }
-
-    bd->eob = besteob;
-    //do not update context, we need do the other half.
-    //*a = *l = (bd->eob != !type);
-    return;
-}
 
 void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
 {
@@ -752,181 +618,6 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 
 
 
-void vp8_super_slow_yquant_optimization(MACROBLOCK *x, int type, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK  *b = &x->block[0];
-    BLOCKD *bd = &x->e_mbd.block[0];
-    short *dequant_ptr = &bd->dequant[0][0];
-    struct
-    {
-        int block;
-        int pos;
-    } nzpos[256];
-    short saved_qcoefs[256];
-    short saved_dqcoefs[256];
-    short *coef_ptr   = x->coeff;
-    short *qcoef_ptr  = x->e_mbd.qcoeff;
-    short *dqcoef_ptr = x->e_mbd.dqcoeff;
-
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int i, k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    //this code has assumption in macroblock coeff buffer layout
-    for (i = 0; i < 16; i++)
-    {
-        // count potential coefficient to be optimized
-        for (k = !type; k < 16; k++)
-        {
-            int qcoef = abs(qcoef_ptr[i*16 + k]);
-            int coef = abs(coef_ptr[i*16 + k]);
-            int dq   = dequant_ptr[k];
-
-            if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
-            {
-                nzpos[nzcoefcount].block = i;
-                nzpos[nzcoefcount].pos   = k;
-                nzcoefcount++;
-            }
-        }
-    }
-
-    // if nothing here, do nothing for this macro_block.
-    if (!nzcoefcount || nzcoefcount > 15)
-    {
-        return;
-    }
-
-    /******************************************************************************
-    looking from each coeffient's perspective, each identifed coefficent above could
-    have 2 values:roundeddown(x) and roundedup(x). Therefore the total number of
-    different states is less than 2**nzcoefcount.
-    ******************************************************************************/
-    // save the qunatized coefficents and dequantized coefficicents
-    vpx_memcpy(saved_qcoefs, x->e_mbd.qcoeff,  256);
-    vpx_memcpy(saved_dqcoefs, x->e_mbd.dqcoeff, 256);
-
-    baserate    = mbycost_coeffs(x);
-    baseerror   = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);
-    baserd      = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(x->e_mbd.qcoeff,  saved_qcoefs, 256);
-        vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int bk  = nzpos[k].block;
-            int pos = nzpos[k].pos;
-            int mbkpos  = bk * 16 + pos;
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
-                if (cur_qcoef < 0)
-                {
-                    x->e_mbd.qcoeff[mbkpos]++;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    x->e_mbd.qcoeff[mbkpos]--;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        for (i = 0; i < 16; i++)
-        {
-            BLOCKD *bd = &x->e_mbd.block[i];
-            {
-                int eob = -1;
-                int rc;
-                int l;
-
-                for (l = 0; l < 16; l++)
-                {
-                    rc   = vp8_default_zig_zag1d[l];
-
-                    if (bd->qcoeff[rc])
-                        eob = l;
-                }
-
-                bd->eob = eob + 1;
-            }
-        }
-
-        rate  = mbycost_coeffs(x);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, mberr)(x, !type);;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(x->e_mbd.qcoeff,  saved_qcoefs, 256);
-    vpx_memcpy(x->e_mbd.dqcoeff, saved_dqcoefs, 256);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int bk  = nzpos[k].block;
-            int pos = nzpos[k].pos;
-            int mbkpos  = bk * 16 + pos;
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = x->e_mbd.qcoeff[mbkpos];
-
-                if (cur_qcoef < 0)
-                {
-                    x->e_mbd.qcoeff[mbkpos]++;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    x->e_mbd.qcoeff[mbkpos]--;
-                    x->e_mbd.dqcoeff[mbkpos] = x->e_mbd.qcoeff[mbkpos] * dequant_ptr[pos];
-                }
-            }
-        }
-    }
-
-    for (i = 0; i < 16; i++)
-    {
-        BLOCKD *bd = &x->e_mbd.block[i];
-        {
-            int eob = -1;
-            int rc;
-            int l;
-
-            for (l = 0; l < 16; l++)
-            {
-                rc   = vp8_default_zig_zag1d[l];
-
-                if (bd->qcoeff[rc])
-                    eob = l;
-            }
-
-            bd->eob = eob + 1;
-        }
-    }
-
-    return;
-}
-
 static void vp8_find_mb_skip_coef(MACROBLOCK *x)
 {
     int i;
@@ -955,42 +646,6 @@ static void vp8_find_mb_skip_coef(MACROBLOCK *x)
 }
 
 
-void vp8_optimize_mb_slow(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
-{
-    int b;
-    TEMP_CONTEXT t, t2;
-    int type = 0;
-
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
-
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
-
-    vp8_super_slow_yquant_optimization(x, type, rtcd);
-    /*
-    for(b=0;b<16;b++)
-    {
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-    }
-    */
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-
-    for (b = 16; b < 20; b++)
-    {
-        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
-    }
-
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
-
-    for (b = 20; b < 24; b++)
-    {
-        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
-    }
-}
-
-
 void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
@@ -1105,7 +760,7 @@ void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_mbuv(x);
 
-    vp8_quantize_mbuv(x);
+    vp8_quantize_mb(x);
 
     vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
@@ -1120,6 +775,6 @@ void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_mbuvrd(x);
 
-    vp8_quantize_mbuvrd(x);
+    vp8_quantize_mbuv(x);
 
 }
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index d7a81f2f5..640a84db7 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -263,7 +263,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     z->vp8_short_fdct4x4_ptr = x->vp8_short_fdct4x4_ptr;
     z->short_walsh4x4    = x->short_walsh4x4;
     z->quantize_b        = x->quantize_b;
-    z->quantize_brd      = x->quantize_brd;
 
     /*
     z->mvc              = x->mvc;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index bb0e406fa..c0ca5b306 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1150,12 +1150,10 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     if (cpi->sf.improved_quant)
     {
         cpi->mb.quantize_b    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
-        cpi->mb.quantize_brd  = QUANTIZE_INVOKE(&cpi->rtcd.quantize, quantb);
     }
     else
     {
         cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
-        cpi->mb.quantize_brd    = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
     }
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 5aa99161d..f2c3d2f81 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -180,71 +180,3 @@ void vp8_quantize_mbuv(MACROBLOCK *x)
         x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
     }
 }
-
-// This function is not currently called
-void vp8_quantize_mbrd(MACROBLOCK *x)
-{
-    int i;
-
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        for (i = 16; i < 25; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-}
-
-void vp8_quantize_mbuvrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i++)
-    {
-        x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-    }
-}
-
-void vp8_quantize_mbyrd(MACROBLOCK *x)
-{
-    int i;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        x->quantize_brd(&x->block[24], &x->e_mbd.block[24]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-
-    }
-    else
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_brd(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-}
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 32720708a..f5ee9d7a2 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -46,8 +46,5 @@ typedef struct
 extern void vp8_quantize_mb(MACROBLOCK *x);
 extern void vp8_quantize_mbuv(MACROBLOCK *x);
 extern void vp8_quantize_mby(MACROBLOCK *x);
-extern void vp8_quantize_mbyrd(MACROBLOCK *x);
-extern void vp8_quantize_mbuvrd(MACROBLOCK *x);
-extern void vp8_quantize_mbrd(MACROBLOCK *x);
 
 #endif
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index e1cfc4c05..ff0304600 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1038,7 +1038,7 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
 
             // set to 0 no way to account for 2nd order DC so discount
             //be->coeff[0] = 0;
-            x->quantize_brd(be, bd);
+            x->quantize_b(be, bd);
 
             distortion += ENCODEMB_INVOKE(rtcd, berr)(be->coeff, bd->dqcoeff);
         }
@@ -1076,7 +1076,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     // Quantization
     for (b = 0; b < 16; b++)
     {
-        mb->quantize_brd(&mb->block[b], &mb->e_mbd.block[b]);
+        mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]);
     }
 
     // DC predication and Quantization of 2nd Order block
@@ -1084,7 +1084,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     {
 
         {
-            mb->quantize_brd(mb_y2, x_y2);
+            mb->quantize_b(mb_y2, x_y2);
         }
     }
 

From 0dd78af3e9b089eacc9af280adfb5549fc7ecdcd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20J=C3=A4genstedt?= <philipj@opera.com>
Date: Mon, 7 Jun 2010 16:42:09 +0800
Subject: [PATCH 010/307] remove unreferenced variable i

---
 vp8/decoder/detokenize.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index e35f77a84..d6f5ca26c 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -66,7 +66,6 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 
     ENTROPY_CONTEXT *a;
     ENTROPY_CONTEXT *l;
-    int i;
 
     /* Clear entropy contexts for Y blocks */
     a = A[Y1CONTEXT];

From 28de670cd990e8d1da8ffafb34d1bc24c9c2299a Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 7 Jun 2010 17:34:46 +0100
Subject: [PATCH 011/307] Fix RD bug.

---
 vp8/encoder/firstpass.c |  8 ++++++++
 vp8/encoder/rdopt.c     | 36 +++++++++++++++---------------------
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 90f95e083..26f09d5e3 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1869,6 +1869,14 @@ void vp8_second_pass(VP8_COMP *cpi)
         }
     }
 
+    // Keep a globally available copy of this frames iiratio and the next.
+    cpi->this_iiratio = this_frame_intra_error / DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+    {
+        FIRSTPASS_STATS next_frame;
+        if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
+            cpi->next_iiratio = next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+    }
+
     // Set nominal per second bandwidth for this frame
     cpi->target_bandwidth = cpi->per_frame_bandwidth * cpi->output_frame_rate;
     if (cpi->target_bandwidth < 0)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index ff0304600..a6bd8e86d 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -171,15 +171,13 @@ static void fill_token_costs(
 
 }
 
-static int rd_iifactor [ 32 ] =  {    16,  16,  16,  12,   8,   4,   2,   0,
+static int rd_iifactor [ 32 ] =  {    4,   4,   3,   2,   1,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                       0,   0,   0,   0,   0,   0,   0,   0,
                                  };
 
 
-
-
 // The values in this table should be reviewed
 static int sad_per_bit16lut[128] =
 {
@@ -238,36 +236,32 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
 
     vp8_clear_system_state();  //__asm emms;
 
-    cpi->RDMULT = (int)((0.00007 * (capped_q * capped_q * capped_q * capped_q)) - (0.0125 * (capped_q * capped_q * capped_q)) +
-                        (2.25 * (capped_q * capped_q)) - (12.5 * capped_q) + 25.0);
+    cpi->RDMULT = (int)( (0.0001 * (capped_q * capped_q * capped_q * capped_q))
+                        -(0.0125 * (capped_q * capped_q * capped_q))
+                        +(3.25 * (capped_q * capped_q))
+                        -(12.5 * capped_q) + 50.0);
 
-    if (cpi->RDMULT < 25)
-        cpi->RDMULT = 25;
+    if (cpi->RDMULT < 50)
+        cpi->RDMULT = 50;
 
-    if (cpi->pass == 2)
+    if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
     {
-        if (cpi->common.frame_type == KEY_FRAME)
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[0]) / 16;
-        else if (cpi->next_iiratio > 31)
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) / 16;
+        if (cpi->next_iiratio > 31)
+            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4;
         else
-            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) / 16;
+            cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
     }
 
 
     // Extend rate multiplier along side quantizer zbin increases
     if (cpi->zbin_over_quant  > 0)
     {
-        // Extend rate multiplier along side quantizer zbin increases
-        if (cpi->zbin_over_quant  > 0)
-        {
-            double oq_factor = pow(1.006,  cpi->zbin_over_quant);
+        double oq_factor = pow(1.006,  cpi->zbin_over_quant);
 
-            if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0)))
-                oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0);
+        if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0)))
+            oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0);
 
-            cpi->RDMULT = (int)(oq_factor * cpi->RDMULT);
-        }
+        cpi->RDMULT = (int)(oq_factor * cpi->RDMULT);
     }
 
     cpi->mb.errorperbit = (cpi->RDMULT / 100);

From 6702a4047da8f99a54a4eae87059d6897ea55266 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Tue, 8 Jun 2010 09:59:57 +0100
Subject: [PATCH 012/307] Correct comment

---
 vp8/encoder/ratectrl.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index d136bd2f4..944a2e832 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1364,8 +1364,7 @@ int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame)
                 if (cpi->zbin_over_quant > zbin_oqmax)
                     cpi->zbin_over_quant = zbin_oqmax;
 
-                // Each over-run step is assumed to equate to approximately
-                // 3% reduction in bitrate
+                // Adjust bits_per_mb_at_this_q estimate
                 bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q);
                 Factor += factor_adjustment;
 

From 4bb895e85401d81d7dd2615380309812edfefc5e Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Tue, 8 Jun 2010 07:56:55 -0700
Subject: [PATCH 013/307] fix a typo

Change-Id: I180a05ad57ee6164a6a169ee08e8affd09671eee
---
 vp8/encoder/encodemb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 9fef75d86..a66b90c53 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -760,7 +760,7 @@ void vp8_encode_inter16x16uv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_mbuv(x);
 
-    vp8_quantize_mb(x);
+    vp8_quantize_mbuv(x);
 
     vp8_inverse_transform_mbuv(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 

From 3225b893e80627aa95e924ec789468db6c38f82c Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Tue, 8 Jun 2010 13:32:15 -0700
Subject: [PATCH 014/307] minor cleanup of quantizer and fdct code

Change-Id: I7ccc580410bea096a70dce0cc3d455348d4287c5
---
 vp8/encoder/block.h      |  1 -
 vp8/encoder/ethreading.c |  1 -
 vp8/encoder/onyx_if.c    |  1 -
 vp8/encoder/quantize.c   | 55 ++++++++++++----------------------------
 4 files changed, 16 insertions(+), 42 deletions(-)

diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 36648cddc..2e866ddf4 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -102,7 +102,6 @@ typedef struct
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
     void (*short_fdct4x4rd)(short *input, short *output, int pitch);
     void (*short_fdct8x4rd)(short *input, short *output, int pitch);
-    void (*vp8_short_fdct4x4_ptr)(short *input, short *output, int pitch);
     void (*short_walsh4x4)(short *input, short *output, int pitch);
 
     void (*quantize_b)(BLOCK *b, BLOCKD *d);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 640a84db7..11b19368f 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -260,7 +260,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     z->short_fdct4x4rd   = x->short_fdct4x4rd;
     z->short_fdct8x4rd   = x->short_fdct8x4rd;
     z->short_fdct8x4rd   = x->short_fdct8x4rd;
-    z->vp8_short_fdct4x4_ptr = x->vp8_short_fdct4x4_ptr;
     z->short_walsh4x4    = x->short_walsh4x4;
     z->quantize_b        = x->quantize_b;
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c0ca5b306..a2a27d3ae 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1144,7 +1144,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
         cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
     }
 
-    cpi->mb.vp8_short_fdct4x4_ptr = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
     cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
 
     if (cpi->sf.improved_quant)
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index f2c3d2f81..73e80e36c 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -56,9 +56,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
             }
         }
     }
-
     d->eob = eob + 1;
-
 }
 
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
@@ -112,61 +110,40 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
     d->eob = eob + 1;
 }
+
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;
+    int has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
+        && x->e_mbd.mbmi.mode != SPLITMV);
 
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    for (i = 0; i < 16; i++)
     {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+        x->e_mbd.mbmi.mb_skip_coeff &=
+            (x->e_mbd.block[i].eob <= has_2nd_order);
+    }
 
+    if(has_2nd_order)
+    {
         x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
         x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-
-    }
-    else
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
     }
 }
 
 void vp8_quantize_mb(MACROBLOCK *x)
 {
     int i;
+    int has_2nd_order=(x->e_mbd.mbmi.mode != B_PRED
+        && x->e_mbd.mbmi.mode != SPLITMV);
 
     x->e_mbd.mbmi.mb_skip_coeff = 1;
-
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    for (i = 0; i < 24+has_2nd_order; i++)
     {
-        for (i = 0; i < 16; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        for (i = 16; i < 25; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
+        x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
+        x->e_mbd.mbmi.mb_skip_coeff &=
+            (x->e_mbd.block[i].eob <= (has_2nd_order && i<16));
     }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-            x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-
 }
 
 

From 3085025fa1392e99bc95a519657374f2e9a4249b Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 9 Jun 2010 11:29:20 -0400
Subject: [PATCH 015/307] Remove secondary mv clamping from decode stage

This patch removes the secondary MV clamping from the MV decoder. This
behavior was consistent with limits placed on non-split MVs by the
reference encoder, but was inconsistent with the MVs generated in the
split case.

The purpose of this secondary clamping was only to prevent crashes on
invalid data. It was not intended to be a behaviour an encoder could or
should rely on. Instead of doing additional clamping in a way that
changes the entropy context, the secondary clamp is removed and the
border handling is made implmentation specific. With respect to the
spec, the border is treated as essentially infinite, limited only by
the clamping performed on the near/nearest reference and the maximum
encodable magnitude of the residual MV.

This does not affect any currently produced streams.

Change-Id: I68d35a2fbb51570d6569eab4ad233961405230a3
---
 vp8/common/blockd.h      |   3 +-
 vp8/decoder/decodemv.c   |  44 +++++++++--------
 vp8/decoder/decodframe.c | 101 ++++++++++++++++++++++++++++++++++-----
 vp8/decoder/threading.c  |   8 ++--
 4 files changed, 117 insertions(+), 39 deletions(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 9f8a00fe7..2b25f62b4 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -174,9 +174,8 @@ typedef struct
     int dc_diff;
     unsigned char   segment_id;                  // Which set of segmentation parameters should be used for this MB
     int force_no_skip;
-
+    int need_to_clamp_mvs;
     B_MODE_INFO partition_bmi[16];
-
 } MB_MODE_INFO;
 
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 32925fe9c..7e004238a 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -171,6 +171,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
             VP8_COMMON *const pc = &pbi->common;
             MACROBLOCKD *xd = &pbi->mb;
 
+            mbmi->need_to_clamp_mvs = 0;
             vp8dx_bool_decoder_fill(bc);
 
             // Distance of Mb to the various image edges.
@@ -269,6 +270,17 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                             break;
                         }
 
+                        if (mv->col < xd->mb_to_left_edge
+                                      - LEFT_TOP_MARGIN
+                            || mv->col > xd->mb_to_right_edge
+                                         + RIGHT_BOTTOM_MARGIN
+                            || mv->row < xd->mb_to_top_edge
+                                         - LEFT_TOP_MARGIN
+                            || mv->row > xd->mb_to_bottom_edge
+                                         + RIGHT_BOTTOM_MARGIN
+                            )
+                            mbmi->need_to_clamp_mvs = 1;
+
                         /* Fill (uniform) modes, mvs of jth subset.
                            Must do it here because ensuing subsets can
                            refer back to us via "left" or "above". */
@@ -325,27 +337,18 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                     read_mv(bc, mv, (const MV_CONTEXT *) mvc);
                     mv->row += best_mv.row;
                     mv->col += best_mv.col;
-                    /* Encoder should not produce invalid motion vectors, but since
-                     * arbitrary length MVs can be parsed from the bitstream, we
-                     * need to clamp them here in case we're reading bad data to
-                     * avoid a crash.
+
+                    /* Don't need to check this on NEARMV and NEARESTMV modes
+                     * since those modes clamp the MV. The NEWMV mode does not,
+                     * so signal to the prediction stage whether special
+                     * handling may be required.
                      */
-#if CONFIG_DEBUG
-                    assert(mv->col >= (xd->mb_to_left_edge - LEFT_TOP_MARGIN));
-                    assert(mv->col <= (xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN));
-                    assert(mv->row >= (xd->mb_to_top_edge - LEFT_TOP_MARGIN));
-                    assert(mv->row <= (xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN));
-#endif
-
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
+                    if (mv->col < xd->mb_to_left_edge - LEFT_TOP_MARGIN
+                        || mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN
+                        || mv->row < xd->mb_to_top_edge - LEFT_TOP_MARGIN
+                        || mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN
+                        )
+                        mbmi->need_to_clamp_mvs = 1;
 
                 propagate_mv:  /* same MV throughout */
                     {
@@ -381,7 +384,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                     assert(0);
 #endif
                 }
-
             }
             else
             {
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 0abe4962c..de4a64588 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -126,6 +126,47 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
     }
 }
 
+
+static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+    /* If the MV points so far into the UMV border that no visible pixels
+     * are used for reconstruction, the subpel part of the MV can be
+     * discarded and the MV limited to 16 pixels with equivalent results.
+     *
+     * This limit kicks in at 19 pixels for the top and left edges, for
+     * the 16 pixels plus 3 taps right of the central pixel when subpel
+     * filtering. The bottom and right edges use 16 pixels plus 2 pixels
+     * left of the central pixel when filtering.
+     */
+    if (mv->col < (xd->mb_to_left_edge - (19 << 3)))
+        mv->col = xd->mb_to_left_edge - (16 << 3);
+    else if (mv->col > xd->mb_to_right_edge + (18 << 3))
+        mv->col = xd->mb_to_right_edge + (16 << 3);
+
+    if (mv->row < (xd->mb_to_top_edge - (19 << 3)))
+        mv->row = xd->mb_to_top_edge - (16 << 3);
+    else if (mv->row > xd->mb_to_bottom_edge + (18 << 3))
+        mv->row = xd->mb_to_bottom_edge + (16 << 3);
+}
+
+
+static void clamp_mvs(MACROBLOCKD *xd)
+{
+    if (xd->mbmi.mode == SPLITMV)
+    {
+        int i;
+
+        for (i=0; i<16; i++)
+            clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+    }
+    else
+    {
+        clamp_mv_to_umv_border(&xd->mbmi.mv.as_mv, xd);
+        clamp_mv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
+    }
+
+}
+
 static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
@@ -233,6 +274,8 @@ static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
 void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     int eobtotal = 0;
+    MV  orig_mvs[24];
+    int i, do_clamp = xd->mbmi.need_to_clamp_mvs;
 
     if (xd->mbmi.mb_skip_coeff)
     {
@@ -243,20 +286,50 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         eobtotal = vp8_decode_mb_tokens(pbi, xd);
     }
 
-    xd->mode_info_context->mbmi.dc_diff = 1;
-
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
+    /* Perform temporary clamping of the MV to be used for prediction */
+    if (do_clamp)
     {
-        xd->mode_info_context->mbmi.dc_diff = 0;
-        skip_recon_mb(pbi, xd);
-        return;
+        if (xd->mbmi.mode == SPLITMV)
+            for (i=0; i<24; i++)
+                orig_mvs[i] = xd->block[i].bmi.mv.as_mv;
+        else
+        {
+            orig_mvs[0] = xd->mbmi.mv.as_mv;
+            orig_mvs[1] = xd->block[16].bmi.mv.as_mv;
+        }
+        clamp_mvs(xd);
     }
 
-    if (xd->segmentation_enabled)
-        mb_init_dequantizer(pbi, xd);
+    xd->mode_info_context->mbmi.dc_diff = 1;
 
-    de_quantand_idct(pbi, xd);
-    reconstruct_mb(pbi, xd);
+    do {
+        if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
+        {
+            xd->mode_info_context->mbmi.dc_diff = 0;
+            skip_recon_mb(pbi, xd);
+            break;
+        }
+
+        if (xd->segmentation_enabled)
+            mb_init_dequantizer(pbi, xd);
+
+        de_quantand_idct(pbi, xd);
+        reconstruct_mb(pbi, xd);
+    } while(0);
+
+
+    /* Restore the original MV so as not to affect the entropy context. */
+    if (do_clamp)
+    {
+        if (xd->mbmi.mode == SPLITMV)
+            for (i=0; i<24; i++)
+                xd->block[i].bmi.mv.as_mv = orig_mvs[i];
+        else
+        {
+            xd->mbmi.mv.as_mv = orig_mvs[0];
+            xd->block[16].bmi.mv.as_mv = orig_mvs[1];
+        }
+    }
 }
 
 static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
@@ -314,7 +387,9 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
     {
         // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
+        // the partition_bmi array is unused in the decoder, so don't copy it.
+        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
+                   sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
 
         if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
         {
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 470d1aeb1..87bba2020 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -154,7 +154,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                     }
 
                     // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi, 32); //sizeof(MB_MODE_INFO) );
+                    // the partition_bmi array is unused in the decoder, so don't copy it.
+                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
+                               sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
 
                     if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
                     {

From a04ed23ff56f536c3da206563f64dea19e5a7e82 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Wed, 9 Jun 2010 15:03:48 +0100
Subject: [PATCH 016/307] Adjust to avoid long line

---
 vp8/encoder/firstpass.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 26f09d5e3..f0ddbf2d5 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1869,12 +1869,16 @@ void vp8_second_pass(VP8_COMP *cpi)
         }
     }
 
-    // Keep a globally available copy of this frames iiratio and the next.
-    cpi->this_iiratio = this_frame_intra_error / DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
+    // Keep a globally available copy of this and the next frame's iiratio.
+    cpi->this_iiratio = this_frame_intra_error /
+                        DOUBLE_DIVIDE_CHECK(this_frame_coded_error);
     {
         FIRSTPASS_STATS next_frame;
         if ( lookup_next_frame_stats(cpi, &next_frame) != EOF )
-            cpi->next_iiratio = next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+        {
+            cpi->next_iiratio = next_frame.intra_error /
+                                DOUBLE_DIVIDE_CHECK(next_frame.coded_error);
+        }
     }
 
     // Set nominal per second bandwidth for this frame

From ffd5b58f9144c40dbfd1cf9ea99d089a3f4e4f42 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20J=C3=A4genstedt?= <philipj@opera.com>
Date: Mon, 7 Jun 2010 06:12:14 +0200
Subject: [PATCH 017/307] Detect toolchain based on gcc -dumpmachine

Using uname fails e.g. on a 64-bit machine with a 32-bit toolchain.
The following gcc -dumpmachine strings have been verified:
 * 32-bit Linux gives i486-linux-gnu
 * 64-bit Linux gives x86_64-linux-gnu
 * Mac OS X 10.5 gives i686-apple-darwin9
 * MinGW gives mingw32

*darwin8* and *bsd* can safely be assumed to be correct, but *cygwin*
is a guess.

Change-Id: I6bef2ab5e97cbd3410aa66b0c4f84d2231884b05
---
 build/make/configure.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index a70a84eec..28ed21cf5 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -497,10 +497,10 @@ setup_gnu_toolchain() {
 
 process_common_toolchain() {
     if [ -z "$toolchain" ]; then
-        uname="$(uname -a)"
+	gcctarget="$(gcc -dumpmachine 2> /dev/null)"
 
         # detect tgt_isa
-        case "$uname" in
+        case "$gcctarget" in
             *x86_64*)
                 tgt_isa=x86_64
                 ;;
@@ -510,19 +510,19 @@ process_common_toolchain() {
         esac
 
         # detect tgt_os
-        case "$uname" in
-            *Darwin\ Kernel\ Version\ 8*)
+        case "$gcctarget" in
+            *darwin8*)
                 tgt_isa=universal
                 tgt_os=darwin8
                 ;;
-            *Darwin\ Kernel\ Version\ 9*)
+            *darwin9*)
                 tgt_isa=universal
                 tgt_os=darwin9
                 ;;
-            *Msys*|*Cygwin*)
+            *msys*|*cygwin*)
                 tgt_os=win32
                 ;;
-            *Linux*|*BSD*)
+            *linux*|*bsd*)
                 tgt_os=linux
                 ;;
         esac

From 8873a938116d1eeee1cfa5b8b97da5a9c653125d Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 10 Jun 2010 11:48:48 -0400
Subject: [PATCH 018/307] Improve vp8_sixtap_predict functions

Restructure vp8_sixtap_predict functions to eliminate extra 5-line
calculation while doing first-pass only. Also, combline functions
to eliminate usage of intermediate buffer. This gives decoder a 3%
performance gain on my test clips.

Change-Id: I13de49638884d1a57d0855c63aea719316d08c1b
---
 vp8/common/x86/subpixel_sse2.asm | 637 +++++++++++++++++++++++--------
 vp8/common/x86/vp8_asm_stubs.c   | 134 ++++---
 2 files changed, 555 insertions(+), 216 deletions(-)

diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index c8821614d..cc2837b8d 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -402,6 +402,485 @@ vp8_filter_block1d8_v6_sse2_loop:
     ret
 
 
+;void vp8_filter_block1d16_v6_sse2
+;(
+;    unsigned short *src_ptr,
+;    unsigned char *output_ptr,
+;    int dst_ptich,
+;    unsigned int pixels_per_line,
+;    unsigned int pixel_step,
+;    unsigned int output_height,
+;    unsigned int output_width,
+;    const short    *vp8_filter
+;)
+;/************************************************************************************
+; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The
+; input pixel array has output_height rows.
+;*************************************************************************************/
+global sym(vp8_filter_block1d16_v6_sse2)
+sym(vp8_filter_block1d16_v6_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 8
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rax,        arg(7) ;vp8_filter
+        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
+
+        mov         rdi,        arg(1) ;output_ptr
+        mov         rsi,        arg(0) ;src_ptr
+
+        sub         rsi,        rdx
+        sub         rsi,        rdx
+
+        movsxd      rcx,        DWORD PTR arg(5) ;[output_height]
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(2) ; dst_ptich
+%endif
+
+vp8_filter_block1d16_v6_sse2_loop:
+; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order.
+        movdqa      xmm1,       XMMWORD PTR [rsi + rdx]       ; line 2
+        movdqa      xmm2,       XMMWORD PTR [rsi + rdx + 16]
+        pmullw      xmm1,       [rax + 16]
+        pmullw      xmm2,       [rax + 16]
+
+        movdqa      xmm3,       XMMWORD PTR [rsi + rdx * 4]       ; line 5
+        movdqa      xmm4,       XMMWORD PTR [rsi + rdx * 4 + 16]
+        pmullw      xmm3,       [rax + 64]
+        pmullw      xmm4,       [rax + 64]
+
+        movdqa      xmm5,       XMMWORD PTR [rsi + rdx * 2]       ; line 3
+        movdqa      xmm6,       XMMWORD PTR [rsi + rdx * 2 + 16]
+        pmullw      xmm5,       [rax + 32]
+        pmullw      xmm6,       [rax + 32]
+
+        movdqa      xmm7,       XMMWORD PTR [rsi]       ; line 1
+        movdqa      xmm0,       XMMWORD PTR [rsi + 16]
+        pmullw      xmm7,       [rax]
+        pmullw      xmm0,       [rax]
+
+        paddsw      xmm1,       xmm3
+        paddsw      xmm2,       xmm4
+        paddsw      xmm1,       xmm5
+        paddsw      xmm2,       xmm6
+        paddsw      xmm1,       xmm7
+        paddsw      xmm2,       xmm0
+
+        add         rsi,        rdx
+
+        movdqa      xmm3,       XMMWORD PTR [rsi + rdx * 2]       ; line 4
+        movdqa      xmm4,       XMMWORD PTR [rsi + rdx * 2 + 16]
+        pmullw      xmm3,       [rax + 48]
+        pmullw      xmm4,       [rax + 48]
+
+        movdqa      xmm5,       XMMWORD PTR [rsi + rdx * 4]       ; line 6
+        movdqa      xmm6,       XMMWORD PTR [rsi + rdx * 4 + 16]
+        pmullw      xmm5,       [rax + 80]
+        pmullw      xmm6,       [rax + 80]
+
+        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+        pxor        xmm0,       xmm0                        ; clear xmm0
+
+        paddsw      xmm1,       xmm3
+        paddsw      xmm2,       xmm4
+        paddsw      xmm1,       xmm5
+        paddsw      xmm2,       xmm6
+
+        paddsw      xmm1,       xmm7
+        paddsw      xmm2,       xmm7
+
+        psraw       xmm1,       7
+        psraw       xmm2,       7
+
+        packuswb    xmm1,       xmm2              ; pack and saturate
+        movdqa      XMMWORD PTR [rdi], xmm1       ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(2) ;[dst_ptich]
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx         ; decrement count
+        jnz         vp8_filter_block1d16_v6_sse2_loop               ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d8_h6_only_sse2
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    int dst_ptich,
+;    unsigned int    output_height,
+;    const short    *vp8_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp8_filter_block1d8_h6_only_sse2)
+sym(vp8_filter_block1d8_h6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rdx,        arg(5) ;vp8_filter
+        mov         rsi,        arg(0) ;src_ptr
+
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rax,        dword ptr arg(1) ;src_pixels_per_line            ; Pitch for Source
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ;dst_ptich
+%endif
+        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
+
+filter_block1d8_h6_only_rowloop:
+        movq        xmm3,       MMWORD PTR [rsi - 2]
+        movq        xmm1,       MMWORD PTR [rsi + 6]
+
+        prefetcht2  [rsi+rax-2]
+
+        pslldq      xmm1,       8
+        por         xmm1,       xmm3
+
+        movdqa      xmm4,       xmm1
+        movdqa      xmm5,       xmm1
+
+        movdqa      xmm6,       xmm1
+        movdqa      xmm7,       xmm1
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm1,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm1,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm1,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm1
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0
+
+        movq        QWORD PTR [rdi],   xmm4       ; store the results in the destination
+        lea         rsi,        [rsi + rax]
+
+%if ABI_IS_32BIT
+        add         rdi,        DWORD Ptr arg(3) ;dst_ptich
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx
+
+        jnz         filter_block1d8_h6_only_rowloop                ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d16_h6_only_sse2
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    int dst_ptich,
+;    unsigned int    output_height,
+;    const short    *vp8_filter
+;)
+; First-pass filter only when yoffset==0
+global sym(vp8_filter_block1d16_h6_only_sse2)
+sym(vp8_filter_block1d16_h6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rdx,        arg(5) ;vp8_filter
+        mov         rsi,        arg(0) ;src_ptr
+
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rax,        dword ptr arg(1) ;src_pixels_per_line            ; Pitch for Source
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ;dst_ptich
+%endif
+
+        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
+
+filter_block1d16_h6_only_sse2_rowloop:
+        movq        xmm3,       MMWORD PTR [rsi - 2]
+        movq        xmm1,       MMWORD PTR [rsi + 6]
+
+        movq        xmm2,       MMWORD PTR [rsi +14]
+        pslldq      xmm2,       8
+
+        por         xmm2,       xmm1
+        prefetcht2  [rsi+rax-2]
+
+        pslldq      xmm1,       8
+        por         xmm1,       xmm3
+
+        movdqa      xmm4,       xmm1
+        movdqa      xmm5,       xmm1
+
+        movdqa      xmm6,       xmm1
+        movdqa      xmm7,       xmm1
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm1,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm1,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm1,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm1
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0                        ; lower 8 bytes
+
+        movq        QWORD Ptr [rdi],         xmm4           ; store the results in the destination
+
+        movdqa      xmm3,       xmm2
+        movdqa      xmm4,       xmm2
+
+        movdqa      xmm5,       xmm2
+        movdqa      xmm6,       xmm2
+
+        movdqa      xmm7,       xmm2
+
+        punpcklbw   xmm3,       xmm0                        ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2
+        psrldq      xmm4,       1                           ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1
+
+        pmullw      xmm3,       XMMWORD PTR [rdx]           ; x[-2] * H[-2]; Tap 1
+        punpcklbw   xmm4,       xmm0                        ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1
+
+        psrldq      xmm5,       2                           ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00
+        pmullw      xmm4,       XMMWORD PTR [rdx+16]        ; x[-1] * H[-1]; Tap 2
+
+        punpcklbw   xmm5,       xmm0                        ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00
+        psrldq      xmm6,       3                           ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01
+
+        pmullw      xmm5,       [rdx+32]                    ; x[ 0] * H[ 0]; Tap 3
+
+        punpcklbw   xmm6,       xmm0                        ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01
+        psrldq      xmm7,       4                           ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02
+
+        pmullw      xmm6,       [rdx+48]                    ; x[ 1] * h[ 1] ; Tap 4
+
+        punpcklbw   xmm7,       xmm0                        ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02
+        psrldq      xmm2,       5                           ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03
+
+        pmullw      xmm7,       [rdx+64]                    ; x[ 2] * h[ 2] ; Tap 5
+
+        punpcklbw   xmm2,       xmm0                        ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03
+        pmullw      xmm2,       [rdx+80]                    ; x[ 3] * h[ 3] ; Tap 6
+
+        paddsw      xmm4,       xmm7
+        paddsw      xmm4,       xmm5
+
+        paddsw      xmm4,       xmm3
+        paddsw      xmm4,       xmm6
+
+        paddsw      xmm4,       xmm2
+        paddsw      xmm4,       [rd GLOBAL]
+
+        psraw       xmm4,       7
+
+        packuswb    xmm4,       xmm0                        ; higher 8 bytes
+
+        movq        QWORD Ptr [rdi+8],      xmm4            ; store the results in the destination
+
+        lea         rsi,        [rsi + rax]
+%if ABI_IS_32BIT
+        add         rdi,        DWORD Ptr arg(3) ;dst_ptich
+%else
+        add         rdi,        r8
+%endif
+
+        dec         rcx
+        jnz         filter_block1d16_h6_only_sse2_rowloop                ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_filter_block1d8_v6_only_sse2
+;(
+;    unsigned char *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char *output_ptr,
+;    int dst_ptich,
+;    unsigned int output_height,
+;    const short    *vp8_filter
+;)
+; Second-pass filter only when xoffset==0
+global sym(vp8_filter_block1d8_v6_only_sse2)
+sym(vp8_filter_block1d8_v6_only_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov         rsi,        arg(0) ;src_ptr
+        mov         rdi,        arg(2) ;output_ptr
+
+        movsxd      rcx,        dword ptr arg(4) ;output_height
+        movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
+
+        mov         rax,        arg(5) ;vp8_filter
+
+        pxor        xmm0,       xmm0                        ; clear xmm0
+
+        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(3) ; dst_ptich
+%endif
+
+vp8_filter_block1d8_v6_only_sse2_loop:
+        movq        xmm1,       MMWORD PTR [rsi]
+        movq        xmm2,       MMWORD PTR [rsi + rdx]
+        movq        xmm3,       MMWORD PTR [rsi + rdx * 2]
+        movq        xmm5,       MMWORD PTR [rsi + rdx * 4]
+        add         rsi,        rdx
+        movq        xmm4,       MMWORD PTR [rsi + rdx * 2]
+        movq        xmm6,       MMWORD PTR [rsi + rdx * 4]
+
+        punpcklbw   xmm1,       xmm0
+        pmullw      xmm1,       [rax]
+
+        punpcklbw   xmm2,       xmm0
+        pmullw      xmm2,       [rax + 16]
+
+        punpcklbw   xmm3,       xmm0
+        pmullw      xmm3,       [rax + 32]
+
+        punpcklbw   xmm5,       xmm0
+        pmullw      xmm5,       [rax + 64]
+
+        punpcklbw   xmm4,       xmm0
+        pmullw      xmm4,       [rax + 48]
+
+        punpcklbw   xmm6,       xmm0
+        pmullw      xmm6,       [rax + 80]
+
+        paddsw      xmm2,       xmm5
+        paddsw      xmm2,       xmm3
+
+        paddsw      xmm2,       xmm1
+        paddsw      xmm2,       xmm4
+
+        paddsw      xmm2,       xmm6
+        paddsw      xmm2,       xmm7
+
+        psraw       xmm2,       7
+        packuswb    xmm2,       xmm0              ; pack and saturate
+
+        movq        QWORD PTR [rdi], xmm2         ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(3) ;[dst_ptich]
+%else
+        add         rdi,        r8
+%endif
+        dec         rcx         ; decrement count
+        jnz         vp8_filter_block1d8_v6_only_sse2_loop               ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
 ;void vp8_unpack_block1d16_h6_sse2
 ;(
 ;    unsigned char  *src_ptr,
@@ -459,164 +938,6 @@ unpack_block1d16_h6_sse2_rowloop:
     ret
 
 
-;void vp8_unpack_block1d8_h6_sse2
-;(
-;    unsigned char  *src_ptr,
-;    unsigned short *output_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned int    output_height,
-;    unsigned int    output_width
-;)
-global sym(vp8_unpack_block1d8_h6_sse2)
-sym(vp8_unpack_block1d8_h6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        mov         rsi,        arg(0) ;src_ptr
-        mov         rdi,        arg(1) ;output_ptr
-
-        movsxd      rcx,        dword ptr arg(3) ;output_height
-        movsxd      rax,        dword ptr arg(2) ;src_pixels_per_line            ; Pitch for Source
-
-        pxor        xmm0,       xmm0                        ; clear xmm0 for unpack
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(4) ;output_width            ; Pitch for Source
-%endif
-
-unpack_block1d8_h6_sse2_rowloop:
-        movq        xmm1,       MMWORD PTR [rsi]            ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2
-        lea         rsi,        [rsi + rax]
-
-        punpcklbw   xmm1,       xmm0
-        movdqa      XMMWORD Ptr [rdi],         xmm1
-
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(4) ;[output_width]
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx
-        jnz         unpack_block1d8_h6_sse2_rowloop                ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_pack_block1d8_v6_sse2
-;(
-;    short *src_ptr,
-;    unsigned char *output_ptr,
-;    int dst_ptich,
-;    unsigned int pixels_per_line,
-;    unsigned int output_height,
-;    unsigned int output_width
-;)
-global sym(vp8_pack_block1d8_v6_sse2)
-sym(vp8_pack_block1d8_v6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
-        mov         rdi,        arg(1) ;output_ptr
-
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rcx,        DWORD PTR arg(4) ;[output_height]
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(5) ;output_width            ; Pitch for Source
-%endif
-
-pack_block1d8_v6_sse2_loop:
-        movdqa      xmm0,       XMMWORD PTR [rsi]
-        packuswb    xmm0,       xmm0
-
-        movq        QWORD PTR [rdi], xmm0         ; store the results in the destination
-        lea         rsi,        [rsi+rdx]
-
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(5) ;[output_width]
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx         ; decrement count
-        jnz         pack_block1d8_v6_sse2_loop               ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_pack_block1d16_v6_sse2
-;(
-;    short *src_ptr,
-;    unsigned char *output_ptr,
-;    int dst_ptich,
-;    unsigned int pixels_per_line,
-;    unsigned int output_height,
-;    unsigned int output_width
-;)
-global sym(vp8_pack_block1d16_v6_sse2)
-sym(vp8_pack_block1d16_v6_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-        movsxd      rdx,        dword ptr arg(3) ;pixels_per_line
-        mov         rdi,        arg(1) ;output_ptr
-
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rcx,        DWORD PTR arg(4) ;[output_height]
-%if ABI_IS_32BIT=0
-        movsxd      r8,         dword ptr arg(2) ;dst_pitch
-%endif
-
-pack_block1d16_v6_sse2_loop:
-        movdqa      xmm0,       XMMWORD PTR [rsi]
-        movdqa      xmm1,       XMMWORD PTR [rsi+16]
-
-        packuswb    xmm0,       xmm1
-        movdqa      XMMWORD PTR [rdi], xmm0         ; store the results in the destination
-
-        add         rsi,        rdx
-%if ABI_IS_32BIT
-        add         rdi,        DWORD Ptr arg(2) ;dst_pitch
-%else
-        add         rdi,        r8
-%endif
-        dec         rcx         ; decrement count
-        jnz         pack_block1d16_v6_sse2_loop               ; next row
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
 ;void vp8_bilinear_predict16x16_sse2
 ;(
 ;    unsigned char  *src_ptr,
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 80389429d..163ec5b29 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -68,6 +68,17 @@ extern void vp8_filter_block1d8_v6_sse2
     unsigned int output_width,
     const short    *vp8_filter
 );
+extern void vp8_filter_block1d16_v6_sse2
+(
+    unsigned short *src_ptr,
+    unsigned char *output_ptr,
+    int dst_ptich,
+    unsigned int pixels_per_line,
+    unsigned int pixel_step,
+    unsigned int output_height,
+    unsigned int output_width,
+    const short    *vp8_filter
+);
 extern void vp8_unpack_block1d16_h6_sse2
 (
     unsigned char  *src_ptr,
@@ -76,31 +87,32 @@ extern void vp8_unpack_block1d16_h6_sse2
     unsigned int    output_height,
     unsigned int    output_width
 );
-extern void vp8_unpack_block1d8_h6_sse2
+extern void vp8_filter_block1d8_h6_only_sse2
 (
     unsigned char  *src_ptr,
-    unsigned short *output_ptr,
     unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    int dst_ptich,
     unsigned int    output_height,
-    unsigned int    output_width
+    const short    *vp8_filter
 );
-extern void vp8_pack_block1d8_v6_sse2
+extern void vp8_filter_block1d16_h6_only_sse2
 (
-    unsigned short *src_ptr,
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    int dst_ptich,
+    unsigned int    output_height,
+    const short    *vp8_filter
+);
+extern void vp8_filter_block1d8_v6_only_sse2
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pixels_per_line,
     unsigned char *output_ptr,
     int dst_ptich,
-    unsigned int pixels_per_line,
-    unsigned int output_height,
-    unsigned int output_width
-);
-extern void vp8_pack_block1d16_v6_sse2
-(
-    unsigned short *src_ptr,
-    unsigned char *output_ptr,
-    int dst_ptich,
-    unsigned int pixels_per_line,
-    unsigned int output_height,
-    unsigned int output_width
+    unsigned int   output_height,
+    const short   *vp8_filter
 );
 extern prototype_subpixel_predict(vp8_bilinear_predict8x8_mmx);
 
@@ -247,23 +259,26 @@ void vp8_sixtap_predict16x16_sse2
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 21, 32, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, 16, VFilter);
-        vp8_filter_block1d8_v6_sse2(FData2 + 40, dst_ptr + 8, dst_pitch, 32, 16 , 16, 16, VFilter);
-    }
-    else
-    {
-        vp8_pack_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32,  16, 16);
+        vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
+        vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
     }
 }
 
@@ -283,25 +298,26 @@ void vp8_sixtap_predict8x8_sse2
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 13, 16, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 13, 16);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 8, dst_pitch, VFilter);
+        vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
     }
-    else
-    {
-        vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16,  8, dst_pitch);
-    }
-
-
 }
 
 
@@ -320,24 +336,26 @@ void vp8_sixtap_predict8x4_sse2
 
     if (xoffset)
     {
-        HFilter = vp8_six_tap_mmx[xoffset];
-        vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
+        if (yoffset)
+        {
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 1, 9, 16, HFilter);
+            VFilter = vp8_six_tap_mmx[yoffset];
+            vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+        }
+        else
+        {
+            // First-pass only
+            HFilter = vp8_six_tap_mmx[xoffset];
+            vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
+        }
     }
     else
     {
-        vp8_unpack_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 9, 16);
-    }
-
-    if (yoffset)
-    {
+        // Second-pass only
         VFilter = vp8_six_tap_mmx[yoffset];
-        vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16, 8 , 4, dst_pitch, VFilter);
+        vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
     }
-    else
-    {
-        vp8_pack_block1d8_v6_sse2(FData2 + 16, dst_ptr,   dst_pitch, 16,  4, dst_pitch);
-    }
-
-
 }
+
 #endif

From 330dd67b7b0619da57492ca24207425026b387f4 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 10 Jun 2010 12:07:34 -0400
Subject: [PATCH 019/307] Fix MinGW toolchain detection

Updated the comment in change I6bef2ab5, but missed adding the code to
the commit.

Change-Id: I14d300489b79730e3995175bfe5f9271b569abe3
---
 build/make/configure.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 28ed21cf5..a7b7d8084 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -519,7 +519,7 @@ process_common_toolchain() {
                 tgt_isa=universal
                 tgt_os=darwin9
                 ;;
-            *msys*|*cygwin*)
+            *mingw32*|*cygwin*)
                 tgt_os=win32
                 ;;
             *linux*|*bsd*)

From 317a66693b690eadd886d55286a24f428d7c50e5 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 10 Jun 2010 12:08:01 -0400
Subject: [PATCH 020/307] Remove reference to 'vpx Technologies'

Vestigial.

Change-Id: Iffa9e6d5ba5199b136d7549890101da17c11e3c3
---
 vp8/vp8_cx_iface.c | 4 ++--
 vp8/vp8_dx_iface.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index d04e4767c..9c05a642a 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1068,7 +1068,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
 #endif
 vpx_codec_iface_t vpx_codec_vp8_cx_algo =
 {
-    "vpx Technologies VP8 Encoder" VERSION_STRING,
+    "WebM Project VP8 Encoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR,
     /* vpx_codec_caps_t          caps; */
@@ -1157,7 +1157,7 @@ static vpx_codec_err_t api1_encode(vpx_codec_alg_priv_t  *ctx,
 
 vpx_codec_iface_t vpx_enc_vp8_algo =
 {
-    "vpx Technologies VP8 Encoder (Deprecated API)" VERSION_STRING,
+    "WebM Project VP8 Encoder (Deprecated API)" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_ENCODER,
     /* vpx_codec_caps_t          caps; */
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 6a27ee0f0..1f61207ae 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -657,7 +657,7 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
 #endif
 vpx_codec_iface_t vpx_codec_vp8_dx_algo =
 {
-    "vpx Technologies VP8 Decoder" VERSION_STRING,
+    "WebM Project VP8 Decoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
     /* vpx_codec_caps_t          caps; */
@@ -680,7 +680,7 @@ vpx_codec_iface_t vpx_codec_vp8_dx_algo =
  */
 vpx_codec_iface_t vpx_codec_vp8_algo =
 {
-    "vpx Technologies VP8 Decoder (Deprecated API)" VERSION_STRING,
+    "WebM Project VP8 Decoder (Deprecated API)" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
     VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC,
     /* vpx_codec_caps_t          caps; */

From 05c6eca4db7f2abec32c9ce0fc325d9a0b933fb7 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Thu, 10 Jun 2010 18:42:24 -0400
Subject: [PATCH 021/307] Fix new MV clamping scheme for chroma MVs.

The new scheme introduced in I68d35a2f did not clamp chroma MVs in the SPLITMV
 case, and clamped them incorrectly (to the luma plane bounds) in every other
 case.
Because chroma MVs are computed from the luma MVs before clamping occurs, they
 could still point outside of the frame buffer and cause crashes.
This clamping happens outside of the MV prediction loop, and so should not
 affect bitstream decoding.
---
 vp8/decoder/decodframe.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index de4a64588..04fb03fa4 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -149,6 +149,19 @@ static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
         mv->row = xd->mb_to_bottom_edge + (16 << 3);
 }
 
+/* A version of the above function for chroma block MVs.*/
+static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
+{
+    if (2*mv->col < (xd->mb_to_left_edge - (19 << 3)))
+        mv->col = (xd->mb_to_left_edge - (16 << 3)) >> 1;
+    else if (2*mv->col > xd->mb_to_right_edge + (18 << 3))
+        mv->col = (xd->mb_to_right_edge + (16 << 3)) >> 1;
+
+    if (2*mv->row < (xd->mb_to_top_edge - (19 << 3)))
+        mv->row = (xd->mb_to_top_edge - (16 << 3)) >> 1;
+    else if (2*mv->row > xd->mb_to_bottom_edge + (18 << 3))
+        mv->row = (xd->mb_to_bottom_edge + (16 << 3)) >> 1;
+}
 
 static void clamp_mvs(MACROBLOCKD *xd)
 {
@@ -158,11 +171,13 @@ static void clamp_mvs(MACROBLOCKD *xd)
 
         for (i=0; i<16; i++)
             clamp_mv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
+        for (i=16; i<24; i++)
+            clamp_uvmv_to_umv_border(&xd->block[i].bmi.mv.as_mv, xd);
     }
     else
     {
         clamp_mv_to_umv_border(&xd->mbmi.mv.as_mv, xd);
-        clamp_mv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
+        clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
     }
 
 }

From fb220d257b3179b6c7e9c04c82003a1b6c1d7c29 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 10 Jun 2010 08:56:31 -0400
Subject: [PATCH 022/307] replace while(0) construct with if/else

No good reason to be tricky here. I don't know why 'break' occurred to me
as the natrual replacement for the 'return', but an if/else block is
definitely clearer.

Change-Id: I08a336307afeb0dc7efa494b37398f239f66c2cf
---
 vp8/decoder/decodframe.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 04fb03fa4..72c312fc1 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -317,20 +317,19 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 
     xd->mode_info_context->mbmi.dc_diff = 1;
 
-    do {
-        if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
-        {
-            xd->mode_info_context->mbmi.dc_diff = 0;
-            skip_recon_mb(pbi, xd);
-            break;
-        }
-
+    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
+    {
+        xd->mode_info_context->mbmi.dc_diff = 0;
+        skip_recon_mb(pbi, xd);
+    }
+    else
+    {
         if (xd->segmentation_enabled)
             mb_init_dequantizer(pbi, xd);
 
         de_quantand_idct(pbi, xd);
         reconstruct_mb(pbi, xd);
-    } while(0);
+    }
 
 
     /* Restore the original MV so as not to affect the entropy context. */

From 3419e4d78555b8297e7f0767e1376e1cdb88e65c Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Thu, 10 Jun 2010 16:59:21 -0400
Subject: [PATCH 023/307] Change preprocessor check to _WIN32

Change-Id: I841dc0b8ebb150ac998f4076c148d7bb187e4301
---
 ivfenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ivfenc.c b/ivfenc.c
index 57c04dd02..c20df5b96 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -12,7 +12,7 @@
 /* This is a simple program that encodes YV12 files and generates ivf
  * files using the new interface.
  */
-#if defined(_MSC_VER)
+#if defined(_WIN32)
 #define USE_POSIX_MMAP 0
 #else
 #define USE_POSIX_MMAP 1

From f6a58d620da72e6753b0ec119e141a2991d4c8f3 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 11 Jun 2010 15:10:51 +0100
Subject: [PATCH 024/307] Tuning of baseline Rd equation to improve behavior at
 the

 low and high Q ends.
---
 vp8/encoder/rdopt.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index a6bd8e86d..4aedbd925 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -237,12 +237,12 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
     vp8_clear_system_state();  //__asm emms;
 
     cpi->RDMULT = (int)( (0.0001 * (capped_q * capped_q * capped_q * capped_q))
-                        -(0.0125 * (capped_q * capped_q * capped_q))
+                        -(0.015 * (capped_q * capped_q * capped_q))
                         +(3.25 * (capped_q * capped_q))
-                        -(12.5 * capped_q) + 50.0);
+                        -(17.5 * capped_q) + 125.0);
 
-    if (cpi->RDMULT < 50)
-        cpi->RDMULT = 50;
+    if (cpi->RDMULT < 125)
+        cpi->RDMULT = 125;
 
     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
     {

From 7a81b29d38a14bcf6656d3d15cc47490523f946e Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 11 Jun 2010 15:17:57 +0100
Subject: [PATCH 025/307] Use local pointer to pbi->common.

---
 vp8/decoder/decodframe.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 72c312fc1..0f1b879ca 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -450,7 +450,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         vp8_build_uvmvs(xd, pc->full_pixel);
 
         /*
-        if(pbi->common.current_video_frame==0 &&mb_col==1 && mb_row==0)
+        if(pc->current_video_frame==0 &&mb_col==1 && mb_row==0)
         pbi->debugoutput =1;
         else
         pbi->debugoutput =0;
@@ -699,7 +699,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
                                    "Invalid frame height");
             }
 
-            if (vp8_alloc_frame_buffers(&pbi->common, pc->Width, pc->Height))
+            if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
                 vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                                    "Failed to allocate frame buffers");
         }
@@ -936,17 +936,17 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
 
 
-    if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
+    if (pbi->b_multithreaded_lf && pc->filter_level != 0)
         vp8_start_lfthread(pbi);
 
-    if (pbi->b_multithreaded_rd && pbi->common.multi_token_partition != ONE_PARTITION)
+    if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
     {
         vp8_mtdecode_mb_rows(pbi, xd);
     }
     else
     {
         int ibc = 0;
-        int num_part = 1 << pbi->common.multi_token_partition;
+        int num_part = 1 << pc->multi_token_partition;
 
         // Decode the individual macro block
         for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
@@ -975,8 +975,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
 
     // If this was a kf or Gf note the Q used
-    if ((pc->frame_type == KEY_FRAME) || (pc->refresh_golden_frame) || pbi->common.refresh_alt_ref_frame)
+    if ((pc->frame_type == KEY_FRAME) ||
+         pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
+    {
         pc->last_kf_gf_q = pc->base_qindex;
+    }
 
     if (pc->refresh_entropy_probs == 0)
     {

From 20f7332b34b5ad0810e943c7d8fe561dad9c97a2 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 11 Jun 2010 16:12:45 +0100
Subject: [PATCH 026/307] Incorrect comment.

(Thanks to Ronald S. Bultje)
---
 vp8/vp8_dx_iface.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 1f61207ae..a99364d5e 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -258,12 +258,12 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t         *data,
 
     vpx_codec_err_t res = VPX_CODEC_OK;
     {
-        /*Parse from VP8 compressed data, the implies knowledge of the
-         *VP8 bitsteam.
-         * First 3 byte header including version, frame type and an offset
-         * Next 3 bytes are image sizewith 12 bit each for width and height
+        /* Parse uncompresssed part of key frame header.
+         * 3 bytes:- including version, frame type and an offset
+         * 3 bytes:- sync code (0x9d, 0x01, 0x2a)
+         * 4 bytes:- including image width and height in the lowest 14 bits
+         *           of each 2-byte value.
          */
-
         si->is_kf = 0;
 
         if (data_sz >= 10 && !(data[0] & 0x01))  /* I-Frame */

From 63ea8705eb0b4609b1c87968817d18421f051641 Mon Sep 17 00:00:00 2001
From: Makoto Kato <makoto.kt@gmail.com>
Date: Fri, 11 Jun 2010 18:32:28 +0900
Subject: [PATCH 027/307] some XMM registers are non-volatile on windows x64
 ABI

XMM6 to XMM15 are non-volatile on Windows x64 ABI.  We have to save
these registers.

Change-Id: I4676309f1350af25c8a35f0c81b1f0499ab99076
---
 vp8/common/x86/iwalsh_sse2.asm     |  2 ++
 vp8/common/x86/loopfilter_sse2.asm | 12 ++++++++++++
 vp8/common/x86/postproc_sse2.asm   |  6 ++++++
 vp8/common/x86/recon_sse2.asm      |  2 ++
 vp8/common/x86/subpixel_sse2.asm   | 12 ++++++++++++
 vpx_ports/x86_abi_support.asm      | 19 +++++++++++++++++++
 6 files changed, 53 insertions(+)

diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index cb61691fd..bb0d1d7ae 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -17,6 +17,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 2
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -101,6 +102,7 @@ sym(vp8_short_inv_walsh4x4_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 1c0a3881c..d160dd65a 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -26,6 +26,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -212,6 +213,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -231,6 +233,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -652,6 +655,7 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -671,6 +675,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1002,6 +1007,7 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1021,6 +1027,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1564,6 +1571,7 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1583,6 +1591,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1679,6 +1688,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1698,6 +1708,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
     push        rbp         ; save old base pointer value.
     mov         rbp, rsp    ; set new base pointer value.
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx         ; save callee-saved reg
     push        rsi
     push        rdi
@@ -1942,6 +1953,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 5097b2a30..9e56429e3 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -26,6 +26,7 @@ sym(vp8_post_proc_down_and_across_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -240,6 +241,7 @@ acrossnextcol:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -254,6 +256,7 @@ sym(vp8_mbpost_proc_down_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -439,6 +442,7 @@ loop_row:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -452,6 +456,7 @@ sym(vp8_mbpost_proc_across_ip_xmm):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -612,6 +617,7 @@ nextcol4:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 2ce028cdb..cfdbfada9 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -67,6 +67,7 @@ sym(vp8_recon4b_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM
     push        rsi
     push        rdi
     ; end prolog
@@ -119,6 +120,7 @@ sym(vp8_recon4b_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index cc2837b8d..b71a2f9d1 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -37,6 +37,7 @@ sym(vp8_filter_block1d8_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -129,6 +130,7 @@ filter_block1d8_h6_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -155,6 +157,7 @@ sym(vp8_filter_block1d16_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -304,6 +307,7 @@ filter_block1d16_h6_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -329,6 +333,7 @@ sym(vp8_filter_block1d8_v6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -397,6 +402,7 @@ vp8_filter_block1d8_v6_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -510,6 +516,7 @@ vp8_filter_block1d16_v6_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -641,6 +648,7 @@ sym(vp8_filter_block1d16_h6_only_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -876,6 +884,7 @@ vp8_filter_block1d8_v6_only_sse2_loop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -894,6 +903,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -933,6 +943,7 @@ unpack_block1d16_h6_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -953,6 +964,7 @@ sym(vp8_bilinear_predict16x16_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 6fdbf8add..7840e3594 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -215,6 +215,25 @@
   %define UNSHADOW_ARGS mov rsp, rbp
 %endif
 
+; must keep XMM6:XMM15 (libvpx uses XMM6 and XMM7) on Win64 ABI
+; rsp register has to be aligned
+%ifidn __OUTPUT_FORMAT__,x64
+%macro SAVE_XMM 0
+  sub rsp, 32
+  movdqa XMMWORD PTR [rsp], xmm6
+  movdqa XMMWORD PTR [rsp+16], xmm7
+%endmacro
+%macro RESTORE_XMM 0
+  movdqa xmm6, XMMWORD PTR [rsp]
+  movdqa xmm7, XMMWORD PTR [rsp+16]
+  add rsp, 32
+%endmacro
+%else
+%macro SAVE_XMM 0
+%endmacro
+%macro RESTORE_XMM 0
+%endmacro
+%endif
 
 ; Name of the rodata section
 ;

From b7c5d80212de672545278564b40e55be0ce1b1b4 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 11 Jun 2010 12:16:36 -0400
Subject: [PATCH 028/307] platform autodetect: accept amd64 as a synonym for
 x86_64

Thanks to James Cloos <cloos at jhcloos dot com> for the tip.

Change-Id: If377cc084dd7c16a4f51191a2aa0d83e7117ebec
---
 build/make/configure.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index a7b7d8084..05e550fb0 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -501,7 +501,7 @@ process_common_toolchain() {
 
         # detect tgt_isa
         case "$gcctarget" in
-            *x86_64*)
+            *x86_64*|*amd64*)
                 tgt_isa=x86_64
                 ;;
             *i[3456]86*)

From 9099fc0d69a69525656b4bfeeb1e7aabec04897b Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 11 Jun 2010 13:05:08 -0400
Subject: [PATCH 029/307] require --enable-psnr to build ssim

ssim.c comiles in a huge (512M) amount of global scratch space. Allocating
this data on the heap would be a better solution, but this file doesn't
need to be built at all in most cases, so as a first pass, disable it
except when doing opsnr.stt output (--enable-psnr).

Change-Id: I320d812f6d652a12516a16b52295ebff20b5bd42
---
 vp8/vp8cx.mk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 544a3b3fa..9496ef0be 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -74,7 +74,7 @@ VP8_CX_SRCS-yes += encoder/quantize.c
 VP8_CX_SRCS-yes += encoder/ratectrl.c
 VP8_CX_SRCS-yes += encoder/rdopt.c
 VP8_CX_SRCS-yes += encoder/sad_c.c
-VP8_CX_SRCS-yes += encoder/ssim.c
+VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c
 VP8_CX_SRCS-yes += encoder/tokenize.c
 VP8_CX_SRCS-yes += encoder/treewriter.c
 VP8_CX_SRCS-yes += encoder/variance_c.c

From 59c50966ac86d4c06b67466f2ac947a328658447 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 11 Jun 2010 13:15:30 -0400
Subject: [PATCH 030/307] Enable vp8_sad16x16x4d_sse3 in non-RTCD case

Typo caused C version of 16x16x4 SAD to be called when built with
--disable-runtime-cpu-detect.

Change-Id: I0fe6fa67280b3a5f13acb3c8ed914f039aaaf316
---
 vp8/encoder/x86/variance_x86.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index f00ad321d..9cdd662b5 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -241,7 +241,7 @@ extern prototype_sad_multi_dif_address(vp8_sad4x4x4d_sse3);
 #define vp8_variance_sad4x4x3 vp8_sad4x4x3_sse3
 
 #undef  vp8_variance_sad16x16x4d
-#define vp8_variance_sad16x16x4 vp8_sad16x16x4d_sse3
+#define vp8_variance_sad16x16x4d vp8_sad16x16x4d_sse3
 
 #undef  vp8_variance_sad16x8x4d
 #define vp8_variance_sad16x8x4d vp8_sad16x8x4d_sse3

From 46abed8d27d16e8cf0bc7c6df631d07c1e749189 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20J=C3=A4genstedt?= <philipj@opera.com>
Date: Wed, 12 May 2010 04:58:58 +0200
Subject: [PATCH 031/307] gitignore: initial version

Change-Id: I653ff5062660bc35cfc8a99d176e36d3d63bae20
---
 .gitignore | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 000000000..9a149a8d7
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,68 @@
+*~
+*.a
+*.d
+*.o
+*-old
+*-new
+*.mk
+*.asm
+TAGS
+.bins
+.libs
+.deps
+/.cflags-new
+/.cflags-old
+/.docs
+/.install-*
+/Makefile
+/config.err
+/config.mk
+/decode_to_md5
+/decode_to_md5.c
+/decode_to_md5.dox
+/decode_with_drops
+/decode_with_drops.c
+/decode_with_drops.dox
+/docs-generic-gnu.mk
+/docs/
+/doxyfile
+/error_resilient
+/error_resilient.c
+/error_resilient.dox
+/examples-generic-gnu.mk
+/examples.doxy
+/force_keyframe
+/force_keyframe.c
+/force_keyframe.dox
+/ivfdec
+/ivfdec.dox
+/ivfenc
+/ivfenc.dox
+/libs-generic-gnu.mk
+/libs.doxy
+/postproc
+/postproc.c
+/postproc.dox
+/samples.dox
+/simple_decoder
+/simple_decoder.c
+/simple_decoder.dox
+/simple_encoder
+/simple_encoder.c
+/simple_encoder.dox
+/twopass_encoder
+/twopass_encoder.c
+/twopass_encoder.dox
+/vp8_api1_migration.dox
+/vp8_scalable_patterns
+/vp8_scalable_patterns.c
+/vp8_scalable_patterns.dox
+/vp8_set_maps
+/vp8_set_maps.c
+/vp8_set_maps.dox
+/vp8cx_set_ref
+/vp8cx_set_ref.c
+/vp8cx_set_ref.dox
+/vpx_config.c
+/vpx_config.h
+/vpx_version.h

From 3245d463c4a3e44185722c1e108d20576cd3f9f8 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 10 Jun 2010 17:10:12 -0400
Subject: [PATCH 032/307] ivfenc: support reading/writing from a pipe

Use - for the filename to use stdin/stdout. Update to avoid opening
the file multiple times.

Change-Id: I356356fa16bb334d4b22abc531dc03c0d95917a3
---
 ivfenc.c | 334 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 170 insertions(+), 164 deletions(-)

diff --git a/ivfenc.c b/ivfenc.c
index c20df5b96..b13db6522 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -55,8 +55,8 @@ void die(const char *fmt, ...)
 {
     va_list ap;
     va_start(ap, fmt);
-    vprintf(fmt, ap);
-    printf("\n");
+    vfprintf(stderr, fmt, ap);
+    fprintf(stderr, "\n");
     usage_exit();
 }
 
@@ -66,10 +66,10 @@ static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s)
     {
         const char *detail = vpx_codec_error_detail(ctx);
 
-        printf("%s: %s\n", s, vpx_codec_error(ctx));
+        fprintf(stderr, "%s: %s\n", s, vpx_codec_error(ctx));
 
         if (detail)
-            printf("    %s\n", detail);
+            fprintf(stderr, "    %s\n", detail);
 
         exit(EXIT_FAILURE);
     }
@@ -226,9 +226,15 @@ enum video_file_type
     FILE_TYPE_Y4M
 };
 
+struct detect_buffer {
+    char buf[4];
+    int  valid;
+};
+
+
 #define IVF_FRAME_HDR_SZ (4+8) /* 4 byte size + 8 byte timestamp */
 static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
-                      y4m_input *y4m)
+                      y4m_input *y4m, struct detect_buffer *detect)
 {
     int plane = 0;
 
@@ -275,7 +281,15 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
 
             for (r = 0; r < h; r++)
             {
-                fread(ptr, 1, w, f);
+                if (detect->valid)
+                {
+                    memcpy(ptr, detect->buf, 4);
+                    fread(ptr+4, 1, w-4, f);
+                    detect->valid = 0;
+                }
+                else
+                    fread(ptr, 1, w, f);
+
                 ptr += img->stride[plane];
             }
         }
@@ -286,16 +300,14 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
 
 
 unsigned int file_is_y4m(FILE *infile,
-                         y4m_input *y4m)
+                         y4m_input *y4m,
+                         char       detect[4])
 {
-    char raw_hdr[4];
-    if (fread(raw_hdr, 1, 4, infile) == 4 &&
-        memcmp(raw_hdr, "YUV4", 4) == 0 &&
-        y4m_input_open(y4m, infile, raw_hdr, 4) >= 0)
+    if(memcmp(detect, "YUV4", 4) == 0 &&
+        y4m_input_open(y4m, infile, detect, 4) >= 0)
     {
         return 1;
     }
-    rewind(infile);
     return 0;
 }
 
@@ -303,18 +315,21 @@ unsigned int file_is_y4m(FILE *infile,
 unsigned int file_is_ivf(FILE *infile,
                          unsigned int *fourcc,
                          unsigned int *width,
-                         unsigned int *height)
+                         unsigned int *height,
+                         char          detect[4])
 {
     char raw_hdr[IVF_FILE_HDR_SZ];
     int is_ivf = 0;
 
+    if(memcmp(detect, "DKIF", 4) != 0)
+        return 0;
+
     /* See write_ivf_file_header() for more documentation on the file header
      * layout.
      */
-    if (fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ)
+    if (fread(raw_hdr + 4, 1, IVF_FILE_HDR_SZ - 4, infile)
+        == IVF_FILE_HDR_SZ - 4)
     {
-        if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
-            && raw_hdr[2] == 'I' && raw_hdr[3] == 'F')
         {
             is_ivf = 1;
 
@@ -331,8 +346,6 @@ unsigned int file_is_ivf(FILE *infile,
         *width = mem_get_le16(raw_hdr + 12);
         *height = mem_get_le16(raw_hdr + 14);
     }
-    else
-        rewind(infile);
 
     return is_ivf;
 }
@@ -546,28 +559,28 @@ static void usage_exit()
 {
     int i;
 
-    printf("Usage: %s <options> src_filename dst_filename\n", exec_name);
+    fprintf(stderr, "Usage: %s <options> src_filename dst_filename\n", exec_name);
 
-    printf("\n_options:\n");
+    fprintf(stderr, "\n_options:\n");
     arg_show_usage(stdout, main_args);
-    printf("\n_encoder Global Options:\n");
+    fprintf(stderr, "\n_encoder Global Options:\n");
     arg_show_usage(stdout, global_args);
-    printf("\n_rate Control Options:\n");
+    fprintf(stderr, "\n_rate Control Options:\n");
     arg_show_usage(stdout, rc_args);
-    printf("\n_twopass Rate Control Options:\n");
+    fprintf(stderr, "\n_twopass Rate Control Options:\n");
     arg_show_usage(stdout, rc_twopass_args);
-    printf("\n_keyframe Placement Options:\n");
+    fprintf(stderr, "\n_keyframe Placement Options:\n");
     arg_show_usage(stdout, kf_args);
 #if CONFIG_VP8_ENCODER
-    printf("\n_vp8 Specific Options:\n");
+    fprintf(stderr, "\n_vp8 Specific Options:\n");
     arg_show_usage(stdout, vp8_args);
 #endif
-    printf("\n"
+    fprintf(stderr, "\n"
            "Included encoders:\n"
            "\n");
 
     for (i = 0; i < sizeof(codecs) / sizeof(codecs[0]); i++)
-        printf("    %-6s - %s\n",
+        fprintf(stderr, "    %-6s - %s\n",
                codecs[i].name,
                vpx_codec_iface_name(codecs[i].iface));
 
@@ -687,7 +700,7 @@ int main(int argc, const char **argv_)
         /* DWIM: Assume the user meant passes=2 if pass=2 is specified */
         if (one_pass_only > arg_passes)
         {
-            printf("Warning: Assuming --pass=%d implies --passes=%d\n",
+            fprintf(stderr, "Warning: Assuming --pass=%d implies --passes=%d\n",
                    one_pass_only, one_pass_only);
             arg_passes = one_pass_only;
         }
@@ -701,7 +714,8 @@ int main(int argc, const char **argv_)
 
     if (res)
     {
-        printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+        fprintf(stderr, "Failed to get config: %s\n",
+                vpx_codec_err_to_string(res));
         return EXIT_FAILURE;
     }
 
@@ -761,24 +775,27 @@ int main(int argc, const char **argv_)
             cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &minsection_pct, argi))
         {
             cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &maxsection_pct, argi))
         {
             cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
 
             if (arg_passes < 2)
-                printf("Warning: option %s ignored in one-pass mode.\n",
-                       arg.name);
+                fprintf(stderr,
+                        "Warning: option %s ignored in one-pass mode.\n",
+                        arg.name);
         }
         else if (arg_match(&arg, &kf_min_dist, argi))
             cfg.kf_min_dist = arg_parse_uint(&arg);
@@ -826,7 +843,7 @@ int main(int argc, const char **argv_)
 
     /* Check for unrecognized options */
     for (argi = argv; *argi; argi++)
-        if (argi[0][0] == '-')
+        if (argi[0][0] == '-' && argi[0][1])
             die("Error: Unrecognized option %s\n", *argi);
 
     /* Handle non-option arguments */
@@ -836,140 +853,126 @@ int main(int argc, const char **argv_)
     if (!in_fn || !out_fn)
         usage_exit();
 
-    /* Parse certain options from the input file, if possible */
-    infile = fopen(in_fn, "rb");
-
-    if (!infile)
-    {
-        printf("Failed to open input file");
-        return EXIT_FAILURE;
-    }
-
-    if (file_is_y4m(infile, &y4m))
-    {
-        file_type = FILE_TYPE_Y4M;
-        cfg.g_w = y4m.pic_w;
-        cfg.g_h = y4m.pic_h;
-        /* Use the frame rate from the file only if none was specified on the
-         *  command-line.
-         */
-        if (!arg_have_timebase)
-        {
-            cfg.g_timebase.num = y4m.fps_d;
-            cfg.g_timebase.den = y4m.fps_n;
-        }
-        arg_use_i420 = 0;
-    }
-    else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h))
-    {
-        file_type = FILE_TYPE_IVF;
-        switch (fourcc)
-        {
-        case 0x32315659:
-            arg_use_i420 = 0;
-            break;
-        case 0x30323449:
-            arg_use_i420 = 1;
-            break;
-        default:
-            printf("Unsupported fourcc (%08x) in IVF\n", fourcc);
-            return EXIT_FAILURE;
-        }
-    }
-    else
-        file_type = FILE_TYPE_RAW;
-
-    fclose(infile);
-
-
-#define SHOW(field) printf("    %-28s = %d\n", #field, cfg.field)
-
-    if (verbose)
-    {
-        printf("Codec: %s\n", vpx_codec_iface_name(codec->iface));
-        printf("Source file: %s Format: %s\n", in_fn, arg_use_i420 ? "I420" : "YV12");
-        printf("Destination file: %s\n", out_fn);
-        printf("Encoder parameters:\n");
-
-        SHOW(g_usage);
-        SHOW(g_threads);
-        SHOW(g_profile);
-        SHOW(g_w);
-        SHOW(g_h);
-        SHOW(g_timebase.num);
-        SHOW(g_timebase.den);
-        SHOW(g_error_resilient);
-        SHOW(g_pass);
-        SHOW(g_lag_in_frames);
-        SHOW(rc_dropframe_thresh);
-        SHOW(rc_resize_allowed);
-        SHOW(rc_resize_up_thresh);
-        SHOW(rc_resize_down_thresh);
-        SHOW(rc_end_usage);
-        SHOW(rc_target_bitrate);
-        SHOW(rc_min_quantizer);
-        SHOW(rc_max_quantizer);
-        SHOW(rc_undershoot_pct);
-        SHOW(rc_overshoot_pct);
-        SHOW(rc_buf_sz);
-        SHOW(rc_buf_initial_sz);
-        SHOW(rc_buf_optimal_sz);
-        SHOW(rc_2pass_vbr_bias_pct);
-        SHOW(rc_2pass_vbr_minsection_pct);
-        SHOW(rc_2pass_vbr_maxsection_pct);
-        SHOW(kf_mode);
-        SHOW(kf_min_dist);
-        SHOW(kf_max_dist);
-    }
-
-    if (file_type == FILE_TYPE_Y4M)
-        /*The Y4M reader does its own allocation.
-          Just initialize this here to avoid problems if we never read any
-           frames.*/
-        memset(&raw, 0, sizeof(raw));
-    else
-        vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
-                      cfg.g_w, cfg.g_h, 1);
-
-    // This was added so that ivfenc will create monotically increasing
-    // timestamps.  Since we create new timestamps for alt-reference frames
-    // we need to make room in the series of timestamps.  Since there can
-    // only be 1 alt-ref frame ( current bitstream) multiplying by 2
-    // gives us enough room.
-    cfg.g_timebase.den *= 2;
-
     memset(&stats, 0, sizeof(stats));
 
     for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++)
     {
         int frames_in = 0, frames_out = 0;
         unsigned long nbytes = 0;
+        struct detect_buffer detect;
 
-        infile = fopen(in_fn, "rb");
+        /* Parse certain options from the input file, if possible */
+        infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
 
         if (!infile)
         {
-            printf("Failed to open input file");
+            fprintf(stderr, "Failed to open input file\n");
             return EXIT_FAILURE;
         }
 
-        /*Skip the file header.*/
-        if (file_type == FILE_TYPE_IVF)
+        fread(detect.buf, 1, 4, infile);
+        detect.valid = 0;
+
+        if (file_is_y4m(infile, &y4m, detect.buf))
         {
-            char raw_hdr[IVF_FILE_HDR_SZ];
-            (void)fread(raw_hdr, 1, IVF_FILE_HDR_SZ, infile);
+            file_type = FILE_TYPE_Y4M;
+            cfg.g_w = y4m.pic_w;
+            cfg.g_h = y4m.pic_h;
+            /* Use the frame rate from the file only if none was specified on the
+             *  command-line.
+             */
+            if (!arg_have_timebase)
+            {
+                cfg.g_timebase.num = y4m.fps_d;
+                cfg.g_timebase.den = y4m.fps_n;
+            }
+            arg_use_i420 = 0;
         }
-        else if(file_type == FILE_TYPE_Y4M)
+        else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
         {
-            char buffer[80];
-            (void)fgets(buffer, sizeof(buffer)/sizeof(*buffer) - 1, infile);
+            file_type = FILE_TYPE_IVF;
+            switch (fourcc)
+            {
+            case 0x32315659:
+                arg_use_i420 = 0;
+                break;
+            case 0x30323449:
+                arg_use_i420 = 1;
+                break;
+            default:
+                fprintf(stderr, "Unsupported fourcc (%08x) in IVF\n", fourcc);
+                return EXIT_FAILURE;
+            }
+        }
+        else
+        {
+            file_type = FILE_TYPE_RAW;
+            detect.valid = 1;
+        }
+#define SHOW(field) fprintf(stderr, "    %-28s = %d\n", #field, cfg.field)
+
+        if (verbose && pass == 0)
+        {
+            fprintf(stderr, "Codec: %s\n", vpx_codec_iface_name(codec->iface));
+            fprintf(stderr, "Source file: %s Format: %s\n", in_fn,
+                    arg_use_i420 ? "I420" : "YV12");
+            fprintf(stderr, "Destination file: %s\n", out_fn);
+            fprintf(stderr, "Encoder parameters:\n");
+
+            SHOW(g_usage);
+            SHOW(g_threads);
+            SHOW(g_profile);
+            SHOW(g_w);
+            SHOW(g_h);
+            SHOW(g_timebase.num);
+            SHOW(g_timebase.den);
+            SHOW(g_error_resilient);
+            SHOW(g_pass);
+            SHOW(g_lag_in_frames);
+            SHOW(rc_dropframe_thresh);
+            SHOW(rc_resize_allowed);
+            SHOW(rc_resize_up_thresh);
+            SHOW(rc_resize_down_thresh);
+            SHOW(rc_end_usage);
+            SHOW(rc_target_bitrate);
+            SHOW(rc_min_quantizer);
+            SHOW(rc_max_quantizer);
+            SHOW(rc_undershoot_pct);
+            SHOW(rc_overshoot_pct);
+            SHOW(rc_buf_sz);
+            SHOW(rc_buf_initial_sz);
+            SHOW(rc_buf_optimal_sz);
+            SHOW(rc_2pass_vbr_bias_pct);
+            SHOW(rc_2pass_vbr_minsection_pct);
+            SHOW(rc_2pass_vbr_maxsection_pct);
+            SHOW(kf_mode);
+            SHOW(kf_min_dist);
+            SHOW(kf_max_dist);
         }
 
-        outfile = fopen(out_fn, "wb");
+        if(pass == 0) {
+            if (file_type == FILE_TYPE_Y4M)
+                /*The Y4M reader does its own allocation.
+                  Just initialize this here to avoid problems if we never read any
+                   frames.*/
+                memset(&raw, 0, sizeof(raw));
+            else
+                vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
+                              cfg.g_w, cfg.g_h, 1);
+
+            // This was added so that ivfenc will create monotically increasing
+            // timestamps.  Since we create new timestamps for alt-reference frames
+            // we need to make room in the series of timestamps.  Since there can
+            // only be 1 alt-ref frame ( current bitstream) multiplying by 2
+            // gives us enough room.
+            cfg.g_timebase.den *= 2;
+        }
+
+        outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
 
         if (!outfile)
         {
-            printf("Failed to open output file");
+            fprintf(stderr, "Failed to open output file\n");
             return EXIT_FAILURE;
         }
 
@@ -977,7 +980,7 @@ int main(int argc, const char **argv_)
         {
             if (!stats_open_file(&stats, stats_fn, pass))
             {
-                printf("Failed to open statistics store\n");
+                fprintf(stderr, "Failed to open statistics store\n");
                 return EXIT_FAILURE;
             }
         }
@@ -985,7 +988,7 @@ int main(int argc, const char **argv_)
         {
             if (!stats_open_mem(&stats, pass))
             {
-                printf("Failed to open statistics store\n");
+                fprintf(stderr, "Failed to open statistics store\n");
                 return EXIT_FAILURE;
             }
         }
@@ -1020,8 +1023,8 @@ int main(int argc, const char **argv_)
         for (i = 0; i < arg_ctrl_cnt; i++)
         {
             if (vpx_codec_control_(&encoder, arg_ctrls[i][0], arg_ctrls[i][1]))
-                printf("Error: Tried to set control %d = %d\n",
-                       arg_ctrls[i][0], arg_ctrls[i][1]);
+                fprintf(stderr, "Error: Tried to set control %d = %d\n",
+                        arg_ctrls[i][0], arg_ctrls[i][1]);
 
             ctx_exit_on_error(&encoder, "Failed to control codec");
         }
@@ -1037,13 +1040,15 @@ int main(int argc, const char **argv_)
 
             if (!arg_limit || frames_in < arg_limit)
             {
-                frame_avail = read_frame(infile, &raw, file_type, &y4m);
+                frame_avail = read_frame(infile, &raw, file_type, &y4m,
+                                         &detect);
 
                 if (frame_avail)
                     frames_in++;
 
-                printf("\rPass %d/%d frame %4d/%-4d %7ldB \033[K", pass + 1,
-                       arg_passes, frames_in, frames_out, nbytes);
+                fprintf(stderr,
+                        "\rPass %d/%d frame %4d/%-4d %7ldB \033[K", pass + 1,
+                        arg_passes, frames_in, frames_out, nbytes);
             }
             else
                 frame_avail = 0;
@@ -1067,15 +1072,15 @@ int main(int argc, const char **argv_)
                 {
                 case VPX_CODEC_CX_FRAME_PKT:
                     frames_out++;
-                    printf(" %6luF",
-                           (unsigned long)pkt->data.frame.sz);
+                    fprintf(stderr, " %6luF",
+                            (unsigned long)pkt->data.frame.sz);
                     write_ivf_frame_header(outfile, pkt);
                     fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
                     nbytes += pkt->data.raw.sz;
                     break;
                 case VPX_CODEC_STATS_PKT:
                     frames_out++;
-                    printf(" %6luS",
+                    fprintf(stderr, " %6luS",
                            (unsigned long)pkt->data.twopass_stats.sz);
                     stats_write(&stats,
                                 pkt->data.twopass_stats.buf,
@@ -1089,7 +1094,7 @@ int main(int argc, const char **argv_)
                         int i;
 
                         for (i = 0; i < 4; i++)
-                            printf("%.3lf ", pkt->data.psnr.psnr[i]);
+                            fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]);
                     }
 
                     break;
@@ -1104,7 +1109,8 @@ int main(int argc, const char **argv_)
         /* this bitrate calc is simplified and relies on the fact that this
          * application uses 1/timebase for framerate.
          */
-        printf("\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
+        fprintf(stderr,
+               "\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
                " %7lu %s (%.2f fps)\033[K", pass + 1,
                arg_passes, frames_in, frames_out, nbytes, nbytes * 8 / frames_in,
                nbytes * 8 *(int64_t)cfg.g_timebase.den/2/ cfg.g_timebase.num / frames_in,
@@ -1121,7 +1127,7 @@ int main(int argc, const char **argv_)
 
         fclose(outfile);
         stats_close(&stats);
-        printf("\n");
+        fprintf(stderr, "\n");
 
         if (one_pass_only)
             break;

From cd475da8ed054187ed5e2013cc71f46a28202575 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Sat, 12 Jun 2010 14:11:51 -0400
Subject: [PATCH 033/307] Make this/next iiratio unsigned.

This patch addresses issue #79, which is a regression since commit
28de670 "Fix RD bug." If the coded error value is zero, the iiratio
calculation effectively multiplies by 1000000 by the
DOUBLE_DIVIDE_CHECK macro. This can result in a value larger than
INT_MAX, giving a negative ratio. Since the error values are
conceptually unsigned (though they're stored in a double) this patch
makes the iiratio values unsigned, which allows the clamping to work
as expected.
---
 vp8/encoder/onyx_int.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index b934d98c7..889bf735f 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -523,8 +523,8 @@ typedef struct
     int motion_lvl;
     int motion_speed;
     int motion_var;
-    int next_iiratio;
-    int this_iiratio;
+    unsigned int next_iiratio;
+    unsigned int this_iiratio;
     int this_frame_modified_error;
 
     double norm_intra_err_per_mb;

From df2c62d35782442b621388be8adba32a709afb1a Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 14 Jun 2010 08:33:54 -0400
Subject: [PATCH 034/307] ivfenc: fix two-pass support of raw files

Commit 3245d46 "ivfenc: support reading/writing from a pipe" broke
support for two pass encodes of raw files when done in two
invocations of ivfenc. The raw image was only set up on pass 0,
which was never hit when running with --pass=2 --passes=2.

Change-Id: I6a9858be1a8998d5bd45331123b46b1baa05b379
---
 ivfenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ivfenc.c b/ivfenc.c
index b13db6522..e9a49cddd 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -950,7 +950,7 @@ int main(int argc, const char **argv_)
             SHOW(kf_max_dist);
         }
 
-        if(pass == 0) {
+        if(pass == (one_pass_only ? one_pass_only - 1 : 0)) {
             if (file_type == FILE_TYPE_Y4M)
                 /*The Y4M reader does its own allocation.
                   Just initialize this here to avoid problems if we never read any

From 1856f2213dae9fefaad0906011de31917853cffc Mon Sep 17 00:00:00 2001
From: Andres Mejia <mcitadel@gmail.com>
Date: Mon, 14 Jun 2010 01:27:33 -0400
Subject: [PATCH 035/307] Use public domain implementation for MD5 algorithm

The RSA Data Security, Inc. implementation license bears a requirement
similar to the old problematic BSD license with advertising clause.

Change-Id: I877b71ff0548934b1c4fd87245696f53dedbdf26
---
 examples/decode_to_md5.txt |   8 +-
 ivfdec.c                   |   8 +-
 md5_utils.c                | 478 +++++++++++++++++--------------------
 md5_utils.h                |  71 +++---
 4 files changed, 258 insertions(+), 307 deletions(-)

diff --git a/examples/decode_to_md5.txt b/examples/decode_to_md5.txt
index 0599b135c..b3dd56876 100644
--- a/examples/decode_to_md5.txt
+++ b/examples/decode_to_md5.txt
@@ -26,21 +26,21 @@ is processed, then U, then V. It is important to honor the image's `stride`
 values.
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_DX
 unsigned char  md5_sum[16];
-md5_ctx_t      md5;
+MD5Context     md5;
 int            i;
 
-md5_init(&md5);
+MD5Init(&md5);
 
 for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
     for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        md5_update(&md5, buf, img->d_w >> (plane?1:0));
+        MD5Update(&md5, buf, img->d_w >> (plane?1:0));
         buf += img->stride[plane];
     }
 }
 
-md5_finalize(&md5, md5_sum);
+MD5Final(md5_sum, &md5);
 for(i=0; i<16; i++)
     fprintf(outfile, "%02x",md5_sum[i]);
 fprintf(outfile, "  img-%dx%d-%04d.i420\n", img->d_w, img->d_h,
diff --git a/ivfdec.c b/ivfdec.c
index e826f6e1f..2b26d55e8 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -235,9 +235,9 @@ void *out_open(const char *out_fn, int do_md5)
     if (do_md5)
     {
 #if CONFIG_MD5
-        md5_ctx_t *md5_ctx = out = malloc(sizeof(md5_ctx_t));
+        MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
         (void)out_fn;
-        md5_init(md5_ctx);
+        MD5Init(md5_ctx);
 #endif
     }
     else
@@ -259,7 +259,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
     if (do_md5)
     {
 #if CONFIG_MD5
-        md5_update(out, buf, len);
+        MD5Update(out, buf, len);
 #endif
     }
     else
@@ -276,7 +276,7 @@ void out_close(void *out, const char *out_fn, int do_md5)
         uint8_t md5[16];
         int i;
 
-        md5_finalize(out, md5);
+        MD5Final(md5, out);
         free(out);
 
         for (i = 0; i < 16; i++)
diff --git a/md5_utils.c b/md5_utils.c
index 190d95570..455d9cd2b 100644
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -1,299 +1,253 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * This code implements the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
  *
- *  Use of this source code is governed by a BSD-style license 
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
- *  be found in the AUTHORS file in the root of the source tree.
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h' header
+ * definitions
+ *  - Ian Jackson <ian@chiark.greenend.org.uk>.
+ * Still in the public domain.
  */
 
+#include <sys/types.h>    /* for stupid systems */
 
-/*
-Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-rights reserved.
-
-License to copy and use this software is granted provided that it
-is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-Algorithm" in all material mentioning or referencing this software
-or this function.
-
-License is also granted to make and use derivative works provided
-that such works are identified as "derived from the RSA Data
-Security, Inc. MD5 Message-Digest Algorithm" in all material
-mentioning or referencing the derived work.
-
-RSA Data Security, Inc. makes no representations concerning either
-the merchantability of this software or the suitability of this
-software for any particular purpose. It is provided "as is"
-without express or implied warranty of any kind.
-
-These notices must be retained in any copies of any part of this
-documentation and/or software.
-*/
+#include <string.h>   /* for memcpy() */
 
 #include "md5_utils.h"
-#include <string.h>
 
-/* Constants for md5_transform routine.
- */
-#define S11 7
-#define S12 12
-#define S13 17
-#define S14 22
-#define S21 5
-#define S22 9
-#define S23 14
-#define S24 20
-#define S31 4
-#define S32 11
-#define S33 16
-#define S34 23
-#define S41 6
-#define S42 10
-#define S43 15
-#define S44 21
-
-static void md5_transform(uint32_t state[4], const uint8_t block[64]);
-static void Encode(uint8_t *output, const uint32_t *input, unsigned int len);
-static void Decode(uint32_t *output, const uint8_t *input, unsigned int len);
-#define md5_memset memset
-#define md5_memcpy memcpy
-
-static unsigned char PADDING[64] =
+void
+byteSwap(UWORD32 *buf, unsigned words)
 {
-    0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
-};
+    md5byte *p;
 
-/* F, G, H and I are basic MD5 functions.
- */
-#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
-#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
-#define H(x, y, z) ((x) ^ (y) ^ (z))
-#define I(x, y, z) ((y) ^ ((x) | (~z)))
+    /* Only swap bytes for big endian machines */
+    int i = 1;
 
-/* ROTATE_LEFT rotates x left n bits.
- */
-#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
+    if (*(char *)&i == 1)
+        return;
 
-/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
-Rotation is separate from addition to prevent recomputation.
- */
-#define FF(a, b, c, d, x, s, ac) { \
-        (a) += F ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define GG(a, b, c, d, x, s, ac) { \
-        (a) += G ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define HH(a, b, c, d, x, s, ac) { \
-        (a) += H ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
-#define II(a, b, c, d, x, s, ac) { \
-        (a) += I ((b), (c), (d)) + (x) + (uint32_t)(ac); \
-        (a) = ROTATE_LEFT ((a), (s)); \
-        (a) += (b); \
-    }
+    p = (md5byte *)buf;
 
-/* MD5 initialization. Begins an MD5 operation, writing a new context.
- */
-void md5_init(md5_ctx_t *context)
-{
-    context->count[0] = context->count[1] = 0;
-    /* Load magic initialization constants.
-    */
-    context->state[0] = 0x67452301;
-    context->state[1] = 0xefcdab89;
-    context->state[2] = 0x98badcfe;
-    context->state[3] = 0x10325476;
-}
-
-/* MD5 block update operation. Continues an MD5 message-digest
-  operation, processing another message block, and updating the
-  context.
- */
-void md5_update(md5_ctx_t *context, const uint8_t *input, unsigned int input_len)
-{
-    unsigned int i, index, part_len;
-
-    /* Compute number of bytes mod 64 */
-    index = (unsigned int)((context->count[0] >> 3) & 0x3F);
-
-    /* Update number of bits */
-    if ((context->count[0] += ((uint32_t)input_len << 3))
-        < ((uint32_t)input_len << 3))
-        context->count[1]++;
-
-    context->count[1] += ((uint32_t)input_len >> 29);
-
-    part_len = 64 - index;
-
-    /* Transform as many times as possible. */
-    if (input_len >= part_len)
+    do
     {
-        memcpy(&context->buffer[index], input, part_len);
-        md5_transform(context->state, context->buffer);
-
-        for (i = part_len; i + 63 < input_len; i += 64)
-            md5_transform(context->state, &input[i]);
-
-        index = 0;
+        *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 |
+                 ((unsigned)p[1] << 8 | p[0]);
+        p += 4;
     }
-    else
-        i = 0;
-
-    /* Buffer remaining input */
-    memcpy(&context->buffer[index], &input[i], input_len - i);
+    while (--words);
 }
 
-/* MD5 finalization. Ends an MD5 message-digest operation, writing the
-  the message digest and zeroizing the context.
+/*
+ * Start MD5 accumulation.  Set bit count to 0 and buffer to mysterious
+ * initialization constants.
  */
-void md5_finalize(md5_ctx_t *context, uint8_t digest[16])
+void
+MD5Init(struct MD5Context *ctx)
 {
-    unsigned char bits[8];
-    unsigned int index, pad_len;
+    ctx->buf[0] = 0x67452301;
+    ctx->buf[1] = 0xefcdab89;
+    ctx->buf[2] = 0x98badcfe;
+    ctx->buf[3] = 0x10325476;
 
-    /* Save number of bits */
-    Encode(bits, context->count, 8);
-
-    /* Pad out to 56 mod 64.
-    */
-    index = (unsigned int)((context->count[0] >> 3) & 0x3f);
-    pad_len = (index < 56) ? (56 - index) : (120 - index);
-    md5_update(context, PADDING, pad_len);
-
-    /* Append length (before padding) */
-    md5_update(context, bits, 8);
-    /* Store state in digest */
-    Encode(digest, context->state, 16);
-
-    /* Zeroize sensitive information.
-    */
-    memset(context, 0, sizeof(*context));
+    ctx->bytes[0] = 0;
+    ctx->bytes[1] = 0;
 }
 
-/* MD5 basic transformation. Transforms state based on block.
+/*
+ * Update context to reflect the concatenation of another buffer full
+ * of bytes.
  */
-static void md5_transform(uint32_t state[4], const uint8_t block[64])
+void
+MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len)
 {
-    uint32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];
+    UWORD32 t;
 
-    Decode(x, block, 64);
+    /* Update byte count */
 
-    /* Round 1 */
-    FF(a, b, c, d, x[ 0], S11, 0xd76aa478);  /* 1 */
-    FF(d, a, b, c, x[ 1], S12, 0xe8c7b756);  /* 2 */
-    FF(c, d, a, b, x[ 2], S13, 0x242070db);  /* 3 */
-    FF(b, c, d, a, x[ 3], S14, 0xc1bdceee);  /* 4 */
-    FF(a, b, c, d, x[ 4], S11, 0xf57c0faf);  /* 5 */
-    FF(d, a, b, c, x[ 5], S12, 0x4787c62a);  /* 6 */
-    FF(c, d, a, b, x[ 6], S13, 0xa8304613);  /* 7 */
-    FF(b, c, d, a, x[ 7], S14, 0xfd469501);  /* 8 */
-    FF(a, b, c, d, x[ 8], S11, 0x698098d8);  /* 9 */
-    FF(d, a, b, c, x[ 9], S12, 0x8b44f7af);  /* 10 */
-    FF(c, d, a, b, x[10], S13, 0xffff5bb1);  /* 11 */
-    FF(b, c, d, a, x[11], S14, 0x895cd7be);  /* 12 */
-    FF(a, b, c, d, x[12], S11, 0x6b901122);  /* 13 */
-    FF(d, a, b, c, x[13], S12, 0xfd987193);  /* 14 */
-    FF(c, d, a, b, x[14], S13, 0xa679438e);  /* 15 */
-    FF(b, c, d, a, x[15], S14, 0x49b40821);  /* 16 */
+    t = ctx->bytes[0];
 
-    /* Round 2 */
-    GG(a, b, c, d, x[ 1], S21, 0xf61e2562);  /* 17 */
-    GG(d, a, b, c, x[ 6], S22, 0xc040b340);  /* 18 */
-    GG(c, d, a, b, x[11], S23, 0x265e5a51);  /* 19 */
-    GG(b, c, d, a, x[ 0], S24, 0xe9b6c7aa);  /* 20 */
-    GG(a, b, c, d, x[ 5], S21, 0xd62f105d);  /* 21 */
-    GG(d, a, b, c, x[10], S22,  0x2441453);  /* 22 */
-    GG(c, d, a, b, x[15], S23, 0xd8a1e681);  /* 23 */
-    GG(b, c, d, a, x[ 4], S24, 0xe7d3fbc8);  /* 24 */
-    GG(a, b, c, d, x[ 9], S21, 0x21e1cde6);  /* 25 */
-    GG(d, a, b, c, x[14], S22, 0xc33707d6);  /* 26 */
-    GG(c, d, a, b, x[ 3], S23, 0xf4d50d87);  /* 27 */
-    GG(b, c, d, a, x[ 8], S24, 0x455a14ed);  /* 28 */
-    GG(a, b, c, d, x[13], S21, 0xa9e3e905);  /* 29 */
-    GG(d, a, b, c, x[ 2], S22, 0xfcefa3f8);  /* 30 */
-    GG(c, d, a, b, x[ 7], S23, 0x676f02d9);  /* 31 */
-    GG(b, c, d, a, x[12], S24, 0x8d2a4c8a);  /* 32 */
+    if ((ctx->bytes[0] = t + len) < t)
+        ctx->bytes[1]++;  /* Carry from low to high */
 
-    /* Round 3 */
-    HH(a, b, c, d, x[ 5], S31, 0xfffa3942);  /* 33 */
-    HH(d, a, b, c, x[ 8], S32, 0x8771f681);  /* 34 */
-    HH(c, d, a, b, x[11], S33, 0x6d9d6122);  /* 35 */
-    HH(b, c, d, a, x[14], S34, 0xfde5380c);  /* 36 */
-    HH(a, b, c, d, x[ 1], S31, 0xa4beea44);  /* 37 */
-    HH(d, a, b, c, x[ 4], S32, 0x4bdecfa9);  /* 38 */
-    HH(c, d, a, b, x[ 7], S33, 0xf6bb4b60);  /* 39 */
-    HH(b, c, d, a, x[10], S34, 0xbebfbc70);  /* 40 */
-    HH(a, b, c, d, x[13], S31, 0x289b7ec6);  /* 41 */
-    HH(d, a, b, c, x[ 0], S32, 0xeaa127fa);  /* 42 */
-    HH(c, d, a, b, x[ 3], S33, 0xd4ef3085);  /* 43 */
-    HH(b, c, d, a, x[ 6], S34,  0x4881d05);  /* 44 */
-    HH(a, b, c, d, x[ 9], S31, 0xd9d4d039);  /* 45 */
-    HH(d, a, b, c, x[12], S32, 0xe6db99e5);  /* 46 */
-    HH(c, d, a, b, x[15], S33, 0x1fa27cf8);  /* 47 */
-    HH(b, c, d, a, x[ 2], S34, 0xc4ac5665);  /* 48 */
+    t = 64 - (t & 0x3f);  /* Space available in ctx->in (at least 1) */
 
-    /* Round 4 */
-    II(a, b, c, d, x[ 0], S41, 0xf4292244);  /* 49 */
-    II(d, a, b, c, x[ 7], S42, 0x432aff97);  /* 50 */
-    II(c, d, a, b, x[14], S43, 0xab9423a7);  /* 51 */
-    II(b, c, d, a, x[ 5], S44, 0xfc93a039);  /* 52 */
-    II(a, b, c, d, x[12], S41, 0x655b59c3);  /* 53 */
-    II(d, a, b, c, x[ 3], S42, 0x8f0ccc92);  /* 54 */
-    II(c, d, a, b, x[10], S43, 0xffeff47d);  /* 55 */
-    II(b, c, d, a, x[ 1], S44, 0x85845dd1);  /* 56 */
-    II(a, b, c, d, x[ 8], S41, 0x6fa87e4f);  /* 57 */
-    II(d, a, b, c, x[15], S42, 0xfe2ce6e0);  /* 58 */
-    II(c, d, a, b, x[ 6], S43, 0xa3014314);  /* 59 */
-    II(b, c, d, a, x[13], S44, 0x4e0811a1);  /* 60 */
-    II(a, b, c, d, x[ 4], S41, 0xf7537e82);  /* 61 */
-    II(d, a, b, c, x[11], S42, 0xbd3af235);  /* 62 */
-    II(c, d, a, b, x[ 2], S43, 0x2ad7d2bb);  /* 63 */
-    II(b, c, d, a, x[ 9], S44, 0xeb86d391);  /* 64 */
-
-    state[0] += a;
-    state[1] += b;
-    state[2] += c;
-    state[3] += d;
-
-    /* Zeroize sensitive information.
-    */
-    memset(x, 0, sizeof(x));
-}
-
-/* Encodes input (uint32_t) into output (unsigned char). Assumes len is
-  a multiple of 4.
- */
-static void Encode(uint8_t *output, const uint32_t *input, unsigned int len)
-{
-    unsigned int i, j;
-
-    for (i = 0, j = 0; j < len; i++, j += 4)
+    if (t > len)
     {
-        output[j] = (unsigned char)(input[i] & 0xff);
-        output[j+1] = (unsigned char)((input[i] >> 8) & 0xff);
-        output[j+2] = (unsigned char)((input[i] >> 16) & 0xff);
-        output[j+3] = (unsigned char)((input[i] >> 24) & 0xff);
+        memcpy((md5byte *)ctx->in + 64 - t, buf, len);
+        return;
     }
+
+    /* First chunk is an odd size */
+    memcpy((md5byte *)ctx->in + 64 - t, buf, t);
+    byteSwap(ctx->in, 16);
+    MD5Transform(ctx->buf, ctx->in);
+    buf += t;
+    len -= t;
+
+    /* Process data in 64-byte chunks */
+    while (len >= 64)
+    {
+        memcpy(ctx->in, buf, 64);
+        byteSwap(ctx->in, 16);
+        MD5Transform(ctx->buf, ctx->in);
+        buf += 64;
+        len -= 64;
+    }
+
+    /* Handle any remaining bytes of data. */
+    memcpy(ctx->in, buf, len);
 }
 
-/* Decodes input (unsigned char) into output (uint32_t). Assumes len is
-  a multiple of 4.
+/*
+ * Final wrapup - pad to 64-byte boundary with the bit pattern
+ * 1 0* (64-bit count of bits processed, MSB-first)
  */
-static void Decode(uint32_t *output, const uint8_t *input, unsigned int len)
+void
+MD5Final(md5byte digest[16], struct MD5Context *ctx)
 {
-    unsigned int i, j;
+    int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */
+    md5byte *p = (md5byte *)ctx->in + count;
 
-    for (i = 0, j = 0; j < len; i++, j += 4)
-        output[i] = ((uint32_t)input[j]) | (((uint32_t)input[j+1]) << 8) |
-                    (((uint32_t)input[j+2]) << 16) | (((uint32_t)input[j+3]) << 24);
+    /* Set the first char of padding to 0x80.  There is always room. */
+    *p++ = 0x80;
+
+    /* Bytes of padding needed to make 56 bytes (-8..55) */
+    count = 56 - 1 - count;
+
+    if (count < 0)    /* Padding forces an extra block */
+    {
+        memset(p, 0, count + 8);
+        byteSwap(ctx->in, 16);
+        MD5Transform(ctx->buf, ctx->in);
+        p = (md5byte *)ctx->in;
+        count = 56;
+    }
+
+    memset(p, 0, count);
+    byteSwap(ctx->in, 14);
+
+    /* Append length in bits and transform */
+    ctx->in[14] = ctx->bytes[0] << 3;
+    ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29;
+    MD5Transform(ctx->buf, ctx->in);
+
+    byteSwap(ctx->buf, 4);
+    memcpy(digest, ctx->buf, 16);
+    memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */
 }
+
+#ifndef ASM_MD5
+
+/* The four core functions - F1 is optimized somewhat */
+
+/* #define F1(x, y, z) (x & y | ~x & z) */
+#define F1(x, y, z) (z ^ (x & (y ^ z)))
+#define F2(x, y, z) F1(z, x, y)
+#define F3(x, y, z) (x ^ y ^ z)
+#define F4(x, y, z) (y ^ (x | ~z))
+
+/* This is the central step in the MD5 algorithm. */
+#define MD5STEP(f,w,x,y,z,in,s) \
+    (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)
+
+/*
+ * The core of the MD5 algorithm, this alters an existing MD5 hash to
+ * reflect the addition of 16 longwords of new data.  MD5Update blocks
+ * the data and converts bytes into longwords for this routine.
+ */
+void
+MD5Transform(UWORD32 buf[4], UWORD32 const in[16])
+{
+    register UWORD32 a, b, c, d;
+
+    a = buf[0];
+    b = buf[1];
+    c = buf[2];
+    d = buf[3];
+
+    MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7);
+    MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12);
+    MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17);
+    MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22);
+    MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7);
+    MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12);
+    MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17);
+    MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22);
+    MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7);
+    MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12);
+    MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17);
+    MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22);
+    MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7);
+    MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12);
+    MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17);
+    MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22);
+
+    MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5);
+    MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9);
+    MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14);
+    MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20);
+    MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5);
+    MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9);
+    MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14);
+    MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20);
+    MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5);
+    MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9);
+    MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14);
+    MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20);
+    MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5);
+    MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9);
+    MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14);
+    MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20);
+
+    MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4);
+    MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11);
+    MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16);
+    MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23);
+    MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4);
+    MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11);
+    MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16);
+    MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23);
+    MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4);
+    MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11);
+    MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16);
+    MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23);
+    MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4);
+    MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11);
+    MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16);
+    MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23);
+
+    MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6);
+    MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10);
+    MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15);
+    MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21);
+    MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6);
+    MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10);
+    MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15);
+    MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21);
+    MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6);
+    MD5STEP(F4, d, a, b, c, in[15] + 0xfe2ce6e0, 10);
+    MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15);
+    MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21);
+    MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6);
+    MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10);
+    MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15);
+    MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21);
+
+    buf[0] += a;
+    buf[1] += b;
+    buf[2] += c;
+    buf[3] += d;
+}
+
+#endif
diff --git a/md5_utils.h b/md5_utils.h
index d1a981bb7..5ca1b5f28 100644
--- a/md5_utils.h
+++ b/md5_utils.h
@@ -1,45 +1,42 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ * This is the header file for the MD5 message-digest algorithm.
+ * The algorithm is due to Ron Rivest.  This code was
+ * written by Colin Plumb in 1993, no copyright is claimed.
+ * This code is in the public domain; do with it what you wish.
  *
- *  Use of this source code is governed by a BSD-style license 
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
- *  be found in the AUTHORS file in the root of the source tree.
+ * Equivalent code is available from RSA Data Security, Inc.
+ * This code has been tested against that, and is equivalent,
+ * except that you don't need to include two pages of legalese
+ * with every copy.
+ *
+ * To compute the message digest of a chunk of bytes, declare an
+ * MD5Context structure, pass it to MD5Init, call MD5Update as
+ * needed on buffers full of bytes, and then call MD5Final, which
+ * will fill a supplied 16-byte array with the digest.
+ *
+ * Changed so as no longer to depend on Colin Plumb's `usual.h'
+ * header definitions
+ *  - Ian Jackson <ian@chiark.greenend.org.uk>.
+ * Still in the public domain.
  */
 
-/*
-Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
-rights reserved.
+#ifndef MD5_H
+#define MD5_H
 
-License to copy and use this software is granted provided that it
-is identified as the "RSA Data Security, Inc. MD5 Message-Digest
-Algorithm" in all material mentioning or referencing this software
-or this function.
+#define md5byte unsigned char
+#define UWORD32 unsigned int
 
-License is also granted to make and use derivative works provided
-that such works are identified as "derived from the RSA Data
-Security, Inc. MD5 Message-Digest Algorithm" in all material
-mentioning or referencing the derived work.
-
-RSA Data Security, Inc. makes no representations concerning either
-the merchantability of this software or the suitability of this
-software for any particular purpose. It is provided "as is"
-without express or implied warranty of any kind.
-
-These notices must be retained in any copies of any part of this
-documentation and/or software.
-*/
-#include "vpx/vpx_integer.h"
-
-/* MD5 context. */
-typedef struct
+typedef struct MD5Context MD5Context;
+struct MD5Context
 {
-    uint32_t state[4];        /* state (ABCD) */
-    uint32_t count[2];        /* number of bits, modulo 2^64 (lsb first) */
-    uint8_t  buffer[64];      /* input buffer */
-} md5_ctx_t;
+    UWORD32 buf[4];
+    UWORD32 bytes[2];
+    UWORD32 in[16];
+};
 
-void md5_init(md5_ctx_t *ctx);
-void md5_update(md5_ctx_t *ctx, const uint8_t *buf, unsigned int len);
-void md5_finalize(md5_ctx_t *ctx, uint8_t md5[16]);
+void MD5Init(struct MD5Context *context);
+void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len);
+void MD5Final(unsigned char digest[16], struct MD5Context *context);
+void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]);
+
+#endif /* !MD5_H */

From a0d04ea94e291ed72a452eee3309a298959727eb Mon Sep 17 00:00:00 2001
From: Fabio Pedretti <fabio.ped@libero.it>
Date: Mon, 14 Jun 2010 09:05:35 -0400
Subject: [PATCH 036/307] Remove useless 500 frame limit

Change-Id: Ib82de60cf32cf08844c3e2d88d7c587396f3892c
---
 configure                         |  5 -----
 vpx/internal/vpx_codec_internal.h |  3 +--
 vpx/src/vpx_decoder.c             | 12 ------------
 vpx/src/vpx_encoder.c             | 13 -------------
 4 files changed, 1 insertion(+), 32 deletions(-)

diff --git a/configure b/configure
index f2b42bef4..9a3269a7f 100755
--- a/configure
+++ b/configure
@@ -27,13 +27,11 @@ Advanced options:
                                   supported by hardware [auto]
   ${toggle_codec_srcs}            in/exclude codec library source code
   ${toggle_debug_libs}            in/exclude debug version of libraries
-  ${toggle_eval_limit}            enable limited evaluation build
   ${toggle_md5}                   support for output of checksum data
   ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
   ${toggle_vp8}                   VP8 codec support
   ${toggle_psnr}                  output of PSNR data, if supported (encoders)
   ${toggle_mem_tracker}           track memory usage
-  ${toggle_eval_limit}            decoder limitted to 500 frames
   ${toggle_postproc}              postprocessing
   ${toggle_multithread}           multithreaded encoding and decoding.
   ${toggle_spatial_resampling}    spatial sampling (scaling) support
@@ -230,7 +228,6 @@ CONFIG_LIST="
     dequant_tokens
     dc_recon
     new_tokens
-    eval_limit
     runtime_cpu_detect
     postproc
     postproc_generic
@@ -271,7 +268,6 @@ CMDLINE_SELECT="
     dequant_tokens
     dc_recon
     new_tokens
-    eval_limit
     postproc
     postproc_generic
     multithread
@@ -361,7 +357,6 @@ process_targets() {
     enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
     ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
     ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
-    enabled eval_limit && DIST_DIR="${DIST_DIR}-eval"
     ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
     DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
     case "${tgt_os}" in
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index f525a6013..c653cc5a8 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -56,7 +56,7 @@
  * types, removing or reassigning enums, adding/removing/rearranging
  * fields to structures
  */
-#define VPX_CODEC_INTERNAL_ABI_VERSION (2) /**<\hideinitializer*/
+#define VPX_CODEC_INTERNAL_ABI_VERSION (3) /**<\hideinitializer*/
 
 typedef struct vpx_codec_alg_priv  vpx_codec_alg_priv_t;
 
@@ -340,7 +340,6 @@ struct vpx_codec_priv
     vpx_codec_iface_t              *iface;
     struct vpx_codec_alg_priv      *alg_priv;
     const char                     *err_detail;
-    unsigned int                    eval_counter;
     vpx_codec_flags_t               init_flags;
     struct
     {
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index 9939aa28a..cbf5259f2 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -122,22 +122,10 @@ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t    *ctx,
         res = VPX_CODEC_INVALID_PARAM;
     else if (!ctx->iface || !ctx->priv)
         res = VPX_CODEC_ERROR;
-
-#if CONFIG_EVAL_LIMIT
-    else if (ctx->priv->eval_counter >= 500)
-    {
-        ctx->priv->err_detail = "Evaluation limit exceeded.";
-        res = VPX_CODEC_ERROR;
-    }
-
-#endif
     else
     {
         res = ctx->iface->dec.decode(ctx->priv->alg_priv, data, data_sz,
                                      user_priv, deadline);
-#if CONFIG_EVAL_LIMIT
-        ctx->priv->eval_counter++;
-#endif
     }
 
     return SAVE_STATUS(ctx, res);
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 55fd5bf84..f43328f3d 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -127,15 +127,6 @@ vpx_codec_err_t  vpx_codec_encode(vpx_codec_ctx_t            *ctx,
         res = VPX_CODEC_ERROR;
     else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER))
         res = VPX_CODEC_INCAPABLE;
-
-#if CONFIG_EVAL_LIMIT
-    else if (ctx->priv->eval_counter >= 500)
-    {
-        ctx->priv->err_detail = "Evaluation limit exceeded.";
-        res = VPX_CODEC_ERROR;
-    }
-
-#endif
     else
     {
         /* Execute in a normalized floating point environment, if the platform
@@ -145,10 +136,6 @@ vpx_codec_err_t  vpx_codec_encode(vpx_codec_ctx_t            *ctx,
         res = ctx->iface->enc.encode(ctx->priv->alg_priv, img, pts,
                                      duration, flags, deadline);
         FLOATING_POINT_RESTORE();
-
-#if CONFIG_EVAL_LIMIT
-        ctx->priv->eval_counter++;
-#endif
     }
 
     return SAVE_STATUS(ctx, res);

From 48c84d138f4a5f66e982c49d4972f85aaae532a8 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Mon, 14 Jun 2010 14:07:56 -0400
Subject: [PATCH 037/307] sse2 version of vp8_regular_quantize_b

Added sse2 version of vp8_regular_quantize_b which improved encode
performance(for the clip used) by ~10% for 32 bit builds and ~3% for
64 bit builds.

Also updated SHADOW_ARGS_TO_STACK to allow for more than 9 arguments.

Change-Id: I62f78eabc8040b39f3ffdf21be175811e96b39af
---
 vp8/encoder/quantize.h                 |   4 +
 vp8/encoder/x86/quantize_sse2.asm      | 254 +++++++++++++++++++++++++
 vp8/encoder/x86/quantize_x86.h         |  38 ++++
 vp8/encoder/x86/x86_csystemdependent.c |  35 ++++
 vp8/vp8cx.mk                           |   1 +
 vpx_ports/x86_abi_support.asm          |  15 +-
 6 files changed, 338 insertions(+), 9 deletions(-)
 create mode 100644 vp8/encoder/x86/quantize_sse2.asm
 create mode 100644 vp8/encoder/x86/quantize_x86.h

diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index f5ee9d7a2..ca073ef0a 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -17,6 +17,10 @@
 #define prototype_quantize_block(sym) \
     void (sym)(BLOCK *b,BLOCKD *d)
 
+#if ARCH_X86 || ARCH_X86_64
+#include "x86/quantize_x86.h"
+#endif
+
 #if ARCH_ARM
 #include "arm/quantize_arm.h"
 #endif
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
new file mode 100644
index 000000000..c64a8ba12
--- /dev/null
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -0,0 +1,254 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+;               short *qcoeff_ptr,short *dequant_ptr,
+;               const int *default_zig_zag, short *round_ptr,
+;               short *quant_ptr, short *dqcoeff_ptr,
+;               unsigned short zbin_oq_value,
+;               short *zbin_boost_ptr);
+;
+global sym(vp8_regular_quantize_b_impl_sse2)
+sym(vp8_regular_quantize_b_impl_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 10
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+
+    %define abs_minus_zbin_lo 0
+    %define abs_minus_zbin_hi 16
+    %define temp_qcoeff_lo 32
+    %define temp_qcoeff_hi 48
+    %define save_xmm6 64
+    %define save_xmm7 80
+    %define eob 96
+
+    %define vp8_regularquantizeb_stack_size eob + 16
+
+    sub         rsp, vp8_regularquantizeb_stack_size
+
+    movdqa      DQWORD PTR[rsp + save_xmm6], xmm6
+    movdqa      DQWORD PTR[rsp + save_xmm7], xmm7
+
+    mov         rdx, arg(0)                 ;coeff_ptr
+    mov         eax, arg(8)                 ;zbin_oq_value
+
+    mov         rcx, arg(1)                 ;zbin_ptr
+    movd        xmm7, eax
+
+    movdqa      xmm0, DQWORD PTR[rdx]
+    movdqa      xmm4, DQWORD PTR[rdx + 16]
+
+    movdqa      xmm1, xmm0
+    movdqa      xmm5, xmm4
+
+    psraw       xmm0, 15                    ;sign of z (aka sz)
+    psraw       xmm4, 15                    ;sign of z (aka sz)
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+
+    movdqa      xmm2, DQWORD PTR[rcx]       ;load zbin_ptr
+    movdqa      xmm3, DQWORD PTR[rcx + 16]  ;load zbin_ptr
+
+    pshuflw     xmm7, xmm7, 0
+    psubw       xmm1, xmm0                  ;x = abs(z)
+
+    punpcklwd   xmm7, xmm7                  ;duplicated zbin_oq_value
+    psubw       xmm5, xmm4                  ;x = abs(z)
+
+    paddw       xmm2, xmm7
+    paddw       xmm3, xmm7
+
+    psubw       xmm1, xmm2                  ;sub (zbin_ptr + zbin_oq_value)
+    psubw       xmm5, xmm3                  ;sub (zbin_ptr + zbin_oq_value)
+
+    mov         rdi, arg(5)                 ;round_ptr
+    mov         rsi, arg(6)                 ;quant_ptr
+
+    movdqa      DQWORD PTR[rsp + abs_minus_zbin_lo], xmm1
+    movdqa      DQWORD PTR[rsp + abs_minus_zbin_hi], xmm5
+
+    paddw       xmm1, xmm2                  ;add (zbin_ptr + zbin_oq_value) back
+    paddw       xmm5, xmm3                  ;add (zbin_ptr + zbin_oq_value) back
+
+    movdqa      xmm2, DQWORD PTR[rdi]
+    movdqa      xmm3, DQWORD PTR[rsi]
+
+    movdqa      xmm6, DQWORD PTR[rdi + 16]
+    movdqa      xmm7, DQWORD PTR[rsi + 16]
+
+    paddw       xmm1, xmm2
+    paddw       xmm5, xmm6
+
+    pmulhw      xmm1, xmm3
+    pmulhw      xmm5, xmm7
+
+    mov         rsi, arg(2)                 ;qcoeff_ptr
+    pxor        xmm6, xmm6
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+
+    psubw       xmm1, xmm0
+    psubw       xmm5, xmm4
+
+    movdqa      DQWORD PTR[rsp + temp_qcoeff_lo], xmm1
+    movdqa      DQWORD PTR[rsp + temp_qcoeff_hi], xmm5
+
+    movdqa      DQWORD PTR[rsi], xmm6       ;zero qcoeff
+    movdqa      DQWORD PTR[rsi + 16], xmm6  ;zero qcoeff
+
+    xor         rax, rax
+    mov         rcx, -1
+
+    mov         [rsp + eob], rcx
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+
+    mov         rbx, arg(4)                 ;default_zig_zag
+
+rq_zigzag_loop:
+    movsxd      rcx, DWORD PTR[rbx + rax*4] ;now we have rc
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1a
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1a
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1a:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1b
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1b
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1b:
+    movsxd      rcx, DWORD PTR[rbx + rax*4 + 4]
+    movsx       edi, WORD PTR [rsi]         ;*zbin_boost_ptr aka zbin
+    lea         rsi, [rsi + 2]              ;zbin_boost_ptr++
+
+    movsx       edx, WORD PTR[rsp + abs_minus_zbin_lo + rcx *2]
+    lea         rax, [rax + 1]
+
+    sub         edx, edi                    ;x - zbin
+    jl          rq_zigzag_1c
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+
+    movsx       edx, WORD PTR[rsp + temp_qcoeff_lo + rcx *2]
+
+    cmp         edx, 0
+    je          rq_zigzag_1c
+
+    mov         WORD PTR[rdi + rcx * 2], dx ;qcoeff_ptr[rc] = temp_qcoeff[rc]
+
+    mov         rsi, arg(9)                 ;zbin_boost_ptr
+    mov         [rsp + eob], rax            ;eob = i
+
+rq_zigzag_1c:
+    lea         rax, [rax + 1]
+
+    cmp         rax, 16
+    jl          rq_zigzag_loop
+
+    mov         rdi, arg(2)                 ;qcoeff_ptr
+    mov         rcx, arg(3)                 ;dequant_ptr
+    mov         rsi, arg(7)                 ;dqcoeff_ptr
+
+    movdqa      xmm2, DQWORD PTR[rdi]
+    movdqa      xmm3, DQWORD PTR[rdi + 16]
+
+    movdqa      xmm0, DQWORD PTR[rcx]
+    movdqa      xmm1, DQWORD PTR[rcx + 16]
+
+    pmullw      xmm0, xmm2
+    pmullw      xmm1, xmm3
+
+    movdqa      DQWORD PTR[rsi], xmm0       ;store dqcoeff
+    movdqa      DQWORD PTR[rsi + 16], xmm1  ;store dqcoeff
+
+    mov         rax, [rsp + eob]
+
+    movdqa      xmm6, DQWORD PTR[rsp + save_xmm6]
+    movdqa      xmm7, DQWORD PTR[rsp + save_xmm7]
+
+    add         rax, 1
+
+    add         rsp, vp8_regularquantizeb_stack_size
+    pop         rsp
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
new file mode 100644
index 000000000..37d69a890
--- /dev/null
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -0,0 +1,38 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license and patent
+ *  grant that can be found in the LICENSE file in the root of the source
+ *  tree. All contributing project authors may be found in the AUTHORS
+ *  file in the root of the source tree.
+ */
+
+#ifndef QUANTIZE_X86_H
+#define QUANTIZE_X86_H
+
+
+/* Note:
+ *
+ * This platform is commonly built for runtime CPU detection. If you modify
+ * any of the function mappings present in this file, be sure to also update
+ * them in the function pointer initialization code
+ */
+#if HAVE_MMX
+
+#endif
+
+
+#if HAVE_SSE2
+extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef vp8_quantize_quantb
+#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
+
+#endif
+
+#endif
+
+
+#endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index ad10a9e42..f6123a823 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -121,6 +121,40 @@ void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d)
              );
 }
 
+int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
+                               short *qcoeff_ptr,short *dequant_ptr,
+                               const int *default_zig_zag, short *round_ptr,
+                               short *quant_ptr, short *dqcoeff_ptr,
+                               unsigned short zbin_oq_value,
+                               short *zbin_boost_ptr);
+
+void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
+{
+    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+    short zbin_oq_value = b->zbin_extra;
+
+    d->eob = vp8_regular_quantize_b_impl_sse2(
+        coeff_ptr,
+        zbin_ptr,
+        qcoeff_ptr,
+        dequant_ptr,
+        vp8_default_zig_zag1d,
+
+        round_ptr,
+        quant_ptr,
+        dqcoeff_ptr,
+        zbin_oq_value,
+        zbin_boost_ptr
+        );
+}
+
 int vp8_mbblock_error_xmm_impl(short *coeff_ptr, short *dcoef_ptr, int dc);
 int vp8_mbblock_error_xmm(MACROBLOCK *mb, int dc)
 {
@@ -251,6 +285,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */
 
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;
     }
 
 #endif
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 9496ef0be..971a17520 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -98,6 +98,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 7840e3594..a1622e6a4 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -199,16 +199,13 @@
         push r9
     %endif
     %if %1 > 6
-        mov rax,[rbp+16]
-        push rax
-    %endif
-    %if %1 > 7
-        mov rax,[rbp+24]
-        push rax
-    %endif
-    %if %1 > 8
-        mov rax,[rbp+32]
+      %assign i %1-6
+      %assign off 16
+      %rep i
+        mov rax,[rbp+off]
         push rax
+        %assign off off+8
+      %endrep
     %endif
   %endm
 %endif

From 5a72620de96d2e2eafb353123ffad0d7122efcbc Mon Sep 17 00:00:00 2001
From: Guillermo Ballester Valor <gbvalor@gmail.com>
Date: Fri, 11 Jun 2010 14:33:49 -0400
Subject: [PATCH 038/307] Fix compiler warnings

Change-Id: I2a97f08cc3c7808ce5be39e910cc5147ecf03a1d
---
 vp8/encoder/firstpass.c | 4 ++++
 vp8/encoder/onyx_if.c   | 6 +++---
 vp8/encoder/pickinter.c | 1 +
 vp8/encoder/rdopt.c     | 5 +++++
 vp8/vp8_cx_iface.c      | 2 +-
 5 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index f0ddbf2d5..ced2d7c66 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1232,6 +1232,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     start_pos = cpi->stats_in;
 
+    vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
     // Preload the stats for the next frame.
     mod_frame_err = calculate_modified_err(cpi, this_frame);
 
@@ -2038,6 +2040,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     double kf_group_coded_err = 0.0;
     double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
+    vpx_memset(&next_frame, 0, sizeof(next_frame)); // assure clean
+
     vp8_clear_system_state();  //__asm emms;
     start_position = cpi->stats_in;
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index a2a27d3ae..98827f961 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -811,7 +811,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
         sf->full_freq[1] = 31;
         sf->search_method = NSTEP;
 
-        if (!cpi->ref_frame_flags & VP8_LAST_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
         {
             sf->thresh_mult[THR_NEWMV    ] = INT_MAX;
             sf->thresh_mult[THR_NEARESTMV] = INT_MAX;
@@ -820,7 +820,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
             sf->thresh_mult[THR_SPLITMV  ] = INT_MAX;
         }
 
-        if (!cpi->ref_frame_flags & VP8_GOLD_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
         {
             sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
             sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
@@ -829,7 +829,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
 
-        if (!cpi->ref_frame_flags & VP8_ALT_FLAG)
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 3746bebc9..a50f99bf3 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -454,6 +454,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
     vpx_memset(mode_mv, 0, sizeof(mode_mv));
     vpx_memset(nearest_mv, 0, sizeof(nearest_mv));
     vpx_memset(near_mv, 0, sizeof(near_mv));
+    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode));
 
 
     // set up all the refframe dependent pointers.
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 4aedbd925..9a772a706 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1125,6 +1125,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
     MV bmvs[16];
     int beobs[16];
 
+    vpx_memset(beobs, 0, sizeof(beobs));
+
+
     for (segmentation = 0; segmentation < VP8_NUMMBSPLITS; segmentation++)
     {
         int label_count;
@@ -1459,6 +1462,8 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     *returnintra = INT_MAX;
 
+    vpx_memset(&best_mbmode, 0, sizeof(best_mbmode)); // clean
+
     cpi->mbs_tested_so_far++;          // Count of the number of MBs tested so far this frame
 
     x->skip = 0;
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 9c05a642a..69d95d86f 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -109,7 +109,7 @@ update_error_state(vpx_codec_alg_priv_t                 *ctx,
     } while(0)
 
 #define RANGE_CHECK(p,memb,lo,hi) do {\
-        if(!((p)->memb >= (lo) && (p)->memb <= hi)) \
+        if(!(((p)->memb == lo || (p)->memb > (lo)) && (p)->memb <= hi)) \
             ERROR(#memb " out of range ["#lo".."#hi"]");\
     } while(0)
 

From 89c8b3dbc6400ccfcbe5f0dcfe5ccc4fd320f500 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 14 Jun 2010 10:22:55 -0400
Subject: [PATCH 039/307] vp8_cx_iface: set default cpu used to 0

Change-Id: I7b35f4717cdd204224112f72471b551617262417
---
 vp8/vp8_cx_iface.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 69d95d86f..d63c03938 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -53,15 +53,15 @@ static const struct extraconfig_map extracfg_map[] =
             NULL,
 #if !(CONFIG_REALTIME_ONLY)
             VP8_BEST_QUALITY_ENCODING,  /* Encoding Mode */
-            -4,                         /* cpu_used      */
+            0,                          /* cpu_used      */
 #else
             VP8_REAL_TIME_ENCODING,     /* Encoding Mode */
-            -8,                         /* cpu_used      */
+            4,                          /* cpu_used      */
 #endif
             0,                          /* enable_auto_alt_ref */
             0,                          /* noise_sensitivity */
             0,                          /* Sharpness */
-            800,                        /* static_thresh */
+            0,                          /* static_thresh */
             VP8_ONE_TOKENPARTITION,     /* token_partitions */
             0, /* arnr_max_frames */
             0, /* arnr_strength */

From ec1871554b4793ad274ed8ae764ff5044d75e0d4 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 4 Jun 2010 18:05:12 -0400
Subject: [PATCH 040/307] VisualStudio projects: asm tool updates

vs8
 - pull yasm.rules [1] into the source tree to avoid need to install
   file into VC/VCProjectDefaults
 - reference same w/ToolFile & RelativePath
 - update arm branch to match

vs7:
 - quote source file paths passed to yasm

[1]:
http://www.tortall.net/svn/yasm/trunk/yasm/Mkfiles/vc9/yasm.rules@2271

Change-Id: I52b801496340cd7b1d0023d12afbc04624ecefc3
---
 build/.gitattributes               |   2 +
 build/arm-wince-vs8/.gitattributes |   1 -
 build/make/gen_msvs_proj.sh        |  13 ++--
 build/x86-msvs/yasm.rules          | 115 +++++++++++++++++++++++++++++
 libs.mk                            |   1 -
 5 files changed, 124 insertions(+), 8 deletions(-)
 create mode 100644 build/.gitattributes
 delete mode 100644 build/arm-wince-vs8/.gitattributes
 create mode 100644 build/x86-msvs/yasm.rules

diff --git a/build/.gitattributes b/build/.gitattributes
new file mode 100644
index 000000000..03db79bc0
--- /dev/null
+++ b/build/.gitattributes
@@ -0,0 +1,2 @@
+*-vs8/*.rules -crlf
+*-msvs/*.rules -crlf
diff --git a/build/arm-wince-vs8/.gitattributes b/build/arm-wince-vs8/.gitattributes
deleted file mode 100644
index be1eeb95a..000000000
--- a/build/arm-wince-vs8/.gitattributes
+++ /dev/null
@@ -1 +0,0 @@
-*.rules -crlf
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 6288ee5c1..d94ae4d80 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -12,6 +12,7 @@
 
 self=$0
 self_basename=${self##*/}
+self_dirname=$(dirname "$0")
 EOL=$'\n'
 
 show_help() {
@@ -292,8 +293,8 @@ case "$target" in
     x86*)
         platforms[0]="Win32"
         # these are only used by vs7
-        asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} \$(InputPath)"
-        asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} \$(InputPath)"
+        asm_Debug_cmdline="yasm -Xvc -g cv8 -f \$(PlatformName) ${yasmincs} &quot;\$(InputPath)&quot;"
+        asm_Release_cmdline="yasm -Xvc -f \$(PlatformName) ${yasmincs} &quot;\$(InputPath)&quot;"
     ;;
     arm*|iwmmx*)
         case "${name}" in
@@ -343,19 +344,19 @@ generate_vcproj() {
 
     open_tag  ToolFiles
     case "$target" in
-        x86*) $uses_asm && tag DefaultToolFile FileName="yasm.rules"
+        x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules"
         ;;
         arm*|iwmmx*)
             if [ "$name" == "vpx_decoder" ];then
             case "$target" in
                 armv5*)
-                    tag DefaultToolFile FileName="armasmv5.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules"
                 ;;
                 armv6*)
-                    tag DefaultToolFile FileName="armasmv6.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv6.rules"
                 ;;
                 iwmmxt*)
-                    tag DefaultToolFile FileName="armasmxscale.rules"
+                    tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmxscale.rules"
                 ;;
             esac
             fi
diff --git a/build/x86-msvs/yasm.rules b/build/x86-msvs/yasm.rules
new file mode 100644
index 000000000..ee1fefbca
--- /dev/null
+++ b/build/x86-msvs/yasm.rules
@@ -0,0 +1,115 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<VisualStudioToolFile
+	Name="Yasm"
+	Version="8.00"
+	>
+  <Rules>
+    <CustomBuildRule
+			Name="YASM"
+			DisplayName="Yasm Assembler"
+			CommandLine="yasm -Xvc -f $(PlatformName) [AllOptions] [AdditionalOptions] [Inputs]"
+			Outputs="[$ObjectFileName]"
+			FileExtensions="*.asm"
+			ExecutionDescription="Assembling $(InputFileName)"
+			ShowOnlyRuleProperties="false"
+			>
+      <Properties>
+        <StringProperty
+					Name="Defines"
+					DisplayName="Definitions"
+					Category="Pre-Defined Symbols"
+					Description="Specify pre-defined symbols (&apos;symbol&apos; or &apos;symbol = value&apos;) "
+					Switch="-D [value]"
+					Delimited="true"
+					Inheritable="true"
+				/>
+        <StringProperty
+					Name="IncludePaths"
+					DisplayName="Include Paths"
+					Category="Configuration"
+					Description="Set the paths for any additional include files"
+					Switch="-I [value]"
+					Delimited="true"
+					Inheritable="true"
+				/>
+        <StringProperty
+					Name="UnDefines"
+					DisplayName="Remove Definitions"
+					Category="Pre-Defined Symbols"
+					Description="Remove pre-defined symbols "
+					Switch="-U [value]"
+					Delimited="true"
+					Inheritable="true"
+				/>
+        <StringProperty
+					Name="ObjectFileName"
+					DisplayName="Object File Name"
+					Category="Output"
+					Description="Select the output file name"
+					Switch="-o [value]"
+					DefaultValue="$(IntDir)\$(InputName).obj"
+				/>
+        <StringProperty
+					Name="ListFileName"
+					DisplayName="List File Name"
+					Category="Output"
+					Description="Select an output listing by setting its file name"
+					Switch="-l [value]"
+				/>
+        <StringProperty
+					Name="PreIncludeFile"
+					DisplayName="Pre Include File"
+					Category="Configuration"
+					Description="Select a pre-included file by setting its name"
+					Switch="-P [value]"
+				/>
+        <BooleanProperty
+					Name="Debug"
+					DisplayName="Debug Information"
+					Category="Output"
+					Description="Generate debugging information"
+					Switch="-g cv8"
+				/>
+        <EnumProperty
+					Name="PreProc"
+					DisplayName="Pre-Processor"
+					Category="Configuration"
+					Description="Select the pre-processor (&apos;nasm&apos; or &apos;raw&apos;)"
+					>
+          <Values>
+            <EnumValue
+							Value="0"
+							Switch="-rnasm"
+							DisplayName="Nasm "
+						/>
+            <EnumValue
+							Value="1"
+							Switch="-rraw"
+							DisplayName="Raw"
+						/>
+          </Values>
+        </EnumProperty>
+        <EnumProperty
+					Name="Parser"
+					DisplayName="Parser"
+					Category="Configuration"
+					Description="Select the parser for Intel (&apos;nasm&apos;) or AT&amp;T ( &apos;gas&apos;) syntax"
+					>
+          <Values>
+            <EnumValue
+							Value="0"
+							Switch="-pnasm"
+							DisplayName="Nasm"
+						/>
+            <EnumValue
+							Value="1"
+							Switch="-pgas"
+							DisplayName="Gas"
+						/>
+          </Values>
+        </EnumProperty>
+      </Properties>
+    </CustomBuildRule>
+  </Rules>
+</VisualStudioToolFile>
+
diff --git a/libs.mk b/libs.mk
index fd4543b07..115ceb537 100644
--- a/libs.mk
+++ b/libs.mk
@@ -132,7 +132,6 @@ ARM_ARCH=v6
 endif
 obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
 	@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/obj_int_extract.bat .
-	@cp $(SRC_PATH_BARE)/build/arm-wince-vs8/armasm$(ARM_ARCH).rules .
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_proj.sh\
 			--exe\

From 397aad3ec28020a9b766e362e061701783c9633e Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Tue, 15 Jun 2010 09:11:26 -0400
Subject: [PATCH 041/307] More on "some XMM registers are non-volatile on
 windows x64 ABI"

Add same fix in subpixel_sse2.asm.

Change-Id: Icfda6103cbf74ec43308e96961dd738aa823c14d
---
 vp8/common/x86/subpixel_sse2.asm | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index b71a2f9d1..ee383ad47 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -428,6 +428,7 @@ sym(vp8_filter_block1d16_v6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 8
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -537,6 +538,7 @@ sym(vp8_filter_block1d8_h6_only_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -628,6 +630,7 @@ filter_block1d8_h6_only_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -792,6 +795,7 @@ filter_block1d16_h6_only_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -812,6 +816,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -903,7 +908,7 @@ sym(vp8_unpack_block1d16_h6_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
-    SAVE_XMM
+    ;SAVE_XMM                          ;xmm6, xmm7 are not used here.
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -943,7 +948,7 @@ unpack_block1d16_h6_sse2_rowloop:
     pop rdi
     pop rsi
     RESTORE_GOT
-    RESTORE_XMM
+    ;RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1212,6 +1217,7 @@ done:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -1232,6 +1238,7 @@ sym(vp8_bilinear_predict8x8_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -1355,6 +1362,7 @@ next_row8x8:
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret

From c17b62e1bd8fe9335ba247061c072b10392e88a7 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 5 May 2010 17:58:19 -0400
Subject: [PATCH 042/307] Change bitreader to use a larger window.

Change bitreading functions to use a larger window which is refilled less
 often.

This makes it cheap enough to do bounds checking each time the window is
 refilled, which avoids the need to copy the input into a large circular
 buffer.
This uses less memory and speeds up the total decode time by 1.6% on an ARM11,
 2.8% on a Cortex A8, and 2.2% on x86-32, but less than 1% on x86-64.

Inlining vp8dx_bool_decoder_fill() has a big penalty on x86-32, as does moving
 the refill loop to the front of vp8dx_decode_bool().
However, having the refill loop between computation of the split values and
 the branch in vp8_decode_mb_tokens() is a big win on ARM (presumably due to
 memory latency and code size: refilling after normalization duplicates the
 code in the DECODE_AND_BRANCH_IF_ZERO and DECODE_AND_LOOP_IF_ZERO cases.
Unfortunately, refilling at the end of vp8dx_bool_decoder_fill() and at the
 beginning of each decode step in vp8_decode_mb_tokens() means the latter
 requires an extra refill at the end.
Platform-specific versions could avoid the problem, but would require most of
 detokenize.c to be duplicated.

Change-Id: I16c782a63376f2a15b78f8086d899b987204c1c7
---
 vp8/common/arm/vpx_asm_offsets.c       |  10 +-
 vp8/decoder/arm/dboolhuff_arm.h        |   6 --
 vp8/decoder/arm/dsystemdependent.c     |   2 -
 vp8/decoder/dboolhuff.c                | 103 ++++++---------------
 vp8/decoder/dboolhuff.h                | 123 +++++++++++--------------
 vp8/decoder/decodemv.c                 |   2 -
 vp8/decoder/decodframe.c               |  15 ---
 vp8/decoder/demode.c                   |   1 -
 vp8/decoder/detokenize.c               |  56 +++++------
 vp8/decoder/generic/dsystemdependent.c |   1 -
 vp8/decoder/threading.c                |   1 -
 11 files changed, 114 insertions(+), 206 deletions(-)

diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index ff4d7527c..b1e64571f 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -70,15 +70,11 @@ DEFINE(detok_current_bc,                         offsetof(DETOK, current_bc));
 DEFINE(detok_coef_probs,                         offsetof(DETOK, coef_probs));
 DEFINE(detok_eob,                               offsetof(DETOK, eob));
 
-DEFINE(bool_decoder_lowvalue,                   offsetof(BOOL_DECODER, lowvalue));
-DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
+DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));
+DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));
 DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));
 DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));
-DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));
-DEFINE(bool_decoder_user_buffer_sz,             offsetof(BOOL_DECODER, user_buffer_sz));
-DEFINE(bool_decoder_decode_buffer,              offsetof(BOOL_DECODER, decode_buffer));
-DEFINE(bool_decoder_read_ptr,                   offsetof(BOOL_DECODER, read_ptr));
-DEFINE(bool_decoder_write_ptr,                  offsetof(BOOL_DECODER, write_ptr));
+DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
 
 DEFINE(tokenextrabits_min_val,                   offsetof(TOKENEXTRABITS, min_val));
 DEFINE(tokenextrabits_length,                   offsetof(TOKENEXTRABITS, Length));
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index 495004f9c..d2ebc71e8 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -16,9 +16,6 @@
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_v6
 
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_v6
-
 #undef vp8_dbool_fill
 #define vp8_dbool_fill vp8_bool_decoder_fill_v6
 
@@ -33,9 +30,6 @@
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_neon
 
-#undef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_neon
-
 #undef vp8_dbool_fill
 #define vp8_dbool_fill vp8_bool_decoder_fill_neon
 
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index f146d604e..6defa3d5d 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -26,7 +26,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->dequant.idct    = vp8_dequant_idct_neon;
     pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
@@ -36,7 +35,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->dequant.idct    = vp8_dequant_idct_v6;
     pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 7027c0f54..389aed013 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -26,86 +26,41 @@ DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
 };
 
 
-static void copy_in(BOOL_DECODER *br, unsigned int to_write)
-{
-    if (to_write > br->user_buffer_sz)
-        to_write = br->user_buffer_sz;
-
-    memcpy(br->write_ptr, br->user_buffer, to_write);
-    br->user_buffer += to_write;
-    br->user_buffer_sz -= to_write;
-    br->write_ptr = br_ptr_advance(br->write_ptr, to_write);
-}
-
 int vp8dx_start_decode_c(BOOL_DECODER *br, const unsigned char *source,
                         unsigned int source_sz)
 {
-    br->lowvalue = 0;
+    br->user_buffer_end = source+source_sz;
+    br->user_buffer     = source;
+    br->value    = 0;
+    br->count    = -8;
     br->range    = 255;
-    br->count    = 0;
-    br->user_buffer    = source;
-    br->user_buffer_sz = source_sz;
 
     if (source_sz && !source)
         return 1;
 
-    /* Allocate the ring buffer backing store with alignment equal to the
-     * buffer size*2 so that a single pointer can be used for wrapping rather
-     * than a pointer+offset.
-     */
-    br->decode_buffer  = vpx_memalign(VP8_BOOL_DECODER_SZ * 2,
-                                      VP8_BOOL_DECODER_SZ);
-
-    if (!br->decode_buffer)
-        return 1;
-
     /* Populate the buffer */
-    br->read_ptr = br->decode_buffer;
-    br->write_ptr = br->decode_buffer;
-    copy_in(br, VP8_BOOL_DECODER_SZ);
+    vp8dx_bool_decoder_fill_c(br);
 
-    /* Read the first byte */
-    br->value = (*br->read_ptr++) << 8;
     return 0;
 }
 
 
 void vp8dx_bool_decoder_fill_c(BOOL_DECODER *br)
 {
-    int          left, right;
+    const unsigned char *bufptr;
+    const unsigned char *bufend;
+    VP8_BD_VALUE         value;
+    int                  count;
+    bufend = br->user_buffer_end;
+    bufptr = br->user_buffer;
+    value = br->value;
+    count = br->count;
 
-    /* Find available room in the buffer */
-    left = 0;
-    right = br->read_ptr - br->write_ptr;
+    VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
 
-    if (right < 0)
-    {
-        /* Read pointer is behind the write pointer. We can write from the
-         * write pointer to the end of the buffer.
-         */
-        right = VP8_BOOL_DECODER_SZ - (br->write_ptr - br->decode_buffer);
-        left = br->read_ptr - br->decode_buffer;
-    }
-
-    if (right + left < 128)
-        return;
-
-    if (right)
-        copy_in(br, right);
-
-    if (left)
-    {
-        br->write_ptr = br->decode_buffer;
-        copy_in(br, left);
-    }
-
-}
-
-
-void vp8dx_stop_decode_c(BOOL_DECODER *bc)
-{
-    vpx_free(bc->decode_buffer);
-    bc->decode_buffer = 0;
+    br->user_buffer = bufptr;
+    br->value = value;
+    br->count = count;
 }
 
 #if 0
@@ -120,13 +75,18 @@ void vp8dx_stop_decode_c(BOOL_DECODER *bc)
 int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
 {
     unsigned int bit=0;
+    VP8_BD_VALUE value;
     unsigned int split;
-    unsigned int bigsplit;
-    register unsigned int range = br->range;
-    register unsigned int value = br->value;
+    VP8_BD_VALUE bigsplit;
+    int count;
+    unsigned int range;
+
+    value = br->value;
+    count = br->count;
+    range = br->range;
 
     split = 1 + (((range-1) * probability) >> 8);
-    bigsplit = (split<<8);
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
 
     range = split;
     if(value >= bigsplit)
@@ -144,21 +104,16 @@ int vp8dx_decode_bool_c(BOOL_DECODER *br, int probability)
     }*/
 
     {
-        int count = br->count;
         register unsigned int shift = vp8dx_bitreader_norm[range];
         range <<= shift;
         value <<= shift;
         count -= shift;
-        if(count <= 0)
-        {
-            value |= (*br->read_ptr) << (-count);
-            br->read_ptr = br_ptr_advance(br->read_ptr, 1);
-            count += 8 ;
-        }
-        br->count = count;
     }
     br->value = value;
+    br->count = count;
     br->range = range;
+    if (count < 0)
+        vp8dx_bool_decoder_fill_c(br);
     return bit;
 }
 
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index c6d69e776..7ec235786 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -11,51 +11,31 @@
 
 #ifndef DBOOLHUFF_H
 #define DBOOLHUFF_H
+#include <stddef.h>
+#include <limits.h>
 #include "vpx_ports/config.h"
 #include "vpx_ports/mem.h"
 #include "vpx/vpx_integer.h"
 
-/* Size of the bool decoder backing storage
- *
- * This size was chosen to be greater than the worst case encoding of a
- * single macroblock. This was calcluated as follows (python):
- *
- *     def max_cost(prob):
- *         return max(prob_costs[prob], prob_costs[255-prob]) / 256;
- *
- *     tree_nodes_cost = 7 * max_cost(255)
- *     extra_bits_cost = sum([max_cost(bit) for bit in extra_bits])
- *     sign_bit_cost = max_cost(128)
- *     total_cost = tree_nodes_cost + extra_bits_cost + sign_bit_cost
- *
- * where the prob_costs table was taken from the C vp8_prob_cost table in
- * boolhuff.c and the extra_bits table was taken from the 11 extrabits for
- * a category 6 token as defined in vp8d_token_extra_bits2/detokenize.c
- *
- * This equation produced a maximum of 79 bits per coefficient. Scaling up
- * to the macroblock level:
- *
- *     79 bits/coeff * 16 coeff/block * 25 blocks/macroblock = 31600 b/mb
- *
- *     4096 bytes = 32768 bits > 31600
- */
-#define VP8_BOOL_DECODER_SZ       4096
-#define VP8_BOOL_DECODER_MASK     (VP8_BOOL_DECODER_SZ-1)
-#define VP8_BOOL_DECODER_PTR_MASK (~(uintptr_t)(VP8_BOOL_DECODER_SZ))
+typedef size_t VP8_BD_VALUE;
+
+# define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE)*CHAR_BIT)
+/*This is meant to be a large, positive constant that can still be efficiently
+   loaded as an immediate (on platforms like ARM, for example).
+  Even relatively modest values like 100 would work fine.*/
+# define VP8_LOTS_OF_BITS (0x40000000)
+
+
 
 struct vp8_dboolhuff_rtcd_vtable;
 
 typedef struct
 {
-    unsigned int         lowvalue;
-    unsigned int         range;
-    unsigned int         value;
-    int                  count;
+    const unsigned char *user_buffer_end;
     const unsigned char *user_buffer;
-    unsigned int         user_buffer_sz;
-    unsigned char       *decode_buffer;
-    const unsigned char *read_ptr;
-    unsigned char       *write_ptr;
+    VP8_BD_VALUE         value;
+    int                  count;
+    unsigned int         range;
 #if CONFIG_RUNTIME_CPU_DETECT
     struct vp8_dboolhuff_rtcd_vtable *rtcd;
 #endif
@@ -63,7 +43,6 @@ typedef struct
 
 #define prototype_dbool_start(sym) int sym(BOOL_DECODER *br, \
     const unsigned char *source, unsigned int source_sz)
-#define prototype_dbool_stop(sym) void sym(BOOL_DECODER *bc)
 #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
 #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
 #define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
@@ -76,10 +55,6 @@ typedef struct
 #define vp8_dbool_start vp8dx_start_decode_c
 #endif
 
-#ifndef vp8_dbool_stop
-#define vp8_dbool_stop vp8dx_stop_decode_c
-#endif
-
 #ifndef vp8_dbool_fill
 #define vp8_dbool_fill vp8dx_bool_decoder_fill_c
 #endif
@@ -93,20 +68,17 @@ typedef struct
 #endif
 
 extern prototype_dbool_start(vp8_dbool_start);
-extern prototype_dbool_stop(vp8_dbool_stop);
 extern prototype_dbool_fill(vp8_dbool_fill);
 extern prototype_dbool_debool(vp8_dbool_debool);
 extern prototype_dbool_devalue(vp8_dbool_devalue);
 
 typedef prototype_dbool_start((*vp8_dbool_start_fn_t));
-typedef prototype_dbool_stop((*vp8_dbool_stop_fn_t));
 typedef prototype_dbool_fill((*vp8_dbool_fill_fn_t));
 typedef prototype_dbool_debool((*vp8_dbool_debool_fn_t));
 typedef prototype_dbool_devalue((*vp8_dbool_devalue_fn_t));
 
 typedef struct vp8_dboolhuff_rtcd_vtable {
     vp8_dbool_start_fn_t   start;
-    vp8_dbool_stop_fn_t    stop;
     vp8_dbool_fill_fn_t    fill;
     vp8_dbool_debool_fn_t  debool;
     vp8_dbool_devalue_fn_t devalue;
@@ -123,17 +95,6 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
 #define IF_RTCD(x) NULL
 //#endif
 
-static unsigned char *br_ptr_advance(const unsigned char *_ptr,
-                                     unsigned int n)
-{
-    uintptr_t  ptr = (uintptr_t)_ptr;
-
-    ptr += n;
-    ptr &= VP8_BOOL_DECODER_PTR_MASK;
-
-    return (void *)ptr;
-}
-
 DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 
 /* wrapper functions to hide RTCD. static means inline means hopefully no
@@ -147,12 +108,34 @@ static int vp8dx_start_decode(BOOL_DECODER *br,
 #endif
     return DBOOLHUFF_INVOKE(rtcd, start)(br, source, source_sz);
 }
-static void vp8dx_stop_decode(BOOL_DECODER *br) {
-    DBOOLHUFF_INVOKE(br->rtcd, stop)(br);
-}
 static void vp8dx_bool_decoder_fill(BOOL_DECODER *br) {
     DBOOLHUFF_INVOKE(br->rtcd, fill)(br);
 }
+
+/*The refill loop is used in several places, so define it in a macro to make
+   sure they're all consistent.
+  An inline function would be cleaner, but has a significant penalty, because
+   multiple BOOL_DECODER fields must be modified, and the compiler is not smart
+   enough to eliminate the stores to those fields and the subsequent reloads
+   from them when inlining the function.*/
+#define VP8DX_BOOL_DECODER_FILL(_count,_value,_bufptr,_bufend) \
+    do \
+    { \
+        int shift; \
+        for(shift = VP8_BD_VALUE_SIZE - 8 - ((_count) + 8); shift >= 0; ) \
+        { \
+            if((_bufptr) >= (_bufend)) { \
+                (_count) = VP8_LOTS_OF_BITS; \
+                break; \
+            } \
+            (_count) += 8; \
+            (_value) |= (VP8_BD_VALUE)*(_bufptr)++ << shift; \
+            shift -= 8; \
+        } \
+    } \
+    while(0)
+
+
 static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
   /*
    * Until optimized versions of this function are available, we
@@ -161,13 +144,18 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
    *return DBOOLHUFF_INVOKE(br->rtcd, debool)(br, probability);
    */
     unsigned int bit = 0;
+    VP8_BD_VALUE value;
     unsigned int split;
-    unsigned int bigsplit;
-    register unsigned int range = br->range;
-    register unsigned int value = br->value;
+    VP8_BD_VALUE bigsplit;
+    int count;
+    unsigned int range;
+
+    value = br->value;
+    count = br->count;
+    range = br->range;
 
     split = 1 + (((range - 1) * probability) >> 8);
-    bigsplit = (split << 8);
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8);
 
     range = split;
 
@@ -186,23 +174,16 @@ static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) {
     }*/
 
     {
-        int count = br->count;
         register unsigned int shift = vp8dx_bitreader_norm[range];
         range <<= shift;
         value <<= shift;
         count -= shift;
-
-        if (count <= 0)
-        {
-            value |= (*br->read_ptr) << (-count);
-            br->read_ptr = br_ptr_advance(br->read_ptr, 1);
-            count += 8 ;
-        }
-
-        br->count = count;
     }
     br->value = value;
+    br->count = count;
     br->range = range;
+    if(count < 0)
+        vp8dx_bool_decoder_fill(br);
     return bit;
 }
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 7e004238a..2b004df05 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -172,8 +172,6 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
             MACROBLOCKD *xd = &pbi->mb;
 
             mbmi->need_to_clamp_mvs = 0;
-            vp8dx_bool_decoder_fill(bc);
-
             // Distance of Mb to the various image edges.
             // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
             xd->mb_to_left_edge = -((mb_col * 16) << 3);
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 0f1b879ca..8d9db10fd 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -455,7 +455,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         else
         pbi->debugoutput =0;
         */
-        vp8dx_bool_decoder_fill(xd->current_bc);
         vp8_decode_macroblock(pbi, xd);
 
 
@@ -563,18 +562,7 @@ static void stop_token_decoder(VP8D_COMP *pbi)
     VP8_COMMON *pc = &pbi->common;
 
     if (pc->multi_token_partition != ONE_PARTITION)
-    {
-        int num_part = (1 << pc->multi_token_partition);
-
-        for (i = 0; i < num_part; i++)
-        {
-            vp8dx_stop_decode(&pbi->mbc[i]);
-        }
-
         vpx_free(pbi->mbc);
-    }
-    else
-        vp8dx_stop_decode(& pbi->bc2);
 }
 
 static void init_frame(VP8D_COMP *pbi)
@@ -883,7 +871,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     }
 
 
-    vp8dx_bool_decoder_fill(bc);
     {
         // read coef probability tree
 
@@ -970,8 +957,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     stop_token_decoder(pbi);
 
-    vp8dx_stop_decode(bc);
-
     // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
 
     // If this was a kf or Gf note the Q used
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
index 881b49ebc..07be9fb35 100644
--- a/vp8/decoder/demode.c
+++ b/vp8/decoder/demode.c
@@ -80,7 +80,6 @@ void vp8_kfread_modes(VP8D_COMP *pbi)
         {
             MB_PREDICTION_MODE y_mode;
 
-            vp8dx_bool_decoder_fill(bc);
             // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
             // By default on a key frame reset all MBs to segment 0
             m->mbmi.segment_id = 0;
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index d6f5ca26c..b202b7b44 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -15,7 +15,6 @@
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
 
-#define BR_COUNT 8
 #define BOOL_DATA UINT8
 
 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
@@ -105,6 +104,10 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
     }
 }
 DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+#define FILL \
+    if(count < 0) \
+        VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
+
 #define NORMALIZE \
     /*if(range < 0x80)*/                            \
     { \
@@ -112,17 +115,13 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
         range <<= shift; \
         value <<= shift; \
         count -= shift; \
-        if(count <= 0) \
-        { \
-            count += BR_COUNT ; \
-            value |= (*bufptr) << (BR_COUNT-count); \
-            bufptr = br_ptr_advance(bufptr, 1); \
-        } \
     }
 
 #define DECODE_AND_APPLYSIGN(value_to_sign) \
     split = (range + 1) >> 1; \
-    if ( (value >> 8) < split ) \
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+    FILL \
+    if ( value < bigsplit ) \
     { \
         range = split; \
         v= value_to_sign; \
@@ -130,28 +129,25 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
     else \
     { \
         range = range-split; \
-        value = value-(split<<8); \
+        value = value-bigsplit; \
         v = -value_to_sign; \
     } \
     range +=range;                   \
     value +=value;                   \
-    if (!--count) \
-    { \
-        count = BR_COUNT; \
-        value |= *bufptr; \
-        bufptr = br_ptr_advance(bufptr, 1); \
-    }
+    count--;
 
 #define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
     { \
         split = 1 +  ((( probability*(range-1) ) )>> 8); \
-        if ( (value >> 8) < split ) \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) \
         { \
             range = split; \
             NORMALIZE \
             goto branch; \
         } \
-        value -= (split<<8); \
+        value -= bigsplit; \
         range = range - split; \
         NORMALIZE \
     }
@@ -159,7 +155,9 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 #define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
     { \
         split = 1 + ((( probability*(range-1) ) ) >> 8); \
-        if ( (value >> 8) < split ) \
+        bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+        FILL \
+        if ( value < bigsplit ) \
         { \
             range = split; \
             NORMALIZE \
@@ -170,7 +168,7 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
             goto branch; \
             } goto BLOCK_FINISHED; /*for malformed input */\
         } \
-        value -= (split<<8); \
+        value -= bigsplit; \
         range = range - split; \
         NORMALIZE \
     }
@@ -188,10 +186,12 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 
 #define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
     split = 1 +  (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
-    if(value >= (split<<8))\
+    bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); \
+    FILL \
+    if(value >= bigsplit)\
     {\
         range = range-split;\
-        value = value-(split<<8);\
+        value = value-bigsplit;\
         val += ((UINT16)1<<bits_count);\
     }\
     else\
@@ -217,11 +217,13 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     register int count;
 
     const BOOL_DATA *bufptr;
+    const BOOL_DATA *bufend;
     register unsigned int range;
-    register unsigned int value;
+    VP8_BD_VALUE value;
     const int *scan;
     register unsigned int shift;
     UINT32 split;
+    VP8_BD_VALUE bigsplit;
     INT16 *qcoeff_ptr;
 
     const vp8_prob *coef_probs;
@@ -253,10 +255,11 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
         qcoeff_ptr = &x->qcoeff[0];
     }
 
+    bufend  = bc->user_buffer_end;
+    bufptr  = bc->user_buffer;
+    value   = bc->value;
     count   = bc->count;
     range   = bc->range;
-    value   = bc->value;
-    bufptr  = bc->read_ptr;
 
 
     coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
@@ -384,10 +387,11 @@ BLOCK_FINISHED:
         goto BLOCK_LOOP;
     }
 
-    bc->count = count;
+    FILL
+    bc->user_buffer = bufptr;
     bc->value = value;
+    bc->count = count;
     bc->range = range;
-    bc->read_ptr = bufptr;
     return eobtotal;
 
 }
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index ad64a38af..4585d0882 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -24,7 +24,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->dequant.idct    = vp8_dequant_idct_c;
     pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.stop  = vp8dx_stop_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
 #if 0 //For use with RTCD, when implemented
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 87bba2020..38d604210 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -202,7 +202,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     vp8_build_uvmvs(xd, pc->full_pixel);
 
-                    vp8dx_bool_decoder_fill(xd->current_bc);
                     vp8_decode_macroblock(pbi, xd);
 
 

From bbfeefc7abf7b42b38ba911bcbce9220522a1db0 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 16 Jun 2010 12:27:52 -0400
Subject: [PATCH 043/307] gen_scalers: fix 64-bit integer promotion bug

i needs to be treated as signed to get the proper indexing on 64-bit
platforms. This behavior was accidentally reverted when fixing an
unsigned/signed comparison warning.

Change-Id: Ic306d609bdc8de94c8f8ba29c6e45c736101a82e
---
 vpx_scale/generic/gen_scalers.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index 948e3d7ae..ff841f33d 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -937,12 +937,13 @@ void vp8cx_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pit
 
 void vp8cx_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
 {
-    unsigned int i;
+    int i;
     int temp;
+    int width = dest_width;
 
     (void) dest_pitch;
 
-    for (i = 0; i < dest_width; i++)
+    for (i = 0; i < width; i++)
     {
         temp = 8;
         temp += source[i-(int)src_pitch] * 3;

From e703af974d2cf08aa7283bd0efd3bdd69801ea18 Mon Sep 17 00:00:00 2001
From: Tom Finegan <tomfinegan@google.com>
Date: Wed, 16 Jun 2010 13:24:55 -0400
Subject: [PATCH 044/307] Avoid encoding garbage when ivfenc encounters an
 unsupported Y4M file.

This change stops ivfenc from treating unsupported Y4M files as raw
input.

For example, if given an interlaced Y4M file, ivfenc treated the input
as if it were raw data because the unsupported Y4M file case previously
fell through without being handled.

Change-Id: I06caa50f3448e6388741a77346daaebf77c277e1
---
 ivfenc.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/ivfenc.c b/ivfenc.c
index e9a49cddd..11f2a8fef 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -299,12 +299,11 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
 }
 
 
-unsigned int file_is_y4m(FILE *infile,
+unsigned int file_is_y4m(FILE      *infile,
                          y4m_input *y4m,
                          char       detect[4])
 {
-    if(memcmp(detect, "YUV4", 4) == 0 &&
-        y4m_input_open(y4m, infile, detect, 4) >= 0)
+    if(memcmp(detect, "YUV4", 4) == 0)
     {
         return 1;
     }
@@ -875,18 +874,26 @@ int main(int argc, const char **argv_)
 
         if (file_is_y4m(infile, &y4m, detect.buf))
         {
-            file_type = FILE_TYPE_Y4M;
-            cfg.g_w = y4m.pic_w;
-            cfg.g_h = y4m.pic_h;
-            /* Use the frame rate from the file only if none was specified on the
-             *  command-line.
-             */
-            if (!arg_have_timebase)
+            if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
             {
-                cfg.g_timebase.num = y4m.fps_d;
-                cfg.g_timebase.den = y4m.fps_n;
+                file_type = FILE_TYPE_Y4M;
+                cfg.g_w = y4m.pic_w;
+                cfg.g_h = y4m.pic_h;
+                /* Use the frame rate from the file only if none was specified
+                 * on the command-line.
+                 */
+                if (!arg_have_timebase)
+                {
+                    cfg.g_timebase.num = y4m.fps_d;
+                    cfg.g_timebase.den = y4m.fps_n;
+                }
+                arg_use_i420 = 0;
+            }
+            else
+            {
+                fprintf(stderr, "Unsupported Y4M stream.\n");
+                return EXIT_FAILURE;
             }
-            arg_use_i420 = 0;
         }
         else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
         {

From f84f94904879fbe6d1f70e3b358ff46d3bce0b80 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 16 Jun 2010 15:43:09 -0400
Subject: [PATCH 045/307] Generate AUTHORS file with a script

This information is in git, so it's better to use that as a source than
updating this file manually. This script can be run manually at release
time for now, or we can set up a cron job sometime in the future.

Change-Id: I0344135ceb9c04ed14e2e2d939a93194e35973db
---
 tools/gen_authors.sh | 13 +++++++++++++
 1 file changed, 13 insertions(+)
 create mode 100755 tools/gen_authors.sh

diff --git a/tools/gen_authors.sh b/tools/gen_authors.sh
new file mode 100755
index 000000000..e1246f08a
--- /dev/null
+++ b/tools/gen_authors.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Add organization names manually.
+
+cat <<EOF
+# This file is automatically generated from the git commit history
+# by tools/gen_authors.sh.
+
+$(git log --pretty=format:"%aN <%aE>" | sort | uniq)
+Google Inc.
+The Mozilla Foundation
+The Xiph.Org Foundation
+EOF

From 51ae606b73e915ab65c99a33bffa56e32c060041 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 16 Jun 2010 15:44:07 -0400
Subject: [PATCH 046/307] Update AUTHORS

Change-Id: I1c6a275278788dfdc630ed436d2c770acfcbd097
---
 AUTHORS | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/AUTHORS b/AUTHORS
index 4ab68811d..9715f0071 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,7 +1,24 @@
-# Names should be added to this file like so:
-# Name or Organization <email address>
+# This file is automatically generated from the git commit history
+# by tools/gen_authors.sh.
 
+Alex Converse <alex.converse@gmail.com>
+Andres Mejia <mcitadel@gmail.com>
+Fabio Pedretti <fabio.ped@libero.it>
+Frank Galligan <fgalligan@google.com>
+Guillermo Ballester Valor <gbvalor@gmail.com>
+James Zern <jzern@google.com>
+John Koleszar <jkoleszar@google.com>
+Justin Clift <justin@salasaga.org>
+Luca Barbato <lu_zero@gentoo.org>
+Makoto Kato <makoto.kt@gmail.com>
+Paul Wilkins <paulwilkins@google.com>
+Pavol Rusnak <stick@gk2.sk>
+Philip Jägenstedt <philipj@opera.com>
+Scott LaVarnway <slavarnway@google.com>
+Timothy B. Terriberry <tterribe@xiph.org>
+Tom Finegan <tomfinegan@google.com>
+Yaowu Xu <yaowu@google.com>
+Yunqing Wang <yunqingwang@google.com>
 Google Inc.
 The Mozilla Foundation
-Timothy B. Terriberry <tterriberry@mozilla.com>
 The Xiph.Org Foundation

From b46a1f93959512dc75311b8c26be9a54a35cd929 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 17 Jun 2010 09:07:33 -0400
Subject: [PATCH 047/307] CHANGELOG: 0.9.1

Change-Id: Icca54b9d51becc49255193801762e1936a07aa2d
---
 CHANGELOG | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index d6c8ce8c4..c445a52df 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,18 @@
+2010-06-17 v0.9.1
+  - Enhancements:
+      * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
+      * Speed optimizations
+  - Bugfixes:
+      * Rate control
+      * Prevent out-of-bounds accesses on invalid data
+  - Build system updates:
+      * Detect toolchain to be used automatically for native builds
+      * Support building shared libraries
+      * Better autotools emulation (--prefix, --libdir, DESTDIR)
+  - Updated LICENSE
+      * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html
+
+
 2010-05-18 v0.9.0
   - Initial open source release. Welcome to WebM and VP8!
 

From 94c52e4da8a3532989c84f6b0da695dfaeb18449 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 18 Jun 2010 12:39:21 -0400
Subject: [PATCH 048/307] cosmetics: trim trailing whitespace

When the license headers were updated, they accidentally contained
trailing whitespace, so unfortunately we have to touch all the files
again.

Change-Id: I236c05fade06589e417179c0444cb39b09e4200d
---
 README                                                    | 6 +++---
 args.c                                                    | 4 ++--
 args.h                                                    | 4 ++--
 build/arm-wince-vs8/obj_int_extract.bat                   | 8 ++++----
 build/make/Makefile                                       | 8 ++++----
 build/make/ads2gas.pl                                     | 8 ++++----
 build/make/ads2gas_apple.pl                               | 8 ++++----
 build/make/armlink_adapter.sh                             | 4 ++--
 build/make/gen_asm_deps.sh                                | 4 ++--
 build/make/gen_msvs_def.sh                                | 4 ++--
 build/make/gen_msvs_proj.sh                               | 4 ++--
 build/make/gen_msvs_sln.sh                                | 4 ++--
 build/make/obj_int_extract.c                              | 4 ++--
 build/make/version.sh                                     | 4 ++--
 docs.mk                                                   | 4 ++--
 example_xma.c                                             | 4 ++--
 examples.mk                                               | 4 ++--
 examples/decoder_tmpl.c                                   | 4 ++--
 examples/encoder_tmpl.c                                   | 4 ++--
 examples/gen_example_code.sh                              | 4 ++--
 examples/gen_example_doxy.php                             | 4 ++--
 examples/gen_example_text.sh                              | 4 ++--
 ivfdec.c                                                  | 4 ++--
 libs.doxy_template                                        | 8 ++++----
 libs.mk                                                   | 4 ++--
 mainpage.dox                                              | 2 +-
 release.sh                                                | 4 ++--
 solution.mk                                               | 4 ++--
 vp8/common/alloccommon.c                                  | 4 ++--
 vp8/common/alloccommon.h                                  | 4 ++--
 vp8/common/arm/armv6/bilinearfilter_v6.asm                | 4 ++--
 vp8/common/arm/armv6/copymem16x16_v6.asm                  | 4 ++--
 vp8/common/arm/armv6/copymem8x4_v6.asm                    | 4 ++--
 vp8/common/arm/armv6/copymem8x8_v6.asm                    | 4 ++--
 vp8/common/arm/armv6/filter_v6.asm                        | 4 ++--
 vp8/common/arm/armv6/idct_v6.asm                          | 4 ++--
 vp8/common/arm/armv6/iwalsh_v6.asm                        | 4 ++--
 vp8/common/arm/armv6/loopfilter_v6.asm                    | 4 ++--
 vp8/common/arm/armv6/recon_v6.asm                         | 4 ++--
 vp8/common/arm/armv6/simpleloopfilter_v6.asm              | 4 ++--
 vp8/common/arm/armv6/sixtappredict8x4_v6.asm              | 4 ++--
 vp8/common/arm/bilinearfilter_arm.c                       | 4 ++--
 vp8/common/arm/filter_arm.c                               | 4 ++--
 vp8/common/arm/idct_arm.h                                 | 4 ++--
 vp8/common/arm/loopfilter_arm.c                           | 4 ++--
 vp8/common/arm/loopfilter_arm.h                           | 4 ++--
 vp8/common/arm/neon/bilinearpredict16x16_neon.asm         | 4 ++--
 vp8/common/arm/neon/bilinearpredict4x4_neon.asm           | 4 ++--
 vp8/common/arm/neon/bilinearpredict8x4_neon.asm           | 4 ++--
 vp8/common/arm/neon/bilinearpredict8x8_neon.asm           | 4 ++--
 vp8/common/arm/neon/buildintrapredictorsmby_neon.asm      | 4 ++--
 vp8/common/arm/neon/copymem16x16_neon.asm                 | 4 ++--
 vp8/common/arm/neon/copymem8x4_neon.asm                   | 4 ++--
 vp8/common/arm/neon/copymem8x8_neon.asm                   | 4 ++--
 vp8/common/arm/neon/iwalsh_neon.asm                       | 4 ++--
 vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm  | 4 ++--
 vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm   | 4 ++--
 .../arm/neon/loopfiltersimplehorizontaledge_neon.asm      | 4 ++--
 vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm | 4 ++--
 vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm    | 4 ++--
 vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm     | 4 ++--
 .../arm/neon/mbloopfilterhorizontaledge_uv_neon.asm       | 4 ++--
 vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm | 4 ++--
 vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm  | 4 ++--
 vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm   | 4 ++--
 vp8/common/arm/neon/recon16x16mb_neon.asm                 | 4 ++--
 vp8/common/arm/neon/recon2b_neon.asm                      | 4 ++--
 vp8/common/arm/neon/recon4b_neon.asm                      | 4 ++--
 vp8/common/arm/neon/reconb_neon.asm                       | 4 ++--
 vp8/common/arm/neon/save_neon_reg.asm                     | 4 ++--
 vp8/common/arm/neon/shortidct4x4llm_1_neon.asm            | 4 ++--
 vp8/common/arm/neon/shortidct4x4llm_neon.asm              | 4 ++--
 vp8/common/arm/neon/sixtappredict16x16_neon.asm           | 4 ++--
 vp8/common/arm/neon/sixtappredict4x4_neon.asm             | 4 ++--
 vp8/common/arm/neon/sixtappredict8x4_neon.asm             | 4 ++--
 vp8/common/arm/neon/sixtappredict8x8_neon.asm             | 4 ++--
 vp8/common/arm/recon_arm.c                                | 4 ++--
 vp8/common/arm/recon_arm.h                                | 4 ++--
 vp8/common/arm/reconintra4x4_arm.c                        | 4 ++--
 vp8/common/arm/reconintra_arm.c                           | 4 ++--
 vp8/common/arm/subpixel_arm.h                             | 4 ++--
 vp8/common/arm/systemdependent.c                          | 4 ++--
 vp8/common/arm/vpx_asm_offsets.c                          | 4 ++--
 vp8/common/bigend.h                                       | 4 ++--
 vp8/common/blockd.c                                       | 4 ++--
 vp8/common/blockd.h                                       | 4 ++--
 vp8/common/boolcoder.h                                    | 4 ++--
 vp8/common/codec_common_interface.h                       | 4 ++--
 vp8/common/coefupdateprobs.h                              | 4 ++--
 vp8/common/common.h                                       | 4 ++--
 vp8/common/common_types.h                                 | 4 ++--
 vp8/common/context.c                                      | 4 ++--
 vp8/common/debugmodes.c                                   | 4 ++--
 vp8/common/defaultcoefcounts.h                            | 4 ++--
 vp8/common/dma_desc.h                                     | 4 ++--
 vp8/common/duck_io.h                                      | 4 ++--
 vp8/common/entropy.c                                      | 4 ++--
 vp8/common/entropy.h                                      | 4 ++--
 vp8/common/entropymode.c                                  | 4 ++--
 vp8/common/entropymode.h                                  | 4 ++--
 vp8/common/entropymv.c                                    | 4 ++--
 vp8/common/entropymv.h                                    | 4 ++--
 vp8/common/extend.c                                       | 4 ++--
 vp8/common/extend.h                                       | 4 ++--
 vp8/common/filter_c.c                                     | 4 ++--
 vp8/common/findnearmv.c                                   | 4 ++--
 vp8/common/findnearmv.h                                   | 4 ++--
 vp8/common/fourcc.hpp                                     | 4 ++--
 vp8/common/g_common.h                                     | 4 ++--
 vp8/common/generic/systemdependent.c                      | 4 ++--
 vp8/common/header.h                                       | 4 ++--
 vp8/common/idct.h                                         | 4 ++--
 vp8/common/idctllm.c                                      | 4 ++--
 vp8/common/invtrans.c                                     | 4 ++--
 vp8/common/invtrans.h                                     | 4 ++--
 vp8/common/littlend.h                                     | 4 ++--
 vp8/common/loopfilter.c                                   | 4 ++--
 vp8/common/loopfilter.h                                   | 4 ++--
 vp8/common/loopfilter_filters.c                           | 4 ++--
 vp8/common/mac_specs.h                                    | 4 ++--
 vp8/common/mbpitch.c                                      | 4 ++--
 vp8/common/modecont.c                                     | 4 ++--
 vp8/common/modecont.h                                     | 4 ++--
 vp8/common/modecontext.c                                  | 4 ++--
 vp8/common/mv.h                                           | 4 ++--
 vp8/common/onyx.h                                         | 4 ++--
 vp8/common/onyxc_int.h                                    | 4 ++--
 vp8/common/onyxd.h                                        | 4 ++--
 vp8/common/partialgfupdate.h                              | 4 ++--
 vp8/common/postproc.c                                     | 4 ++--
 vp8/common/postproc.h                                     | 4 ++--
 vp8/common/ppc/copy_altivec.asm                           | 4 ++--
 vp8/common/ppc/filter_altivec.asm                         | 4 ++--
 vp8/common/ppc/filter_bilinear_altivec.asm                | 4 ++--
 vp8/common/ppc/idctllm_altivec.asm                        | 4 ++--
 vp8/common/ppc/loopfilter_altivec.c                       | 4 ++--
 vp8/common/ppc/loopfilter_filters_altivec.asm             | 4 ++--
 vp8/common/ppc/platform_altivec.asm                       | 4 ++--
 vp8/common/ppc/recon_altivec.asm                          | 4 ++--
 vp8/common/ppc/systemdependent.c                          | 4 ++--
 vp8/common/ppflags.h                                      | 4 ++--
 vp8/common/pragmas.h                                      | 4 ++--
 vp8/common/predictdc.c                                    | 4 ++--
 vp8/common/predictdc.h                                    | 4 ++--
 vp8/common/preproc.h                                      | 4 ++--
 vp8/common/preprocif.h                                    | 4 ++--
 vp8/common/proposed.h                                     | 4 ++--
 vp8/common/quant_common.c                                 | 4 ++--
 vp8/common/quant_common.h                                 | 4 ++--
 vp8/common/recon.c                                        | 4 ++--
 vp8/common/recon.h                                        | 4 ++--
 vp8/common/reconinter.c                                   | 4 ++--
 vp8/common/reconinter.h                                   | 4 ++--
 vp8/common/reconintra.c                                   | 4 ++--
 vp8/common/reconintra.h                                   | 4 ++--
 vp8/common/reconintra4x4.c                                | 4 ++--
 vp8/common/reconintra4x4.h                                | 4 ++--
 vp8/common/segmentation_common.c                          | 4 ++--
 vp8/common/segmentation_common.h                          | 4 ++--
 vp8/common/setupintrarecon.c                              | 4 ++--
 vp8/common/setupintrarecon.h                              | 4 ++--
 vp8/common/subpixel.h                                     | 4 ++--
 vp8/common/swapyv12buffer.c                               | 4 ++--
 vp8/common/swapyv12buffer.h                               | 4 ++--
 vp8/common/systemdependent.h                              | 4 ++--
 vp8/common/textblit.c                                     | 4 ++--
 vp8/common/threading.h                                    | 4 ++--
 vp8/common/treecoder.c                                    | 4 ++--
 vp8/common/treecoder.h                                    | 4 ++--
 vp8/common/type_aliases.h                                 | 4 ++--
 vp8/common/vfwsetting.hpp                                 | 4 ++--
 vp8/common/vpx_ref_build_prefix.h                         | 4 ++--
 vp8/common/vpxblit.h                                      | 4 ++--
 vp8/common/vpxblit_c64.h                                  | 4 ++--
 vp8/common/vpxerrors.h                                    | 4 ++--
 vp8/common/x86/boolcoder.cxx                              | 4 ++--
 vp8/common/x86/idct_x86.h                                 | 4 ++--
 vp8/common/x86/idctllm_mmx.asm                            | 4 ++--
 vp8/common/x86/iwalsh_mmx.asm                             | 4 ++--
 vp8/common/x86/iwalsh_sse2.asm                            | 4 ++--
 vp8/common/x86/loopfilter_mmx.asm                         | 4 ++--
 vp8/common/x86/loopfilter_sse2.asm                        | 4 ++--
 vp8/common/x86/loopfilter_x86.c                           | 4 ++--
 vp8/common/x86/loopfilter_x86.h                           | 4 ++--
 vp8/common/x86/postproc_mmx.asm                           | 4 ++--
 vp8/common/x86/postproc_mmx.c                             | 4 ++--
 vp8/common/x86/postproc_sse2.asm                          | 4 ++--
 vp8/common/x86/postproc_x86.h                             | 4 ++--
 vp8/common/x86/recon_mmx.asm                              | 4 ++--
 vp8/common/x86/recon_sse2.asm                             | 4 ++--
 vp8/common/x86/recon_x86.h                                | 4 ++--
 vp8/common/x86/subpixel_mmx.asm                           | 4 ++--
 vp8/common/x86/subpixel_sse2.asm                          | 4 ++--
 vp8/common/x86/subpixel_x86.h                             | 4 ++--
 vp8/common/x86/vp8_asm_stubs.c                            | 4 ++--
 vp8/common/x86/x86_systemdependent.c                      | 4 ++--
 vp8/decoder/arm/armv5/dequantize_v5.asm                   | 4 ++--
 vp8/decoder/arm/armv6/dboolhuff_v6.asm                    | 4 ++--
 vp8/decoder/arm/armv6/dequantdcidct_v6.asm                | 4 ++--
 vp8/decoder/arm/armv6/dequantidct_v6.asm                  | 4 ++--
 vp8/decoder/arm/armv6/dequantize_v6.asm                   | 4 ++--
 vp8/decoder/arm/dequantize_arm.c                          | 4 ++--
 vp8/decoder/arm/dequantize_arm.h                          | 4 ++--
 vp8/decoder/arm/detokenizearm_sjl.c                       | 4 ++--
 vp8/decoder/arm/detokenizearm_v6.asm                      | 4 ++--
 vp8/decoder/arm/dsystemdependent.c                        | 4 ++--
 vp8/decoder/arm/neon/dboolhuff_neon.asm                   | 4 ++--
 vp8/decoder/arm/neon/dequantdcidct_neon.asm               | 4 ++--
 vp8/decoder/arm/neon/dequantidct_neon.asm                 | 4 ++--
 vp8/decoder/arm/neon/dequantizeb_neon.asm                 | 4 ++--
 vp8/decoder/dboolhuff.c                                   | 4 ++--
 vp8/decoder/dboolhuff.h                                   | 4 ++--
 vp8/decoder/decodemv.c                                    | 4 ++--
 vp8/decoder/decodemv.h                                    | 4 ++--
 vp8/decoder/decoderthreading.h                            | 4 ++--
 vp8/decoder/demode.c                                      | 4 ++--
 vp8/decoder/demode.h                                      | 4 ++--
 vp8/decoder/dequantize.c                                  | 4 ++--
 vp8/decoder/dequantize.h                                  | 4 ++--
 vp8/decoder/detokenize.c                                  | 4 ++--
 vp8/decoder/detokenize.h                                  | 4 ++--
 vp8/decoder/generic/dsystemdependent.c                    | 4 ++--
 vp8/decoder/onyxd_if.c                                    | 4 ++--
 vp8/decoder/onyxd_if_sjl.c                                | 4 ++--
 vp8/decoder/onyxd_int.h                                   | 4 ++--
 vp8/decoder/treereader.h                                  | 4 ++--
 vp8/decoder/x86/dequantize_mmx.asm                        | 4 ++--
 vp8/decoder/x86/dequantize_x86.h                          | 4 ++--
 vp8/decoder/x86/onyxdxv.c                                 | 4 ++--
 vp8/decoder/x86/x86_dsystemdependent.c                    | 4 ++--
 vp8/decoder/xprintf.c                                     | 4 ++--
 vp8/decoder/xprintf.h                                     | 4 ++--
 vp8/encoder/arm/armv6/walsh_v6.asm                        | 4 ++--
 vp8/encoder/arm/boolhuff_arm.c                            | 4 ++--
 vp8/encoder/arm/csystemdependent.c                        | 4 ++--
 vp8/encoder/arm/dct_arm.h                                 | 4 ++--
 vp8/encoder/arm/encodemb_arm.c                            | 4 ++--
 vp8/encoder/arm/encodemb_arm.h                            | 4 ++--
 vp8/encoder/arm/mcomp_arm.c                               | 4 ++--
 vp8/encoder/arm/neon/boolhuff_armv7.asm                   | 4 ++--
 vp8/encoder/arm/neon/fastfdct4x4_neon.asm                 | 4 ++--
 vp8/encoder/arm/neon/fastfdct8x4_neon.asm                 | 4 ++--
 vp8/encoder/arm/neon/fastquantizeb_neon.asm               | 4 ++--
 vp8/encoder/arm/neon/sad16_neon.asm                       | 4 ++--
 vp8/encoder/arm/neon/sad8_neon.asm                        | 4 ++--
 vp8/encoder/arm/neon/shortfdct_neon.asm                   | 4 ++--
 vp8/encoder/arm/neon/subtract_neon.asm                    | 4 ++--
 vp8/encoder/arm/neon/variance_neon.asm                    | 4 ++--
 vp8/encoder/arm/neon/vp8_memcpy_neon.asm                  | 4 ++--
 vp8/encoder/arm/neon/vp8_mse16x16_neon.asm                | 4 ++--
 vp8/encoder/arm/neon/vp8_packtokens_armv7.asm             | 4 ++--
 vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm       | 4 ++--
 vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm  | 4 ++--
 vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm           | 4 ++--
 vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm   | 4 ++--
 vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm  | 4 ++--
 vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm     | 4 ++--
 vp8/encoder/arm/picklpf_arm.c                             | 4 ++--
 vp8/encoder/arm/quantize_arm.c                            | 4 ++--
 vp8/encoder/arm/quantize_arm.h                            | 4 ++--
 vp8/encoder/arm/variance_arm.h                            | 4 ++--
 vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c                 | 4 ++--
 vp8/encoder/bitstream.c                                   | 4 ++--
 vp8/encoder/bitstream.h                                   | 4 ++--
 vp8/encoder/block.h                                       | 4 ++--
 vp8/encoder/boolhuff.c                                    | 4 ++--
 vp8/encoder/boolhuff.h                                    | 4 ++--
 vp8/encoder/dct.c                                         | 4 ++--
 vp8/encoder/dct.h                                         | 4 ++--
 vp8/encoder/encodeframe.c                                 | 6 +++---
 vp8/encoder/encodeintra.c                                 | 4 ++--
 vp8/encoder/encodeintra.h                                 | 4 ++--
 vp8/encoder/encodemb.c                                    | 4 ++--
 vp8/encoder/encodemb.h                                    | 4 ++--
 vp8/encoder/encodemv.c                                    | 6 +++---
 vp8/encoder/encodemv.h                                    | 4 ++--
 vp8/encoder/ethreading.c                                  | 4 ++--
 vp8/encoder/firstpass.c                                   | 4 ++--
 vp8/encoder/firstpass.h                                   | 4 ++--
 vp8/encoder/generic/csystemdependent.c                    | 4 ++--
 vp8/encoder/mcomp.c                                       | 4 ++--
 vp8/encoder/mcomp.h                                       | 4 ++--
 vp8/encoder/modecosts.c                                   | 4 ++--
 vp8/encoder/modecosts.h                                   | 4 ++--
 vp8/encoder/onyx_if.c                                     | 6 +++---
 vp8/encoder/onyx_int.h                                    | 4 ++--
 vp8/encoder/parms.cpp                                     | 4 ++--
 vp8/encoder/pickinter.c                                   | 6 +++---
 vp8/encoder/pickinter.h                                   | 4 ++--
 vp8/encoder/picklpf.c                                     | 4 ++--
 vp8/encoder/ppc/csystemdependent.c                        | 4 ++--
 vp8/encoder/ppc/encodemb_altivec.asm                      | 4 ++--
 vp8/encoder/ppc/fdct_altivec.asm                          | 4 ++--
 vp8/encoder/ppc/rdopt_altivec.asm                         | 4 ++--
 vp8/encoder/ppc/sad_altivec.asm                           | 4 ++--
 vp8/encoder/ppc/variance_altivec.asm                      | 4 ++--
 vp8/encoder/ppc/variance_subpixel_altivec.asm             | 4 ++--
 vp8/encoder/preproc.c                                     | 4 ++--
 vp8/encoder/psnr.c                                        | 4 ++--
 vp8/encoder/psnr.h                                        | 4 ++--
 vp8/encoder/quantize.c                                    | 4 ++--
 vp8/encoder/quantize.h                                    | 4 ++--
 vp8/encoder/ratectrl.c                                    | 4 ++--
 vp8/encoder/ratectrl.h                                    | 4 ++--
 vp8/encoder/rdopt.c                                       | 6 +++---
 vp8/encoder/rdopt.h                                       | 4 ++--
 vp8/encoder/sad_c.c                                       | 4 ++--
 vp8/encoder/ssim.c                                        | 4 ++--
 vp8/encoder/tokenize.c                                    | 4 ++--
 vp8/encoder/tokenize.h                                    | 4 ++--
 vp8/encoder/treewriter.c                                  | 4 ++--
 vp8/encoder/treewriter.h                                  | 4 ++--
 vp8/encoder/variance.h                                    | 4 ++--
 vp8/encoder/variance_c.c                                  | 4 ++--
 vp8/encoder/x86/csystemdependent.c                        | 4 ++--
 vp8/encoder/x86/dct_mmx.asm                               | 4 ++--
 vp8/encoder/x86/dct_sse2.asm                              | 4 ++--
 vp8/encoder/x86/dct_x86.h                                 | 4 ++--
 vp8/encoder/x86/encodemb_x86.h                            | 4 ++--
 vp8/encoder/x86/encodeopt.asm                             | 4 ++--
 vp8/encoder/x86/fwalsh_sse2.asm                           | 4 ++--
 vp8/encoder/x86/mcomp_x86.h                               | 4 ++--
 vp8/encoder/x86/preproc_mmx.c                             | 4 ++--
 vp8/encoder/x86/quantize_mmx.asm                          | 4 ++--
 vp8/encoder/x86/sad_mmx.asm                               | 4 ++--
 vp8/encoder/x86/sad_sse2.asm                              | 4 ++--
 vp8/encoder/x86/sad_sse3.asm                              | 4 ++--
 vp8/encoder/x86/sad_ssse3.asm                             | 4 ++--
 vp8/encoder/x86/subtract_mmx.asm                          | 4 ++--
 vp8/encoder/x86/variance_impl_mmx.asm                     | 4 ++--
 vp8/encoder/x86/variance_impl_sse2.asm                    | 4 ++--
 vp8/encoder/x86/variance_mmx.c                            | 4 ++--
 vp8/encoder/x86/variance_sse2.c                           | 4 ++--
 vp8/encoder/x86/variance_x86.h                            | 4 ++--
 vp8/encoder/x86/x86_csystemdependent.c                    | 4 ++--
 vp8/vp8_common.mk                                         | 4 ++--
 vp8/vp8_cx_iface.c                                        | 4 ++--
 vp8/vp8_dx_iface.c                                        | 4 ++--
 vp8/vp8cx.mk                                              | 4 ++--
 vp8/vp8cx_arm.mk                                          | 4 ++--
 vp8/vp8dx.mk                                              | 4 ++--
 vp8/vp8dx_arm.mk                                          | 4 ++--
 vpx/internal/vpx_codec_internal.h                         | 4 ++--
 vpx/src/vpx_codec.c                                       | 4 ++--
 vpx/src/vpx_decoder.c                                     | 4 ++--
 vpx/src/vpx_decoder_compat.c                              | 4 ++--
 vpx/src/vpx_encoder.c                                     | 4 ++--
 vpx/src/vpx_image.c                                       | 4 ++--
 vpx/vp8.h                                                 | 4 ++--
 vpx/vp8cx.h                                               | 4 ++--
 vpx/vp8dx.h                                               | 4 ++--
 vpx/vp8e.h                                                | 4 ++--
 vpx/vpx_codec.h                                           | 4 ++--
 vpx/vpx_codec.mk                                          | 4 ++--
 vpx/vpx_codec_impl_bottom.h                               | 4 ++--
 vpx/vpx_codec_impl_top.h                                  | 4 ++--
 vpx/vpx_decoder.h                                         | 4 ++--
 vpx/vpx_decoder_compat.h                                  | 4 ++--
 vpx/vpx_encoder.h                                         | 4 ++--
 vpx/vpx_image.h                                           | 8 ++++----
 vpx/vpx_integer.h                                         | 4 ++--
 vpx_mem/include/nds/vpx_mem_nds.h                         | 4 ++--
 vpx_mem/include/vpx_mem_intrnl.h                          | 4 ++--
 vpx_mem/include/vpx_mem_tracker.h                         | 4 ++--
 vpx_mem/intel_linux/vpx_mem.c                             | 4 ++--
 vpx_mem/intel_linux/vpx_mem_tracker.c                     | 4 ++--
 vpx_mem/memory_manager/hmm_alloc.c                        | 4 ++--
 vpx_mem/memory_manager/hmm_base.c                         | 4 ++--
 vpx_mem/memory_manager/hmm_dflt_abort.c                   | 4 ++--
 vpx_mem/memory_manager/hmm_grow.c                         | 4 ++--
 vpx_mem/memory_manager/hmm_largest.c                      | 4 ++--
 vpx_mem/memory_manager/hmm_resize.c                       | 4 ++--
 vpx_mem/memory_manager/hmm_shrink.c                       | 4 ++--
 vpx_mem/memory_manager/hmm_true.c                         | 4 ++--
 vpx_mem/memory_manager/include/cavl_if.h                  | 4 ++--
 vpx_mem/memory_manager/include/cavl_impl.h                | 4 ++--
 vpx_mem/memory_manager/include/heapmm.h                   | 4 ++--
 vpx_mem/memory_manager/include/hmm_cnfg.h                 | 4 ++--
 vpx_mem/memory_manager/include/hmm_intrnl.h               | 4 ++--
 vpx_mem/nds/vpx_mem_nds.c                                 | 4 ++--
 vpx_mem/ti_c6x/vpx_mem_ti_6cx.c                           | 4 ++--
 vpx_mem/vpx_mem.c                                         | 4 ++--
 vpx_mem/vpx_mem.h                                         | 4 ++--
 vpx_mem/vpx_mem_tracker.c                                 | 4 ++--
 vpx_ports/config.h                                        | 4 ++--
 vpx_ports/emms.asm                                        | 4 ++--
 vpx_ports/mem.h                                           | 4 ++--
 vpx_ports/mem_ops.h                                       | 4 ++--
 vpx_ports/mem_ops_aligned.h                               | 4 ++--
 vpx_ports/vpx_timer.h                                     | 4 ++--
 vpx_ports/vpxtypes.h                                      | 4 ++--
 vpx_ports/x86.h                                           | 4 ++--
 vpx_ports/x86_abi_support.asm                             | 4 ++--
 vpx_scale/arm/armv4/gen_scalers_armv4.asm                 | 4 ++--
 vpx_scale/arm/nds/yv12extend.c                            | 4 ++--
 vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm    | 4 ++--
 vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm    | 4 ++--
 vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm | 4 ++--
 .../arm/neon/vp8_vpxyv12_extendframeborders_neon.asm      | 4 ++--
 vpx_scale/arm/scalesystemdependant.c                      | 4 ++--
 vpx_scale/arm/yv12extend_arm.c                            | 4 ++--
 vpx_scale/blackfin/yv12config.c                           | 4 ++--
 vpx_scale/blackfin/yv12extend.c                           | 4 ++--
 vpx_scale/dm642/bicubic_scaler_c64.c                      | 4 ++--
 vpx_scale/dm642/gen_scalers_c64.c                         | 4 ++--
 vpx_scale/dm642/yv12extend.c                              | 4 ++--
 vpx_scale/generic/bicubic_scaler.c                        | 4 ++--
 vpx_scale/generic/gen_scalers.c                           | 4 ++--
 vpx_scale/generic/scalesystemdependant.c                  | 4 ++--
 vpx_scale/generic/vpxscale.c                              | 4 ++--
 vpx_scale/generic/yv12config.c                            | 4 ++--
 vpx_scale/generic/yv12extend.c                            | 4 ++--
 vpx_scale/include/arm/vpxscale_nofp.h                     | 4 ++--
 vpx_scale/include/generic/vpxscale_arbitrary.h            | 4 ++--
 vpx_scale/include/generic/vpxscale_depricated.h           | 4 ++--
 vpx_scale/include/generic/vpxscale_nofp.h                 | 4 ++--
 vpx_scale/include/leapster/vpxscale.h                     | 4 ++--
 vpx_scale/include/symbian/vpxscale_nofp.h                 | 4 ++--
 vpx_scale/include/vpxscale_nofp.h                         | 4 ++--
 vpx_scale/intel_linux/scaleopt.c                          | 4 ++--
 vpx_scale/intel_linux/scalesystemdependant.c              | 4 ++--
 vpx_scale/leapster/doptsystemdependant_lf.c               | 4 ++--
 vpx_scale/leapster/gen_scalers_lf.c                       | 4 ++--
 vpx_scale/leapster/vpxscale_lf.c                          | 4 ++--
 vpx_scale/leapster/yv12extend.c                           | 4 ++--
 vpx_scale/scale_mode.h                                    | 4 ++--
 vpx_scale/symbian/gen_scalers_armv4.asm                   | 4 ++--
 vpx_scale/symbian/scalesystemdependant.c                  | 4 ++--
 vpx_scale/vpxscale.h                                      | 4 ++--
 vpx_scale/wce/gen_scalers_armv4.asm                       | 4 ++--
 vpx_scale/wce/scalesystemdependant.c                      | 4 ++--
 vpx_scale/win32/scaleopt.c                                | 4 ++--
 vpx_scale/win32/scalesystemdependant.c                    | 4 ++--
 vpx_scale/x86_64/scaleopt.c                               | 4 ++--
 vpx_scale/x86_64/scalesystemdependant.c                   | 4 ++--
 vpx_scale/yv12config.h                                    | 4 ++--
 vpx_scale/yv12extend.h                                    | 4 ++--
 wince_wmain_adapter.cpp                                   | 4 ++--
 y4minput.c                                                | 4 ++--
 y4minput.h                                                | 4 ++--
 440 files changed, 897 insertions(+), 897 deletions(-)

diff --git a/README b/README
index cfaf4ccd0..f0625d3d7 100644
--- a/README
+++ b/README
@@ -9,18 +9,18 @@ COMPILING THE APPLICATIONS/LIBRARIES:
   the application.
 
   1. Prerequisites
-  
+
     * All x86 targets require the Yasm[1] assembler be installed.
     * All Windows builds require that Cygwin[2] be installed.
     * Building the documentation requires PHP[3] and Doxygen[4]. If you do not
       have these packages, you must pass --disable-install-docs to the
       configure script.
-    
+
     [1]: http://www.tortall.net/projects/yasm
     [2]: http://www.cygwin.com
     [3]: http://php.net
     [4]: http://www.doxygen.org
-    
+
   2. Out-of-tree builds
   Out of tree builds are a supported method of building the application. For
   an out of tree build, the source tree is kept separate from the object
diff --git a/args.c b/args.c
index 5fd046671..0b9772bd5 100644
--- a/args.c
+++ b/args.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/args.h b/args.h
index f1ec6c0f3..a75c43f97 100644
--- a/args.h
+++ b/args.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/build/arm-wince-vs8/obj_int_extract.bat b/build/arm-wince-vs8/obj_int_extract.bat
index 306916010..84894508b 100644
--- a/build/arm-wince-vs8/obj_int_extract.bat
+++ b/build/arm-wince-vs8/obj_int_extract.bat
@@ -1,11 +1,11 @@
 @echo off
 REM   Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-REM 
-REM   Use of this source code is governed by a BSD-style license 
+REM
+REM   Use of this source code is governed by a BSD-style license
 REM   that can be found in the LICENSE file in the root of the source
 REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may 
-REM   be found in the AUTHORS file in the root of the source tree.    
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
 echo on
 
 
diff --git a/build/make/Makefile b/build/make/Makefile
index 4f7df439a..931614f8a 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -1,11 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##  
-##  Use of this source code is governed by a BSD-style license 
+##
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
-##  be found in the AUTHORS file in the root of the source tree.    
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index bd5e7722f..276db996d 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -1,12 +1,12 @@
 #!/usr/bin/perl
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##  
-##  Use of this source code is governed by a BSD-style license 
+##
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
-##  be found in the AUTHORS file in the root of the source tree.    
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index 7a324452c..b90c682c1 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -1,12 +1,12 @@
 #!/usr/bin/env perl
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##  
-##  Use of this source code is governed by a BSD-style license 
+##
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
-##  be found in the AUTHORS file in the root of the source tree.    
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh
index 25fb6277e..df31c157a 100755
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh
index 2c972e5a0..53a8909c4 100755
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/build/make/gen_msvs_def.sh b/build/make/gen_msvs_def.sh
index b5c54fda9..a231b4916 100755
--- a/build/make/gen_msvs_def.sh
+++ b/build/make/gen_msvs_def.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index d94ae4d80..7dd0ad123 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index ef425f565..b0904f99e 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 0f64e8921..289d2e1a7 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/build/make/version.sh b/build/make/version.sh
index 81682d614..ff97300d2 100755
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/docs.mk b/docs.mk
index f60a482b8..f90d15711 100644
--- a/docs.mk
+++ b/docs.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/example_xma.c b/example_xma.c
index e5eca7af2..69d14f4b8 100644
--- a/example_xma.c
+++ b/example_xma.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/examples.mk b/examples.mk
index bca0d9c9d..c2cf88c08 100644
--- a/examples.mk
+++ b/examples.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index 826ad201c..561d732a7 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index 9e262e9e5..c549784cb 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/examples/gen_example_code.sh b/examples/gen_example_code.sh
index dc7ad7286..4b221308a 100755
--- a/examples/gen_example_code.sh
+++ b/examples/gen_example_code.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/examples/gen_example_doxy.php b/examples/gen_example_doxy.php
index ba2617f28..a2d0790ed 100755
--- a/examples/gen_example_doxy.php
+++ b/examples/gen_example_doxy.php
@@ -2,10 +2,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/examples/gen_example_text.sh b/examples/gen_example_text.sh
index fcc73d8c1..f872acd38 100755
--- a/examples/gen_example_text.sh
+++ b/examples/gen_example_text.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/ivfdec.c b/ivfdec.c
index 2b26d55e8..0985dfbdd 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/libs.doxy_template b/libs.doxy_template
index ce8fde637..92334089e 100644
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -1,11 +1,11 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-##  
-##  Use of this source code is governed by a BSD-style license 
+##
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
-##  be found in the AUTHORS file in the root of the source tree.    
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
 ##
 
 
diff --git a/libs.mk b/libs.mk
index 115ceb537..21c25c0c4 100644
--- a/libs.mk
+++ b/libs.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/mainpage.dox b/mainpage.dox
index 3596ce0d7..49dff7b5b 100644
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -11,7 +11,7 @@
   source codec deployed on millions of computers and devices worldwide.
 
   This distribution of the WebM VP8 Codec SDK includes the following support:
-  
+
   \if vp8_encoder    - \ref vp8_encoder   \endif
   \if vp8_decoder    - \ref vp8_decoder   \endif
 
diff --git a/release.sh b/release.sh
index 880ad0f59..ca8ef71d1 100755
--- a/release.sh
+++ b/release.sh
@@ -2,10 +2,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/solution.mk b/solution.mk
index 21bf065fb..3b6e50eb1 100644
--- a/solution.mk
+++ b/solution.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 9f6397e48..c3368b5cf 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index b87741281..63dcd8a36 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index ac0d3330f..ae3526211 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm
index 344c4535b..b74e294e4 100644
--- a/vp8/common/arm/armv6/copymem16x16_v6.asm
+++ b/vp8/common/arm/armv6/copymem16x16_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm
index 3556b3a98..5488131fc 100644
--- a/vp8/common/arm/armv6/copymem8x4_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x4_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm
index 1da0ff5b6..0c1d4bd23 100644
--- a/vp8/common/arm/armv6/copymem8x8_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x8_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index cdc74bab3..e37d3aa67 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index 9e932fa06..d9913c75e 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index 460678330..f4002b2ce 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index eeeacd330..a51da10eb 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
index 6f3ccbef9..de0449350 100644
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ b/vp8/common/arm/armv6/recon_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index b820cedb1..3ba6ccaca 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 641546390..781aa5fcc 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index b93539ca6..363eabde5 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 233be24dd..c67f5663c 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index cfd9d76d8..87d888de2 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index d98c90867..bb4af2205 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index b59e2b58f..9a19386db 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
index 076a3d33c..668772312 100644
--- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
index f199ba3f3..4cbc63a4a 100644
--- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
index 9a3a03938..dd8b4c9a2 100644
--- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
index 10a636690..7c8cc988e 100644
--- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index 7cd9d7562..1e4340361 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
index b25bfdcbc..58961c0ec 100644
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ b/vp8/common/arm/neon/copymem16x16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
index 0c62ee251..296d11fb1 100644
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ b/vp8/common/arm/neon/copymem8x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
index 84e0afda3..4f40b5a12 100644
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ b/vp8/common/arm/neon/copymem8x8_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index b8199ce9a..18b56537c 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
     EXPORT  |vp8_short_inv_walsh4x4_neon|
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
index 5d25e3d89..7590d0bd1 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
index ebe52fc99..99458e601 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index dbbdd74c8..542d74659 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index 480e3184b..515ded43e 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
index a402282e0..2722dd9ac 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
index 18eba9f50..7e7eb9fe5 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
index 21b85dadd..759143da0 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
index 64d98c67d..321e2b93b 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index 0e72e8046..9d9ae5069 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index 91396a180..c11fcde2a 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm
index 7c06c0308..595484e31 100644
--- a/vp8/common/arm/neon/recon16x16mb_neon.asm
+++ b/vp8/common/arm/neon/recon16x16mb_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm
index 3d87e2dac..b33e58a30 100644
--- a/vp8/common/arm/neon/recon2b_neon.asm
+++ b/vp8/common/arm/neon/recon2b_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm
index 63cd98715..2ff204ac4 100644
--- a/vp8/common/arm/neon/recon4b_neon.asm
+++ b/vp8/common/arm/neon/recon4b_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
index 0ecdc1423..687b4b8f1 100644
--- a/vp8/common/arm/neon/reconb_neon.asm
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm
index f5db2a8e7..32539ac4d 100644
--- a/vp8/common/arm/neon/save_neon_reg.asm
+++ b/vp8/common/arm/neon/save_neon_reg.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
index 24e5fed12..ff1590fb8 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index c566c67fc..76721c1df 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index 6f3716db0..d6a207063 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index 6fe9eadcc..0b5504fab 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index a6ff4f776..0b19e0b31 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index bb35ae4ce..3fbe5644a 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
index 2cc9ee77e..2bfdda876 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index 392297be4..2d5bfac52 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
index 65fb1f027..b51c26b08 100644
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 29f4a2c47..3b4fe1a5b 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 0eb2c58d8..e86e1252f 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index 27d3deec0..bd6d146c0 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index b1e64571f..33adccf31 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
index cd6b9886c..98b9aa54f 100644
--- a/vp8/common/bigend.h
+++ b/vp8/common/bigend.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index e0ed56129..7bb127ae8 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 2b25f62b4..865b8c18f 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h
index 66f67c284..048756762 100644
--- a/vp8/common/boolcoder.h
+++ b/vp8/common/boolcoder.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h
index d836564d8..e53d4e59e 100644
--- a/vp8/common/codec_common_interface.h
+++ b/vp8/common/codec_common_interface.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 6131d1269..01a4f89b6 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/common.h b/vp8/common/common.h
index bfa8a9c41..b8b2eaa60 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
index a307ed6f1..f5b69f49e 100644
--- a/vp8/common/common_types.h
+++ b/vp8/common/common_types.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/context.c b/vp8/common/context.c
index f0cb83845..af7471167 100644
--- a/vp8/common/context.c
+++ b/vp8/common/context.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index e66981413..edd583484 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index f9247d290..b41a618ad 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
index 765405d4a..d4d9f59fb 100644
--- a/vp8/common/dma_desc.h
+++ b/vp8/common/dma_desc.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
index 02f6895a1..0bf41895d 100644
--- a/vp8/common/duck_io.h
+++ b/vp8/common/duck_io.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index 8d01bf8f4..c55cddeec 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 29be82c9d..feed4aff4 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 72cbd6411..493728d5d 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index bd44b83ff..afa513d3c 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 176fecd82..a98d06aba 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index 395984c44..31895d592 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 43d7aed85..a16ff2bad 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index bb8a01650..157a67c40 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index f24f8a287..7145f3fb9 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index 550681ece..48139428f 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index 3e0718a10..f467ab635 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp
index 9823b5611..7a85eee69 100644
--- a/vp8/common/fourcc.hpp
+++ b/vp8/common/fourcc.hpp
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
index 3f434010a..d5d005ec7 100644
--- a/vp8/common/g_common.h
+++ b/vp8/common/g_common.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 6e64885c7..5f7f943f5 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/header.h b/vp8/common/header.h
index b8b905973..40bef73a5 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 2185bd357..adf7771ed 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 4261d241e..2866fa4bb 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 00502c62e..d1822c8bf 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index be30ca002..0d502d285 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
index 0961163a9..0aa76df69 100644
--- a/vp8/common/littlend.h
+++ b/vp8/common/littlend.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 4937195d3..19fa865f8 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index a9a976eb8..f051a3151 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index eaf7327b4..2d209ee98 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h
index a12b8d59c..58721e10c 100644
--- a/vp8/common/mac_specs.h
+++ b/vp8/common/mac_specs.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index b183b8e30..4894bd96c 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index c008eef63..e2008ce92 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 4b79722e5..88dc626f7 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index a4b2f7629..a724d17bf 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index c3db5f03e..aca788be8 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index 3ed6f2db2..b909e725a 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index d1cb76618..4d5d9878b 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index ea04c14ae..a03bd5ded 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h
index 355aa795b..6d83d209b 100644
--- a/vp8/common/partialgfupdate.h
+++ b/vp8/common/partialgfupdate.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 1f36d4ee9..1670a1aa2 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index e148f2a7a..1c16995e7 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm
index 5ca2d170c..6d855fce4 100644
--- a/vp8/common/ppc/copy_altivec.asm
+++ b/vp8/common/ppc/copy_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm
index 1a7ebf7b9..7698add3d 100644
--- a/vp8/common/ppc/filter_altivec.asm
+++ b/vp8/common/ppc/filter_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm
index 73e758e13..08acc910e 100644
--- a/vp8/common/ppc/filter_bilinear_altivec.asm
+++ b/vp8/common/ppc/filter_bilinear_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm
index 9ebe6af4f..039157a19 100644
--- a/vp8/common/ppc/idctllm_altivec.asm
+++ b/vp8/common/ppc/idctllm_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c
index 8bf5e5760..ad02f9349 100644
--- a/vp8/common/ppc/loopfilter_altivec.c
+++ b/vp8/common/ppc/loopfilter_altivec.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm
index 26c51a6c2..03b25a78c 100644
--- a/vp8/common/ppc/loopfilter_filters_altivec.asm
+++ b/vp8/common/ppc/loopfilter_filters_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm
index 23680c94e..a1442cbcf 100644
--- a/vp8/common/ppc/platform_altivec.asm
+++ b/vp8/common/ppc/platform_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm
index 212664dc5..dbe7e4368 100644
--- a/vp8/common/ppc/recon_altivec.asm
+++ b/vp8/common/ppc/recon_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 4ccf69061..87547fc55 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 57aeb1d7c..d9eacb3df 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index 523c8b70c..519c68523 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c
index 18d7da86d..6eb5fae4f 100644
--- a/vp8/common/predictdc.c
+++ b/vp8/common/predictdc.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h
index 69036eea4..8af95fae9 100644
--- a/vp8/common/predictdc.h
+++ b/vp8/common/predictdc.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h
index a02745c72..ec8d075ea 100644
--- a/vp8/common/preproc.h
+++ b/vp8/common/preproc.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h
index f700f7688..e2ea2cad5 100644
--- a/vp8/common/preprocif.h
+++ b/vp8/common/preprocif.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h
index 65b783494..c0085765d 100644
--- a/vp8/common/proposed.h
+++ b/vp8/common/proposed.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index 6fd3bc01e..658629c42 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 49d11bc5b..7e14c7a34 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index b09ef37ca..c37585c31 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index 607895ca5..03eaab20b 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 91ec76be8..d17dc26af 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 9df480637..8067acbef 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index 23d87ee1f..7133b0915 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index b7c4d1d66..ec5f51440 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 3b22423a5..b77d341ca 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index 881d091b6..e26961622 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/segmentation_common.c b/vp8/common/segmentation_common.c
index 2568c7cea..5df1b496b 100644
--- a/vp8/common/segmentation_common.c
+++ b/vp8/common/segmentation_common.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h
index 7b36b49e0..41c7f7f63 100644
--- a/vp8/common/segmentation_common.h
+++ b/vp8/common/segmentation_common.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index e796d4203..a8097ee29 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index ea4e34205..56485b738 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h
index 446697c30..a08d38784 100644
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c
index 5bdf431a2..9742e34aa 100644
--- a/vp8/common/swapyv12buffer.c
+++ b/vp8/common/swapyv12buffer.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index f2c3b745f..80121b8dc 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 56218e603..5d432745b 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index 5d117f1b0..f999a0c8f 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index 7c94645a0..96be710c4 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 0ccd64dae..5829cb701 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 908dbcb41..c8f5af96d 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index a0d871746..98da4eec0 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp
index c01a0ddd5..18efafec5 100644
--- a/vp8/common/vfwsetting.hpp
+++ b/vp8/common/vfwsetting.hpp
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h
index cded66c17..7d91d5646 100644
--- a/vp8/common/vpx_ref_build_prefix.h
+++ b/vp8/common/vpx_ref_build_prefix.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h
index 2c7f673e1..83dfbafb2 100644
--- a/vp8/common/vpxblit.h
+++ b/vp8/common/vpxblit.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h
index 7659b5cb3..ab3da5229 100644
--- a/vp8/common/vpxblit_c64.h
+++ b/vp8/common/vpxblit_c64.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h
index f0ec707ba..79e2bc914 100644
--- a/vp8/common/vpxerrors.h
+++ b/vp8/common/vpxerrors.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx
index cd9c495bf..1238dd2d2 100644
--- a/vp8/common/x86/boolcoder.cxx
+++ b/vp8/common/x86/boolcoder.cxx
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index 1f2cb631b..0ad8f55cf 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 5ec01e9c4..1b18bd38d 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 6cb897910..26f04e3f2 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index bb0d1d7ae..17337f0c1 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 6e6efabe0..7a9b6792f 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index d160dd65a..f11fcadec 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index f5af7cffe..3a9437e4d 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index 503bf5b3a..cf6ceab07 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 070765109..2f6ea4911 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
index f3b29234a..1b3d60d9d 100644
--- a/vp8/common/x86/postproc_mmx.c
+++ b/vp8/common/x86/postproc_mmx.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 9e56429e3..b6e98d8cd 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h
index f93942733..b542728ad 100644
--- a/vp8/common/x86/postproc_x86.h
+++ b/vp8/common/x86/postproc_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index 95c308d7d..b1eb629b7 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index cfdbfada9..716818906 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index fcd429c1c..d83328e66 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index b3e4ad52c..ba747f7a0 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index ee383ad47..4eeab0c66 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index bd6859cf4..c406be789 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 163ec5b29..79650cf34 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 09ff3c545..677eafa84 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index 80b2e0cc7..a949d5774 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index eca8eeb72..e181b3081 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
index c5b0b7b9f..025287223 100644
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
index 0d1c6b448..15e4c6814 100644
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantidct_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index c35e7c630..ba0ab7e1d 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 913267dc4..e39f7d2d1 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index ae7cf8e45..31fc5d05c 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
index a126a0555..e9917c175 100644
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ b/vp8/decoder/arm/detokenizearm_sjl.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
index 439c9abdc..92fd83656 100644
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ b/vp8/decoder/arm/detokenizearm_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 6defa3d5d..1504216fd 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 01315a40e..321ca6515 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
index 482f02dee..ae126f23f 100644
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequantdcidct_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequantidct_neon.asm
index 3d00dbf15..e0888ed35 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequantidct_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index 14698f8f4..da1ca5c24 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 389aed013..2aba5583d 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 7ec235786..6ddceee20 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 2b004df05..44b98e773 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 8b7fb684f..08134fbe2 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index 6c0363b5f..5685c0479 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
index 07be9fb35..436725de6 100644
--- a/vp8/decoder/demode.c
+++ b/vp8/decoder/demode.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
index 8d2fbee8e..ca9b80a4f 100644
--- a/vp8/decoder/demode.h
+++ b/vp8/decoder/demode.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 2c286ec77..b023d28eb 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 7fd7cbbc7..a02ea7e81 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index b202b7b44..09547966d 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 6cfb66bb2..2f6b4a996 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 4585d0882..272d422ad 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 76387f564..3a237de1f 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
index 12d28a5b9..e68a7a075 100644
--- a/vp8/decoder/onyxd_if_sjl.c
+++ b/vp8/decoder/onyxd_if_sjl.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 2eea614a2..e02c96228 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index f1893df31..a850df6d5 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 1611e034d..2dcf548f6 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 4c91633aa..743a8f5dd 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 22d0548cb..6fd0e25fe 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 2dfb469b7..957fb1d67 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
index 7465010ae..80be17c4c 100644
--- a/vp8/decoder/xprintf.c
+++ b/vp8/decoder/xprintf.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
index 765607556..fa2e15d19 100644
--- a/vp8/decoder/xprintf.h
+++ b/vp8/decoder/xprintf.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm
index 461e49290..13d0864ff 100644
--- a/vp8/encoder/arm/armv6/walsh_v6.asm
+++ b/vp8/encoder/arm/armv6/walsh_v6.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index 8c0faffd4..bb02a4aed 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 7fba99589..4521bfc31 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index bb60c9d24..433489c70 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index 7d58f9438..10f5114c7 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index 525135a41..f59ef9f7e 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 9418a60bb..709e2e824 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/neon/boolhuff_armv7.asm
index 674eea960..13201f7a4 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/neon/boolhuff_armv7.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
index 44e6dfc73..4af7687aa 100644
--- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
index 6a286f6ab..774027aec 100644
--- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index e3a94a6dd..118a11f0f 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm
index 7e2ab13de..7b102b9c9 100644
--- a/vp8/encoder/arm/neon/sad16_neon.asm
+++ b/vp8/encoder/arm/neon/sad16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm
index fc8c4e132..7c1cc9828 100644
--- a/vp8/encoder/arm/neon/sad8_neon.asm
+++ b/vp8/encoder/arm/neon/sad8_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 4399c9700..39d31cb1c 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index d4803ff6e..d66d203b7 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index b90169313..903c45137 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index 0a372c39d..0834a2fd5 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index 087ea1716..e86d017c6 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
index bfa97d720..82595860d 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
index 334c88feb..bed3255aa 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
index 267e21649..8590ea02a 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
index ebd5dc1bc..9c750bad4 100644
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 185277f23..65c2a66a6 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 611b1e447..6af89a1b8 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index 614d48fc0..e491d47e5 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index fb0b3bdc9..31e95eadc 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index e8bd44b44..68dcdd73b 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index 14bc923cd..5a7b0caac 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 1c160202c..8097ed984 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
index 28aac70cb..9392b603c 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index ce9d2fd66..3d7c7612d 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index de4b94ded..07b73c31a 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 2e866ddf4..c1fcfe29a 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index 2e95c7579..c5ddf802a 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 95635e3ed..7a0ba8f7a 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 4f5f9f056..3075e5853 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 2aaf731cf..f79dba4f2 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 46b697e8c..32cef1db1 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -468,7 +468,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 
 #if 0
     // Experimental code
-    cpi->frame_distortion = 0;     
+    cpi->frame_distortion = 0;
     cpi->last_mb_distortion = 0;
 #endif
 
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index d632bd85d..0e160930d 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index 49b3257a5..d51b95fb1 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index a66b90c53..824850c41 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 5285a385a..423935248 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index 3b58a80da..d945a779f 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -253,7 +253,7 @@ static void write_component_probs(
     vp8_writer *const w,
     struct mv_context *cur_mvc,
     const struct mv_context *default_mvc_,
-    const struct mv_context *update_mvc,       
+    const struct mv_context *update_mvc,
     const unsigned int events [MVvals],
     unsigned int rc,
     int *updated
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index bf6d7af32..d18c9de2c 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 11b19368f..a205667dc 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index ced2d7c66..7625c9c49 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index 48257ce66..c6f3e18a9 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 96028b313..e68d65025 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 3c1507ff6..4c3edd708 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 40cbb073c..1c1714111 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index 6632a3629..df3d7d608 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index 0c46acd12..d71abe89b 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 98827f961..3680b46d6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -1922,7 +1922,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
         VP8_COMP *cpi;
         VP8_PTR   ptr;
     } ctx;
-    
+
     VP8_COMP *cpi;
     VP8_COMMON *cm;
 
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 889bf735f..fcde2205d 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index d7d30a8f1..fccc9db00 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index a50f99bf3..bb6348d5f 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -572,7 +572,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         distortion2 = 0;
 
         this_mode = vp8_mode_order[mode_index];
-        
+
         // Experimental debug code.
         //all_rds[mode_index] = -1;
 
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 76fdb99f1..057794b82 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 0527b80a6..4f3dba6bc 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index 66fca8ff5..3af628852 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
index 14a36a17e..ab2341bab 100644
--- a/vp8/encoder/ppc/encodemb_altivec.asm
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm
index 01f336407..2829b9260 100644
--- a/vp8/encoder/ppc/fdct_altivec.asm
+++ b/vp8/encoder/ppc/fdct_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm
index 4f9b050a7..6ef585c60 100644
--- a/vp8/encoder/ppc/rdopt_altivec.asm
+++ b/vp8/encoder/ppc/rdopt_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm
index 6d927280e..84ecc7756 100644
--- a/vp8/encoder/ppc/sad_altivec.asm
+++ b/vp8/encoder/ppc/sad_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index 4e3fd5984..5126d68c4 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 4dcf7e44f..184c8a043 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c
index e9cc0752c..dc2d10ebf 100644
--- a/vp8/encoder/preproc.c
+++ b/vp8/encoder/preproc.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index c5e9dad8c..f693c5e6f 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index dd0b4e587..29cb4099f 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 73e80e36c..181870c11 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index ca073ef0a..775641893 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 944a2e832..b309d5364 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index ff5778fd5..9124c3383 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 9a772a706..2d6dee139 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -1541,7 +1541,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         //all_rds[mode_index] = -1;
         //all_rates[mode_index] = -1;
         //all_dist[mode_index] = -1;
-        //intermodecost[mode_index] = -1;  
+        //intermodecost[mode_index] = -1;
 
         // Test best rd so far against threshold for trying this mode.
         if (best_rd <= cpi->rd_threshes[mode_index])
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index 617241dfb..e3766661e 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 1914c60b7..e9e565278 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index 35dd10c88..dd0c82a77 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 819f6a58b..29be6b62b 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 51f912b06..6f154381a 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c
index 942442b09..3ce4267f6 100644
--- a/vp8/encoder/treewriter.c
+++ b/vp8/encoder/treewriter.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index 075df50ca..c0d45ee89 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 6610e7d68..8113cfcb8 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index efcf2b7a3..1030d8564 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index 8bc687785..6aeac508f 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index 3dfc47b61..32d6610aa 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 8ddc5d72b..1cd137d15 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index fec1a2edd..05d018043 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index d1ba7d94d..e9256d2c3 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index cdc17a525..842fbdcce 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 196669758..1b02369ac 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index c2b4b369b..13a4ee59b 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c
index 8b23bb516..c68cdff46 100644
--- a/vp8/encoder/x86/preproc_mmx.c
+++ b/vp8/encoder/x86/preproc_mmx.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 25adca0ed..1374fdbba 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index 4b3574922..8bf4bbd47 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index f4ef5518a..b0877ec52 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index edfe82f26..8a4954829 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 79c4b4433..024cabe06 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index d9babd37c..5775d98c4 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 31f66ecf2..293e2e14d 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 1ccc6c567..029f1ac0d 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 788b8331f..63dbc126f 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 78ecd7b22..28099ede4 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 9cdd662b5..0de897e8d 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index f6123a823..f3750455b 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 5ed53ba2c..d993927ec 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index d63c03938..666432751 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index a99364d5e..ea75529cd 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 971a17520..f09f25852 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 16009f9b7..97367dbd7 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 24f18b7cd..68a0ee86c 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 58ccac573..f8bcd32d2 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index c653cc5a8..7d970275c 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c
index 0ef3a7b68..f8bf6527e 100644
--- a/vpx/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index cbf5259f2..e819cc548 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/src/vpx_decoder_compat.c b/vpx/src/vpx_decoder_compat.c
index d5cc16e1c..a05e9272b 100644
--- a/vpx/src/vpx_decoder_compat.c
+++ b/vpx/src/vpx_decoder_compat.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index f43328f3d..671590edf 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index e8a2959e8..2f1370d38 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vp8.h b/vpx/vp8.h
index a493ef620..9e7e95b3d 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 110406411..58182e1f3 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index d602e8067..1f021cadb 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vp8e.h b/vpx/vp8e.h
index f72fd2466..aab04f06f 100644
--- a/vpx/vp8e.h
+++ b/vpx/vp8e.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index f821559c0..952dcbbed 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 4e09b5fab..5ccd67c03 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -1,10 +1,10 @@
 ##
 ##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ##
-##  Use of this source code is governed by a BSD-style license 
+##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
 ##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may 
+##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
 
diff --git a/vpx/vpx_codec_impl_bottom.h b/vpx/vpx_codec_impl_bottom.h
index 02bf8b319..023ea9a61 100644
--- a/vpx/vpx_codec_impl_bottom.h
+++ b/vpx/vpx_codec_impl_bottom.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_codec_impl_top.h b/vpx/vpx_codec_impl_top.h
index fce14c1e2..e91904798 100644
--- a/vpx/vpx_codec_impl_top.h
+++ b/vpx/vpx_codec_impl_top.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index 865f5dfd9..c9f2d42f1 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
index e66a096e0..416aa1925 100644
--- a/vpx/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index 8ad7055e4..fa493c273 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 7e4a03a30..0bbda1a48 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
@@ -117,11 +117,11 @@ extern "C" {
 #define VPX_PLANE_V      2   /**< V (Chroma) plane */
 #define VPX_PLANE_ALPHA  3   /**< A (Transparancy) plane */
 #if !defined(VPX_CODEC_DISABLE_COMPAT) || !VPX_CODEC_DISABLE_COMPAT
-#define PLANE_PACKED     VPX_PLANE_PACKED 
+#define PLANE_PACKED     VPX_PLANE_PACKED
 #define PLANE_Y          VPX_PLANE_Y
 #define PLANE_U          VPX_PLANE_U
 #define PLANE_V          VPX_PLANE_V
-#define PLANE_ALPHA      VPX_PLANE_ALPHA 
+#define PLANE_ALPHA      VPX_PLANE_ALPHA
 #endif
         unsigned char *planes[4];  /**< pointer to the top left pixel for each plane */
         int      stride[4];  /**< stride between rows for each plane */
diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h
index f06c64181..7eccce905 100644
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h
index 361c29b12..aa9816035 100644
--- a/vpx_mem/include/nds/vpx_mem_nds.h
+++ b/vpx_mem/include/nds/vpx_mem_nds.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 4605b345d..1f025f003 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
index 49f783e83..7867328a3 100644
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ b/vpx_mem/include/vpx_mem_tracker.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/intel_linux/vpx_mem.c b/vpx_mem/intel_linux/vpx_mem.c
index 7bce794e8..75096e783 100644
--- a/vpx_mem/intel_linux/vpx_mem.c
+++ b/vpx_mem/intel_linux/vpx_mem.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/intel_linux/vpx_mem_tracker.c b/vpx_mem/intel_linux/vpx_mem_tracker.c
index fcbebc932..e80c2babe 100644
--- a/vpx_mem/intel_linux/vpx_mem_tracker.c
+++ b/vpx_mem/intel_linux/vpx_mem_tracker.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
index 3f10097e0..e1487ce25 100644
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ b/vpx_mem/memory_manager/hmm_alloc.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
index fbc36de14..b4a4d5143 100644
--- a/vpx_mem/memory_manager/hmm_base.c
+++ b/vpx_mem/memory_manager/hmm_base.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
index 71b41d924..7ae2defb1 100644
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ b/vpx_mem/memory_manager/hmm_dflt_abort.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
index a979c7a6e..b938ee1b1 100644
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ b/vpx_mem/memory_manager/hmm_grow.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
index 82a6a36f2..eeaaf81db 100644
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ b/vpx_mem/memory_manager/hmm_largest.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
index cb93bb1d3..ab0ef255a 100644
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ b/vpx_mem/memory_manager/hmm_resize.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
index d84513625..1306fcad4 100644
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ b/vpx_mem/memory_manager/hmm_shrink.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
index 586fc2248..f11be912e 100644
--- a/vpx_mem/memory_manager/hmm_true.c
+++ b/vpx_mem/memory_manager/hmm_true.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
index da1b148dd..5ac6c6bab 100644
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ b/vpx_mem/memory_manager/include/cavl_if.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
index e67cc8a9b..38b9f6be9 100644
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ b/vpx_mem/memory_manager/include/cavl_impl.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
index 4e46c41fb..0549f9ab4 100644
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ b/vpx_mem/memory_manager/include/heapmm.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
index 715163c1b..20acd9449 100644
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ b/vpx_mem/memory_manager/include/hmm_cnfg.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
index 1dddb8ac7..4c64be619 100644
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ b/vpx_mem/memory_manager/include/hmm_intrnl.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/nds/vpx_mem_nds.c b/vpx_mem/nds/vpx_mem_nds.c
index 88d474af8..694aac84d 100644
--- a/vpx_mem/nds/vpx_mem_nds.c
+++ b/vpx_mem/nds/vpx_mem_nds.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
index c2a1c2813..3ee54a606 100644
--- a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
+++ b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index ba92024bf..ded432a66 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index a1239c127..f6fb52d31 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index 92152a6fd..ab4dec9f0 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
index 9d32393ae..fd1ee563b 100644
--- a/vpx_ports/config.h
+++ b/vpx_ports/config.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 #include "vpx_config.h"
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 096176dc8..44b8f0ee0 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index ade2e3024..8dbb0eb09 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index 109c2707f..813f91fba 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 7c4d95ec3..24b6dc100 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index 11fee84b5..c9ec6ba9e 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index b9c8b8c0f..d0b3589e5 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 8c23abd38..90a50f8e4 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index a1622e6a4..6fc005ad1 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
index e317fe9af..b6dae0b19 100644
--- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm
+++ b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/nds/yv12extend.c b/vpx_scale/arm/nds/yv12extend.c
index 9ec346662..a12bb2799 100644
--- a/vpx_scale/arm/nds/yv12extend.c
+++ b/vpx_scale/arm/nds/yv12extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index 64e7bfe66..25a21e63a 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
index f79de3c0d..04c0de734 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index 2e24e24a8..cdb402c92 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index 418578fd0..632d43014 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 67954b2c5..0868b8bca 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/arm/yv12extend_arm.c b/vpx_scale/arm/yv12extend_arm.c
index 5069030d5..dce642d1f 100644
--- a/vpx_scale/arm/yv12extend_arm.c
+++ b/vpx_scale/arm/yv12extend_arm.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 950a5d2ce..043493ffe 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/blackfin/yv12extend.c b/vpx_scale/blackfin/yv12extend.c
index 80504ef9d..e5af5305b 100644
--- a/vpx_scale/blackfin/yv12extend.c
+++ b/vpx_scale/blackfin/yv12extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/dm642/bicubic_scaler_c64.c b/vpx_scale/dm642/bicubic_scaler_c64.c
index e026be598..830900e68 100644
--- a/vpx_scale/dm642/bicubic_scaler_c64.c
+++ b/vpx_scale/dm642/bicubic_scaler_c64.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/dm642/gen_scalers_c64.c b/vpx_scale/dm642/gen_scalers_c64.c
index 34f95f71a..47e43cac3 100644
--- a/vpx_scale/dm642/gen_scalers_c64.c
+++ b/vpx_scale/dm642/gen_scalers_c64.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/dm642/yv12extend.c b/vpx_scale/dm642/yv12extend.c
index 2557a3af0..fff39d3d8 100644
--- a/vpx_scale/dm642/yv12extend.c
+++ b/vpx_scale/dm642/yv12extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index 48f909671..3052542b7 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index ff841f33d..8be43dc7d 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/scalesystemdependant.c b/vpx_scale/generic/scalesystemdependant.c
index 7a24a26b7..81c8171d7 100644
--- a/vpx_scale/generic/scalesystemdependant.c
+++ b/vpx_scale/generic/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 8f8cfb504..7c8f8d004 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 008130b98..d73cfd113 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 907fa6820..aa623ba03 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h
index 39e37428f..2dea83c79 100644
--- a/vpx_scale/include/arm/vpxscale_nofp.h
+++ b/vpx_scale/include/arm/vpxscale_nofp.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
index 68b8fc0e6..9a721791b 100644
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h
index dbe778655..30a98e800 100644
--- a/vpx_scale/include/generic/vpxscale_depricated.h
+++ b/vpx_scale/include/generic/vpxscale_depricated.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h
index 97ea60e99..2a618de5d 100644
--- a/vpx_scale/include/generic/vpxscale_nofp.h
+++ b/vpx_scale/include/generic/vpxscale_nofp.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/leapster/vpxscale.h b/vpx_scale/include/leapster/vpxscale.h
index a40f2ebea..54ad54eb1 100644
--- a/vpx_scale/include/leapster/vpxscale.h
+++ b/vpx_scale/include/leapster/vpxscale.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h
index 39e37428f..2dea83c79 100644
--- a/vpx_scale/include/symbian/vpxscale_nofp.h
+++ b/vpx_scale/include/symbian/vpxscale_nofp.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h
index f39a1c6c8..c04e5267b 100644
--- a/vpx_scale/include/vpxscale_nofp.h
+++ b/vpx_scale/include/vpxscale_nofp.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/intel_linux/scaleopt.c b/vpx_scale/intel_linux/scaleopt.c
index 1bdb488af..5ea96144f 100644
--- a/vpx_scale/intel_linux/scaleopt.c
+++ b/vpx_scale/intel_linux/scaleopt.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/intel_linux/scalesystemdependant.c b/vpx_scale/intel_linux/scalesystemdependant.c
index 82b90c910..892d53292 100644
--- a/vpx_scale/intel_linux/scalesystemdependant.c
+++ b/vpx_scale/intel_linux/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/leapster/doptsystemdependant_lf.c b/vpx_scale/leapster/doptsystemdependant_lf.c
index f4ccb163b..50073d62b 100644
--- a/vpx_scale/leapster/doptsystemdependant_lf.c
+++ b/vpx_scale/leapster/doptsystemdependant_lf.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/leapster/gen_scalers_lf.c b/vpx_scale/leapster/gen_scalers_lf.c
index f9a11fecd..809981df5 100644
--- a/vpx_scale/leapster/gen_scalers_lf.c
+++ b/vpx_scale/leapster/gen_scalers_lf.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/leapster/vpxscale_lf.c b/vpx_scale/leapster/vpxscale_lf.c
index deabd989f..32dffbbf2 100644
--- a/vpx_scale/leapster/vpxscale_lf.c
+++ b/vpx_scale/leapster/vpxscale_lf.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/leapster/yv12extend.c b/vpx_scale/leapster/yv12extend.c
index 5a89c3164..017be972f 100644
--- a/vpx_scale/leapster/yv12extend.c
+++ b/vpx_scale/leapster/yv12extend.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
index 41aefa27c..90cd2afe0 100644
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/symbian/gen_scalers_armv4.asm b/vpx_scale/symbian/gen_scalers_armv4.asm
index e317fe9af..b6dae0b19 100644
--- a/vpx_scale/symbian/gen_scalers_armv4.asm
+++ b/vpx_scale/symbian/gen_scalers_armv4.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/symbian/scalesystemdependant.c b/vpx_scale/symbian/scalesystemdependant.c
index 75ef26dfd..d6497aaed 100644
--- a/vpx_scale/symbian/scalesystemdependant.c
+++ b/vpx_scale/symbian/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index f3057fe54..bd6796a9a 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/wce/gen_scalers_armv4.asm b/vpx_scale/wce/gen_scalers_armv4.asm
index e317fe9af..b6dae0b19 100644
--- a/vpx_scale/wce/gen_scalers_armv4.asm
+++ b/vpx_scale/wce/gen_scalers_armv4.asm
@@ -1,10 +1,10 @@
 ;
 ;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
 ;
-;  Use of this source code is governed by a BSD-style license 
+;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
 ;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may 
+;  in the file PATENTS.  All contributing project authors may
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
diff --git a/vpx_scale/wce/scalesystemdependant.c b/vpx_scale/wce/scalesystemdependant.c
index 1d62eb598..3b381fcc8 100644
--- a/vpx_scale/wce/scalesystemdependant.c
+++ b/vpx_scale/wce/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index 59d49e625..f354a1d6f 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/win32/scalesystemdependant.c b/vpx_scale/win32/scalesystemdependant.c
index 82b90c910..892d53292 100644
--- a/vpx_scale/win32/scalesystemdependant.c
+++ b/vpx_scale/win32/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/x86_64/scaleopt.c b/vpx_scale/x86_64/scaleopt.c
index de4c478fc..fa5061a3b 100644
--- a/vpx_scale/x86_64/scaleopt.c
+++ b/vpx_scale/x86_64/scaleopt.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/x86_64/scalesystemdependant.c b/vpx_scale/x86_64/scalesystemdependant.c
index 032444965..da9ea4e05 100644
--- a/vpx_scale/x86_64/scalesystemdependant.c
+++ b/vpx_scale/x86_64/scalesystemdependant.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index f01bd78ec..a7f4d5f1d 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/vpx_scale/yv12extend.h b/vpx_scale/yv12extend.h
index b310ef5fd..1a32f1003 100644
--- a/vpx_scale/yv12extend.h
+++ b/vpx_scale/yv12extend.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/wince_wmain_adapter.cpp b/wince_wmain_adapter.cpp
index 48d3ab336..ddee8a45b 100644
--- a/wince_wmain_adapter.cpp
+++ b/wince_wmain_adapter.cpp
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
diff --git a/y4minput.c b/y4minput.c
index 895226d4f..70cdaef2a 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  *
  *  Based on code from the OggTheora software codec source code,
diff --git a/y4minput.h b/y4minput.h
index e3f930433..09441bff3 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -1,10 +1,10 @@
 /*
  *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
  *
- *  Use of this source code is governed by a BSD-style license 
+ *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
  *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may 
+ *  in the file PATENTS.  All contributing project authors may
  *  be found in the AUTHORS file in the root of the source tree.
  *
  *  Based on code from the OggTheora software codec source code,

From 7630e36f1bda1dbf6e63c2514927b0e524787a41 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 17 Jun 2010 13:34:19 -0400
Subject: [PATCH 049/307] Add x86_64-linux-icc target to build VP8 with icc

Add a target for icc.

Change-Id: Ia1db82373d9c7268848bbb65c9483d408b9d933f
---
 build/make/configure.sh | 2 ++
 configure               | 1 +
 2 files changed, 3 insertions(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 05e550fb0..b22fbbabb 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -797,6 +797,8 @@ process_common_toolchain() {
                 setup_gnu_toolchain
                 add_cflags -use-msasm -use-asm
                 add_ldflags -i-static
+                enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3
+                enabled x86_64 && AR=xiar
                 ;;
             gcc*)
                 add_cflags  -m${bits}
diff --git a/configure b/configure
index 9a3269a7f..f68f018d5 100755
--- a/configure
+++ b/configure
@@ -109,6 +109,7 @@ all_platforms="${all_platforms} x86-win32-vs7"
 all_platforms="${all_platforms} x86-win32-vs8"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
+all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"
 all_platforms="${all_platforms} x86_64-win64-vs8"
 all_platforms="${all_platforms} universal-darwin8-gcc"

From 220daa00e0753fb7c2b7346c5557624df9055f21 Mon Sep 17 00:00:00 2001
From: Jim Bankoski <jimbankoski@google.com>
Date: Wed, 16 Jun 2010 12:36:53 -0400
Subject: [PATCH 050/307] vp8_block_error_xmm: remove unnecessary instructions

Remove a couple instructions from this function which weren't
necessary for correct execution.

Change-Id: Ib649674f140689f7e5c1530c35686241688a3151
---
 vp8/encoder/x86/encodeopt.asm | 34 ++++++++++++----------------------
 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 842fbdcce..b4fe576a0 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -11,7 +11,6 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-
 ;int vp8_block_error_xmm(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_xmm)
 sym(vp8_block_error_xmm):
@@ -20,11 +19,9 @@ sym(vp8_block_error_xmm):
     SHADOW_ARGS_TO_STACK 2
     push rsi
     push rdi
-    ; end prolog
-
+    ; end prologue
 
         mov         rsi,        arg(0) ;coeff_ptr
-        pxor        xmm7,       xmm7
 
         mov         rdi,        arg(1) ;dcoef_ptr
         movdqa      xmm3,       [rsi]
@@ -33,31 +30,25 @@ sym(vp8_block_error_xmm):
         movdqa      xmm5,       [rsi+16]
 
         movdqa      xmm6,       [rdi+16]
-        pxor        xmm1,       xmm1    ; from movd xmm1, dc; dc=0
-
-        movdqa      xmm2,       xmm7
-        psubw       xmm5,       xmm6
-
-        por         xmm1,       xmm2
-        pmaddwd     xmm5,       xmm5
-
-        pcmpeqw     xmm1,       xmm7
         psubw       xmm3,       xmm4
 
-        pand        xmm1,       xmm3
-        pmaddwd     xmm1,       xmm1
+        psubw       xmm5,       xmm6
+        pmaddwd     xmm3,       xmm3
+        pmaddwd     xmm5,       xmm5
 
-        paddd       xmm1,       xmm5
-        movdqa      xmm0,       xmm1
+        paddd       xmm3,       xmm5
+
+        pxor        xmm7,       xmm7
+        movdqa      xmm0,       xmm3
 
         punpckldq   xmm0,       xmm7
-        punpckhdq   xmm1,       xmm7
+        punpckhdq   xmm3,       xmm7
 
-        paddd       xmm0,       xmm1
-        movdqa      xmm1,       xmm0
+        paddd       xmm0,       xmm3
+        movdqa      xmm3,       xmm0
 
         psrldq      xmm0,       8
-        paddd       xmm0,       xmm1
+        paddd       xmm0,       xmm3
 
         movd        rax,        xmm0
 
@@ -68,7 +59,6 @@ sym(vp8_block_error_xmm):
     pop         rbp
     ret
 
-
 ;int vp8_block_error_mmx(short *coeff_ptr,  short *dcoef_ptr)
 global sym(vp8_block_error_mmx)
 sym(vp8_block_error_mmx):

From 9f814634545d18dbb0114804e525809f382dcba0 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Thu, 17 Jun 2010 19:33:52 -0700
Subject: [PATCH 051/307] Fix a linker error on x86-64 Linux when not using a
 version script.

If the version script produced by the libvpx build system is not
 used when linking a shared library on x86-64 Linux, the constant
 data in the subpel filters produces R_X86_64_32 relocation errors
 due to the use of wrt rip addressing instead of
 wrt rip wrt ..gotpcrel.
Instead of adding a new macro for this addressing mode, this patch
 sets the ELF visibility of these symbols to "hidden", which
 allows wrt rip addressing to work without a text relocation.
This allows building a shared library without using the provided
 build system or a separate version script.
Fixes http://code.google.com/p/webm/issues/detail?id=46

Change-Id: Ie108f9d9a4352e5af46938bf4750d2302c1b2dc2
---
 vp8/common/x86/subpixel_mmx.asm | 4 ++--
 vpx_ports/x86_abi_support.asm   | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index ba747f7a0..b0008fcdb 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -731,7 +731,7 @@ rd:
     times 4 dw 0x40
 
 align 16
-global sym(vp8_six_tap_mmx)
+global sym(vp8_six_tap_mmx) HIDDEN_DATA
 sym(vp8_six_tap_mmx):
     times 8 dw 0
     times 8 dw 0
@@ -791,7 +791,7 @@ sym(vp8_six_tap_mmx):
 
 
 align 16
-global sym(vp8_bilinear_filters_mmx)
+global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
 sym(vp8_bilinear_filters_mmx):
     times 8 dw 128
     times 8 dw 0
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 6fc005ad1..5748c235e 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -138,12 +138,16 @@
     %endmacro
   %endif
   %endif
+  %define HIDDEN_DATA
 %else
   %macro GET_GOT 1
   %endmacro
   %define GLOBAL wrt rip
   %ifidn __OUTPUT_FORMAT__,elf64
     %define WRT_PLT wrt ..plt
+    %define HIDDEN_DATA :data hidden
+  %else
+    %define HIDDEN_DATA
   %endif
 %endif
 %ifnmacro GET_GOT

From d4b99b8e3ae2fe0a92b66cc610e55a64bfc3c9a4 Mon Sep 17 00:00:00 2001
From: agrange <agrange@google.com>
Date: Fri, 18 Jun 2010 15:18:09 +0100
Subject: [PATCH 052/307] Moved DOUBLE_DIVIDE_CHECK to denominator (was on
 numerator)

The DOUBLE_DIVIDE_CHECK macro prevents from divide by 0,
so must be on the denominator to work as intended.

Change-Id: Ie109242d52dbb9a2c4bc1e11890fa51b5f87ffc7
---
 vp8/encoder/firstpass.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 7625c9c49..74feca314 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -217,7 +217,7 @@ int frame_max_bits(VP8_COMP *cpi)
     // If we are running below the optimal level then we need to gradually tighten up on max_bits.
     if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
     {
-        double buffer_fullness_ratio = (double)DOUBLE_DIVIDE_CHECK(cpi->buffer_level) / (double)cpi->oxcf.optimal_buffer_level;
+        double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level);
 
         // For CBR base this on the target average bits per frame plus the maximum sedction rate passed in by the user
         max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));

From daa5d0eb3d837aefdf30c96c11a1af93566d318d Mon Sep 17 00:00:00 2001
From: agrange <agrange@google.com>
Date: Fri, 18 Jun 2010 15:58:18 +0100
Subject: [PATCH 053/307] Changed unary operator from ! to ~

Since the intent is
to reset the appropriate bit in ref_frame_flags not to
test a logic condition. Prior result would always have
been ref_frame_flags being set to 0.
(Issue reported by dgohman, issue 47)

Change-Id: I2c12502ed74c73cf38e98c9680e0249c29e16433
---
 vp8/encoder/onyx_if.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 3680b46d6..14adb4ecc 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4588,13 +4588,13 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
     cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
 
     if (cpi->gold_is_last)
-        cpi->ref_frame_flags &= !VP8_GOLD_FLAG;
+        cpi->ref_frame_flags &= ~VP8_GOLD_FLAG;
 
     if (cpi->alt_is_last)
-        cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+        cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
 
     if (cpi->gold_is_alt)
-        cpi->ref_frame_flags &= !VP8_ALT_FLAG;
+        cpi->ref_frame_flags &= ~VP8_ALT_FLAG;
 
 
     if (cpi->oxcf.error_resilient_mode)

From cee8f9f9a5416a3d1663c5d860976383608c1037 Mon Sep 17 00:00:00 2001
From: Giuseppe Scrivano <gscrivano@gnu.org>
Date: Mon, 21 Jun 2010 17:15:04 +0200
Subject: [PATCH 054/307] Remove deprecated `svnstat' rule from Makefile

---
 build/make/Makefile | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/build/make/Makefile b/build/make/Makefile
index 931614f8a..2da8b4789 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -57,7 +57,6 @@ dist:
         fi
 
 
-svnstat: ALL_TARGETS:=$(firstword $(ALL_TARGETS))
 endif
 
 ifneq ($(target),)
@@ -363,12 +362,3 @@ INSTALL_TARGETS += .install-docs .install-srcs .install-libs .install-bins
 all-$(target): $(BUILD_TARGETS)
 install:: $(INSTALL_TARGETS)
 dist: $(INSTALL_TARGETS)
-
-#
-# Development helper targets
-#
-ifneq ($(SRC_PATH_BARE),)
-.PHONY: svnstat
-svnstat:
-	svn stat $(SRC_PATH_BARE)
-endif

From a08df4552a73529e12fdad10bc1b04e5f82fbcb6 Mon Sep 17 00:00:00 2001
From: agrange <agrange@google.com>
Date: Mon, 21 Jun 2010 13:44:42 +0100
Subject: [PATCH 055/307] Fix breakout thresh computation for golden & AltRef
 frames

1. Unavailability of each reference frame type should be tested
independently,
2. Also, only the VP8_GOLD_FLAG needs to be tested before setting
golden frame specific thresholds, and only VP8_ALT_FLAG needs
testing before setting thresholds relevant to the AltRef frame.
(Raised by gbvalor, in response to Issue 47)

Change-Id: I6a06fc2a6592841d85422bc1661e33349bb6c3b8
---
 vp8/encoder/onyx_if.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 14adb4ecc..f3456a733 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -545,7 +545,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
             sf->thresh_mult[THR_NEWG     ] = INT_MAX;
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
@@ -597,7 +598,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
             sf->thresh_mult[THR_NEARMV   ] = INT_MAX;
             sf->thresh_mult[THR_SPLITMV  ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
         {
             sf->thresh_mult[THR_NEARESTG ] = INT_MAX;
             sf->thresh_mult[THR_ZEROG    ] = INT_MAX;
@@ -605,7 +607,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
             sf->thresh_mult[THR_NEWG     ] = INT_MAX;
             sf->thresh_mult[THR_SPLITG   ] = INT_MAX;
         }
-        else if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
+
+        if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
         {
             sf->thresh_mult[THR_NEARESTA ] = INT_MAX;
             sf->thresh_mult[THR_ZEROA    ] = INT_MAX;
@@ -763,7 +766,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
                 cpi->mode_check_freq[THR_NEWA] = 4;
             }
 
-            if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_GOLD_FLAG)
+            if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
             {
                 sf->thresh_mult[THR_NEARESTG ] = 2000;
                 sf->thresh_mult[THR_ZEROG    ] = 2000;
@@ -771,7 +774,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
                 sf->thresh_mult[THR_NEWG     ] = 4000;
             }
 
-            if (cpi->ref_frame_flags & VP8_LAST_FLAG & VP8_ALT_FLAG)
+            if (cpi->ref_frame_flags & VP8_ALT_FLAG)
             {
                 sf->thresh_mult[THR_NEARESTA ] = 2000;
                 sf->thresh_mult[THR_ZEROA    ] = 2000;

From b1e36f2872b8f645b806a151e3e88540adcde614 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 24 Jun 2010 08:41:51 -0400
Subject: [PATCH 056/307] ivfenc: correct fixed kf interval, --disable-kf

ivfenc was setting the VPX_KF_FIXED mode when the kf_min_dist and
kf_max_dist parameters were set to each other. This flag actually means
that keyframes are disabled, and that name was deprecated to avoid
confusion such as this. Instead, a new option is exposed for setting the
VPX_KF_DISABLED mode, and the intervals are passed through to the codec,
which will do automatic placement at a fixed interval as expected.

Change-Id: I15abbec5936f39d5901878b4bc154372fbc23a43
---
 ivfenc.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/ivfenc.c b/ivfenc.c
index 11f2a8fef..3487b3594 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -505,9 +505,11 @@ static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1,
                                      "Minimum keyframe interval (frames)");
 static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1,
                                      "Maximum keyframe interval (frames)");
+static const arg_def_t kf_disabled = ARG_DEF(NULL, "disable-kf", 0,
+                                     "Disable keyframe placement");
 static const arg_def_t *kf_args[] =
 {
-    &kf_min_dist, &kf_max_dist, NULL
+    &kf_min_dist, &kf_max_dist, &kf_disabled, NULL
 };
 
 
@@ -800,6 +802,8 @@ int main(int argc, const char **argv_)
             cfg.kf_min_dist = arg_parse_uint(&arg);
         else if (arg_match(&arg, &kf_max_dist, argi))
             cfg.kf_max_dist = arg_parse_uint(&arg);
+        else if (arg_match(&arg, &kf_disabled, argi))
+            cfg.kf_mode = VPX_KF_DISABLED;
         else
             argj++;
     }
@@ -1016,9 +1020,6 @@ int main(int argc, const char **argv_)
 
 
         /* Construct Encoder Context */
-        if (cfg.kf_min_dist == cfg.kf_max_dist)
-            cfg.kf_mode = VPX_KF_FIXED;
-
         vpx_codec_enc_init(&encoder, codec->iface, &cfg,
                            show_psnr ? VPX_CODEC_USE_PSNR : 0);
         ctx_exit_on_error(&encoder, "Failed to initialize encoder");

From d1920de22cd6f13ba85b087ff06f6a8cfd4641d2 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 24 Jun 2010 08:51:00 -0400
Subject: [PATCH 057/307] configure: remove postproc-generic

This option is vestigial and is unreferenced.

Change-Id: I8bd27cb674c263e9a86fb43244003a9b9df3ca9c
---
 configure | 2 --
 1 file changed, 2 deletions(-)

diff --git a/configure b/configure
index f68f018d5..f74714dcf 100755
--- a/configure
+++ b/configure
@@ -231,7 +231,6 @@ CONFIG_LIST="
     new_tokens
     runtime_cpu_detect
     postproc
-    postproc_generic
     multithread
     psnr
     ${CODECS}
@@ -270,7 +269,6 @@ CMDLINE_SELECT="
     dc_recon
     new_tokens
     postproc
-    postproc_generic
     multithread
     psnr
     ${CODECS}

From 5e34461448c3b68385d5cc92d6b4d4c95be394fb Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 24 Jun 2010 09:02:48 -0400
Subject: [PATCH 058/307] Remove INLINE/FORCEINLINE

These are mostly vestigial, it's up to the compiler to decide what
should be inlined, and this collided with certain Windows platform SDKs.

Change-Id: I80dd35de25eda7773156e355b5aef8f7e44e179b
---
 build/make/configure.sh            |  2 --
 configure                          |  2 --
 vp8/vp8dx.mk                       |  2 --
 vpx_ports/mem_ops.h                | 26 +++++++++++++-------------
 vpx_ports/mem_ops_aligned.h        | 12 ++++++------
 vpx_ports/vpx_timer.h              |  6 +++---
 vpx_scale/generic/bicubic_scaler.c |  8 ++++----
 7 files changed, 26 insertions(+), 32 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index b22fbbabb..35131b07b 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -395,8 +395,6 @@ EOF
 write_common_target_config_h() {
     cat > ${TMP_H} << EOF
 /* This file automatically generated by configure. Do not edit! */
-#define INLINE      ${INLINE}
-#define FORCEINLINE ${FORCEINLINE:-${INLINE}}
 #define RESTRICT    ${RESTRICT}
 EOF
     print_config_h ARCH   "${TMP_H}" ${ARCH_LIST}
diff --git a/configure b/configure
index f74714dcf..d2dfb6b35 100755
--- a/configure
+++ b/configure
@@ -519,8 +519,6 @@ process_toolchain() {
              enable solution
              vs_version=${tgt_cc##vs}
              all_targets="${all_targets} solution"
-             INLINE=__inline
-             FORCEINLINE=__forceinline
         ;;
     esac
 
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 68a0ee86c..8ab94259c 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -30,7 +30,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
 # common
 #define ARM
 #define DISABLE_THREAD
-#define INLINE=__forceinline
 
 #INCLUDES += algo/vpx_common/vpx_mem/include
 #INCLUDES += common
@@ -44,7 +43,6 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
 # decoder
 #define ARM
 #define DISABLE_THREAD
-#define INLINE=__forceinline
 
 #INCLUDES += algo/vpx_common/vpx_mem/include
 #INCLUDES += common
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index 813f91fba..ddf0983c7 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -60,7 +60,7 @@
 
 #undef  mem_get_be16
 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16)
-static INLINE unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -72,7 +72,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be16(const void *vmem)
 
 #undef  mem_get_be24
 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24)
-static INLINE unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -85,7 +85,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be24(const void *vmem)
 
 #undef  mem_get_be32
 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32)
-static INLINE unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -99,7 +99,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_be32(const void *vmem)
 
 #undef  mem_get_le16
 #define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16)
-static INLINE unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -111,7 +111,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le16(const void *vmem)
 
 #undef  mem_get_le24
 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24)
-static INLINE unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -124,7 +124,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le24(const void *vmem)
 
 #undef  mem_get_le32
 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32)
-static INLINE unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
+static unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
 {
     unsigned MEM_VALUE_T  val;
     const MAU_T          *mem = (const MAU_T *)vmem;
@@ -137,7 +137,7 @@ static INLINE unsigned MEM_VALUE_T mem_get_le32(const void *vmem)
 }
 
 #define mem_get_s_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) {\
         const MAU_T *mem = (const MAU_T*)vmem;\
         signed MEM_VALUE_T val = mem_get_##end##sz(mem);\
         return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz);\
@@ -169,7 +169,7 @@ mem_get_s_generic(le, 32);
 
 #undef  mem_put_be16
 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16)
-static INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val)
+static void mem_put_be16(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -179,7 +179,7 @@ static INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val)
 
 #undef  mem_put_be24
 #define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24)
-static INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val)
+static void mem_put_be24(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -190,7 +190,7 @@ static INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val)
 
 #undef  mem_put_be32
 #define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32)
-static INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val)
+static void mem_put_be32(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -202,7 +202,7 @@ static INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val)
 
 #undef  mem_put_le16
 #define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16)
-static INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val)
+static void mem_put_le16(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -212,7 +212,7 @@ static INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val)
 
 #undef  mem_put_le24
 #define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24)
-static INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val)
+static void mem_put_le24(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
@@ -223,7 +223,7 @@ static INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val)
 
 #undef  mem_put_le32
 #define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32)
-static INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val)
+static void mem_put_le32(void *vmem, MEM_VALUE_T val)
 {
     MAU_T *mem = (MAU_T *)vmem;
 
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 24b6dc100..61496f14b 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -40,19 +40,19 @@
 #define swap_endian_32_se(val,raw) swap_endian_32(val,raw)
 
 #define mem_get_ne_aligned_generic(end,sz) \
-    static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+    static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
         const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
         return *mem;\
     }
 
 #define mem_get_sne_aligned_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
         const int##sz##_t *mem = (const int##sz##_t *)vmem;\
         return *mem;\
     }
 
 #define mem_get_se_aligned_generic(end,sz) \
-    static INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
+    static unsigned MEM_VALUE_T mem_get_##end##sz##_aligned(const void *vmem) {\
         const uint##sz##_t *mem = (const uint##sz##_t *)vmem;\
         unsigned MEM_VALUE_T val, raw = *mem;\
         swap_endian_##sz(val,raw);\
@@ -60,7 +60,7 @@
     }
 
 #define mem_get_sse_aligned_generic(end,sz) \
-    static INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
+    static signed MEM_VALUE_T mem_get_s##end##sz##_aligned(const void *vmem) {\
         const int##sz##_t *mem = (const int##sz##_t *)vmem;\
         unsigned MEM_VALUE_T val, raw = *mem;\
         swap_endian_##sz##_se(val,raw);\
@@ -68,13 +68,13 @@
     }
 
 #define mem_put_ne_aligned_generic(end,sz) \
-    static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+    static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
         uint##sz##_t *mem = (uint##sz##_t *)vmem;\
         *mem = (uint##sz##_t)val;\
     }
 
 #define mem_put_se_aligned_generic(end,sz) \
-    static INLINE void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
+    static void mem_put_##end##sz##_aligned(void *vmem, MEM_VALUE_T val) {\
         uint##sz##_t *mem = (uint##sz##_t *)vmem, raw;\
         swap_endian_##sz(raw,val);\
         *mem = (uint##sz##_t)raw;\
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index c9ec6ba9e..f5e817ff4 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -51,7 +51,7 @@ struct vpx_usec_timer
 };
 
 
-static INLINE void
+static void
 vpx_usec_timer_start(struct vpx_usec_timer *t)
 {
 #if defined(_MSC_VER)
@@ -62,7 +62,7 @@ vpx_usec_timer_start(struct vpx_usec_timer *t)
 }
 
 
-static INLINE void
+static void
 vpx_usec_timer_mark(struct vpx_usec_timer *t)
 {
 #if defined(_MSC_VER)
@@ -73,7 +73,7 @@ vpx_usec_timer_mark(struct vpx_usec_timer *t)
 }
 
 
-static INLINE long
+static long
 vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
 {
 #if defined(_MSC_VER)
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index 3052542b7..c600c3f1b 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -46,7 +46,7 @@ static float a = -0.6;
 //         3     2
 // C0 = a*t - a*t
 //
-static INLINE short c0_fixed(unsigned int t)
+static short c0_fixed(unsigned int t)
 {
     // put t in Q16 notation
     unsigned short v1, v2;
@@ -67,7 +67,7 @@ static INLINE short c0_fixed(unsigned int t)
 //                     2          3
 // C1 = a*t + (3-2*a)*t  - (2-a)*t
 //
-static INLINE short c1_fixed(unsigned int t)
+static short c1_fixed(unsigned int t)
 {
     unsigned short v1, v2, v3;
     unsigned short two, three;
@@ -96,7 +96,7 @@ static INLINE short c1_fixed(unsigned int t)
 //                 2          3
 // C2 = 1 - (3-a)*t  + (2-a)*t
 //
-static INLINE short c2_fixed(unsigned int t)
+static short c2_fixed(unsigned int t)
 {
     unsigned short v1, v2, v3;
     unsigned short two, three;
@@ -124,7 +124,7 @@ static INLINE short c2_fixed(unsigned int t)
 //                 2      3
 // C3 = a*t - 2*a*t  + a*t
 //
-static INLINE short c3_fixed(unsigned int t)
+static short c3_fixed(unsigned int t)
 {
     int v1, v2, v3;
 

From cecdd73db786c2ea29f9d6eff1d30f1e8b75eeb5 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 24 Jun 2010 12:18:23 -0400
Subject: [PATCH 059/307] vp8cx : bestsad declared and initialized incorrectly.

bestsad should be an int initialized to INT_MAX.  The optimized
SAD function expects a signed value for bestsad to use for comparison
and early loop termination.  When no match is made, which is
determined by a comparison of bestsad to INT_MAX, INT_MAX is returned.
---
 vp8/encoder/mcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 4c3edd708..6d69a9ef4 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -997,7 +997,7 @@ int vp8_diamond_search_sadx4
     int tot_steps;
     MV this_mv;
 
-    unsigned int bestsad = UINT_MAX;
+    int bestsad = INT_MAX;
     int best_site = 0;
     int last_site = 0;
 

From a5906668a32c46a0f033b3a503ac5a159b77fce3 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 24 Jun 2010 14:30:48 -0400
Subject: [PATCH 060/307] vp8cx : bestsad declared and initialized incorrectly.

bestsad needs to be a int and set to INT_MAX because at the end
of the function it is compared to INT_MAX to determine if there
was a match in the function.

Change-Id: Ie80e88e4c4bb4a1ff9446079b794d14d5a219788
---
 vp8/encoder/mcomp.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 6d69a9ef4..156578bb1 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1238,7 +1238,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     unsigned char *bestaddress;
     MV *best_mv = &d->bmi.mv.as_mv;
     MV this_mv;
-    unsigned int bestsad = UINT_MAX;
+    int bestsad = INT_MAX;
     int r, c;
 
     unsigned char *check_here;

From d0dd01b8ce8bc5f477d70f1c127d795418c5efb5 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 16 Jun 2010 12:52:18 -0700
Subject: [PATCH 061/307] Redo the forward 4x4 dct

The new fdct lowers the round trip sum squared error for a
4x4 block ~0.12. or ~0.008/pixel. For reference, the old
matrix multiply version has average round trip error 1.46
for a 4x4 block.

Thanks to "derf" for his suggestions and references.

Change-Id: I5559d1e81d333b319404ab16b336b739f87afc79
---
 vp8/encoder/block.h                    |   5 -
 vp8/encoder/dct.c                      | 141 +--------
 vp8/encoder/dct.h                      |  10 -
 vp8/encoder/encodeintra.c              |   2 +-
 vp8/encoder/encodemb.c                 |  39 ++-
 vp8/encoder/ethreading.c               |   3 -
 vp8/encoder/generic/csystemdependent.c |   4 +-
 vp8/encoder/onyx_if.c                  |   6 -
 vp8/encoder/rdopt.c                    |   4 +-
 vp8/encoder/x86/csystemdependent.c     |  26 +-
 vp8/encoder/x86/dct_mmx.asm            | 392 +++----------------------
 vp8/encoder/x86/dct_x86.h              |  13 +-
 vp8/encoder/x86/x86_csystemdependent.c |  34 +--
 vp8/vp8cx.mk                           |   1 -
 14 files changed, 118 insertions(+), 562 deletions(-)

diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index c1fcfe29a..b55bc51cb 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -100,14 +100,9 @@ typedef struct
 
     void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
     void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-    void (*short_fdct4x4rd)(short *input, short *output, int pitch);
-    void (*short_fdct8x4rd)(short *input, short *output, int pitch);
     void (*short_walsh4x4)(short *input, short *output, int pitch);
-
     void (*quantize_b)(BLOCK *b, BLOCKD *d);
 
-
-
 } MACROBLOCK;
 
 
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 3075e5853..58e36109c 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -11,163 +11,54 @@
 
 #include <math.h>
 
-
-static const short dct_matrix2[4][4] =
-{
-    { 23170,  30274,  23170, 12540 },
-    { 23170,  12540, -23170, -30274 },
-    { 23170, -12540, -23170, 30274 },
-    { 23170, -30274,  23170, -12540 }
-};
-
-static const short dct_matrix1[4][4] =
-{
-    { 23170,  23170,  23170,  23170 },
-    { 30274,  12540, -12540, -30274 },
-    { 23170, -23170, -23170,  23170 },
-    { 12540, -30274,  30274, -12540 }
-};
-
-
-#define _1STSTAGESHIFT           14
-#define _1STSTAGEROUNDING        (1<<( _1STSTAGESHIFT-1))
-#define _2NDSTAGESHIFT           16
-#define _2NDSTAGEROUNDING        (1<<( _2NDSTAGESHIFT-1))
-
-// using matrix multiply
 void vp8_short_fdct4x4_c(short *input, short *output, int pitch)
-{
-    int i, j, k;
-    short temp[4][4];
-    int sumtemp;
-    pitch >>= 1;
-
-    for (i = 0; i < 4; i++)
-    {
-        for (j = 0; j < 4; j++)
-        {
-            sumtemp = 0;
-
-            for (k = 0; k < 4; k++)
-            {
-                sumtemp += input[i*pitch+k] * dct_matrix2[k][j];
-
-            }
-
-            temp[i][j] = (short)((sumtemp + _1STSTAGEROUNDING) >> _1STSTAGESHIFT);
-        }
-    }
-
-
-    for (i = 0; i < 4; i++)
-    {
-        for (j = 0; j < 4; j++)
-        {
-            sumtemp = 0;
-
-            for (k = 0; k < 4; k++)
-            {
-                sumtemp += dct_matrix1[i][ k] * temp[k][ j];
-            }
-
-            output[i*4+j] = (short)((sumtemp + _2NDSTAGEROUNDING) >> _2NDSTAGESHIFT);
-        }
-    }
-
-}
-
-
-void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
-{
-    vp8_short_fdct4x4_c(input,   output,    pitch);
-    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
-}
-
-
-static const signed short x_c1 = 60547;
-static const signed short x_c2 = 46341;
-static const signed short x_c3 = 25080;
-
-void vp8_fast_fdct4x4_c(short *input, short *output, int pitch)
 {
     int i;
     int a1, b1, c1, d1;
-    int a2, b2, c2, d2;
     short *ip = input;
-
     short *op = output;
-    int temp1, temp2;
 
     for (i = 0; i < 4; i++)
     {
-        a1 = (ip[0] + ip[3]) * 2;
-        b1 = (ip[1] + ip[2]) * 2;
-        c1 = (ip[1] - ip[2]) * 2;
-        d1 = (ip[0] - ip[3]) * 2;
+        a1 = ((ip[0] + ip[3])<<3);
+        b1 = ((ip[1] + ip[2])<<3);
+        c1 = ((ip[1] - ip[2])<<3);
+        d1 = ((ip[0] - ip[3])<<3);
 
-        temp1 = a1 + b1;
-        temp2 = a1 - b1;
+        op[0] = a1 + b1;
+        op[2] = a1 - b1;
 
-        op[0] = ((temp1 * x_c2) >> 16) + temp1;
-        op[2] = ((temp2 * x_c2) >> 16) + temp2;
-
-        temp1 = (c1 * x_c3) >> 16;
-        temp2 = ((d1 * x_c1) >> 16) + d1;
-
-        op[1] = temp1 + temp2;
-
-        temp1 = (d1 * x_c3) >> 16;
-        temp2 = ((c1 * x_c1) >> 16) + c1;
-
-        op[3] = temp1 - temp2;
+        op[1] = (c1 * 2217 + d1 * 5352 +  14500)>>12;
+        op[3] = (d1 * 2217 - c1 * 5352 +   7500)>>12;
 
         ip += pitch / 2;
         op += 4;
-    }
 
+    }
     ip = output;
     op = output;
-
     for (i = 0; i < 4; i++)
     {
-
         a1 = ip[0] + ip[12];
         b1 = ip[4] + ip[8];
         c1 = ip[4] - ip[8];
         d1 = ip[0] - ip[12];
 
+        op[0]  = ( a1 + b1 + 7)>>4;
+        op[8]  = ( a1 - b1 + 7)>>4;
 
-        temp1 = a1 + b1;
-        temp2 = a1 - b1;
-
-        a2 = ((temp1 * x_c2) >> 16) + temp1;
-        c2 = ((temp2 * x_c2) >> 16) + temp2;
-
-        temp1 = (c1 * x_c3) >> 16;
-        temp2 = ((d1 * x_c1) >> 16) + d1;
-
-        b2 = temp1 + temp2;
-
-        temp1 = (d1 * x_c3) >> 16;
-        temp2 = ((c1 * x_c1) >> 16) + c1;
-
-        d2 = temp1 - temp2;
-
-
-        op[0]   = (a2 + 1) >> 1;
-        op[4]   = (b2 + 1) >> 1;
-        op[8]   = (c2 + 1) >> 1;
-        op[12]  = (d2 + 1) >> 1;
+        op[4]  =((c1 * 2217 + d1 * 5352 +  12000)>>16) + (d1!=0);
+        op[12] = (d1 * 2217 - c1 * 5352 +  51000)>>16;
 
         ip++;
         op++;
     }
 }
 
-void vp8_fast_fdct8x4_c(short *input, short *output, int pitch)
+void vp8_short_fdct8x4_c(short *input, short *output, int pitch)
 {
-    vp8_fast_fdct4x4_c(input,   output,    pitch);
-    vp8_fast_fdct4x4_c(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
 void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index f79dba4f2..0ab40b310 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -32,16 +32,6 @@ extern prototype_fdct(vp8_fdct_short4x4);
 #endif
 extern prototype_fdct(vp8_fdct_short8x4);
 
-#ifndef vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4  vp8_fast_fdct4x4_c
-#endif
-extern prototype_fdct(vp8_fdct_fast4x4);
-
-#ifndef vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4  vp8_fast_fdct8x4_c
-#endif
-extern prototype_fdct(vp8_fdct_fast8x4);
-
 #ifndef vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_c
 #endif
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 0e160930d..870cb5815 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -66,7 +66,7 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL
 
     ENCODEMB_INVOKE(&rtcd->encodemb, subb)(be, b, 16);
 
-    x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
+    x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 
     x->quantize_b(be, b);
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 824850c41..8bc01df5b 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -130,7 +130,8 @@ void vp8_transform_mbuvrd(MACROBLOCK *x)
 
     for (i = 16; i < 24; i += 2)
     {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 16);
     }
 }
 
@@ -140,14 +141,16 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
     vp8_build_dcblock(x);
 
     // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+    x->short_walsh4x4(&x->block[24].src_diff[0],
+        &x->block[24].coeff[0], 8);
 
 }
 
@@ -157,14 +160,16 @@ void vp8_transform_intra_mbyrd(MACROBLOCK *x)
 
     for (i = 0; i < 16; i += 2)
     {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
     vp8_build_dcblock(x);
 
     // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+    x->short_walsh4x4(&x->block[24].src_diff[0],
+        &x->block[24].coeff[0], 8);
 }
 
 void vp8_transform_mb(MACROBLOCK *x)
@@ -173,7 +178,8 @@ void vp8_transform_mb(MACROBLOCK *x)
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
@@ -182,12 +188,14 @@ void vp8_transform_mb(MACROBLOCK *x)
 
     for (i = 16; i < 24; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 16);
     }
 
     // do 2nd order transform on the dc block
     if (x->e_mbd.mbmi.mode != SPLITMV)
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+        x->short_walsh4x4(&x->block[24].src_diff[0],
+        &x->block[24].coeff[0], 8);
 
 }
 
@@ -197,14 +205,16 @@ void vp8_transform_mby(MACROBLOCK *x)
 
     for (i = 0; i < 16; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
     if (x->e_mbd.mbmi.mode != SPLITMV)
     {
         vp8_build_dcblock(x);
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+        x->short_walsh4x4(&x->block[24].src_diff[0],
+            &x->block[24].coeff[0], 8);
     }
 }
 
@@ -214,7 +224,8 @@ void vp8_transform_mbrd(MACROBLOCK *x)
 
     for (i = 0; i < 16; i += 2)
     {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 32);
     }
 
     // build dc block from 16 y dc values
@@ -223,12 +234,14 @@ void vp8_transform_mbrd(MACROBLOCK *x)
 
     for (i = 16; i < 24; i += 2)
     {
-        x->short_fdct8x4rd(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+            &x->block[i].coeff[0], 16);
     }
 
     // do 2nd order transform on the dc block
     if (x->e_mbd.mbmi.mode != SPLITMV)
-        x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8);
+        x->short_walsh4x4(&x->block[24].src_diff[0],
+            &x->block[24].coeff[0], 8);
 }
 
 void vp8_stuff_inter16x16(MACROBLOCK *x)
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index a205667dc..dd98a09d1 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -257,9 +257,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
 
     z->vp8_short_fdct4x4     = x->vp8_short_fdct4x4;
     z->vp8_short_fdct8x4     = x->vp8_short_fdct8x4;
-    z->short_fdct4x4rd   = x->short_fdct4x4rd;
-    z->short_fdct8x4rd   = x->short_fdct8x4rd;
-    z->short_fdct8x4rd   = x->short_fdct8x4rd;
     z->short_walsh4x4    = x->short_walsh4x4;
     z->quantize_b        = x->quantize_b;
 
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index e68d65025..dd89f1a82 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -68,8 +68,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
 
     cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
     cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
+    cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
+    cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
     cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 
     cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index f3456a733..60d807c03 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -137,8 +137,6 @@ extern unsigned int inter_b_modes[15];
 
 extern void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
 extern void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
-extern void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
 
 extern const int vp8_bits_per_mb[2][QINDEX_RANGE];
 
@@ -1136,15 +1134,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     {
         cpi->mb.vp8_short_fdct8x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
         cpi->mb.vp8_short_fdct4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
-        cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short8x4);
-        cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, short4x4);
     }
     else
     {
         cpi->mb.vp8_short_fdct8x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
         cpi->mb.vp8_short_fdct4x4   = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
-        cpi->mb.short_fdct8x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast8x4);
-        cpi->mb.short_fdct4x4rd = FDCT_INVOKE(&cpi->rtcd.fdct, fast4x4);
     }
 
     cpi->mb.short_walsh4x4 = FDCT_INVOKE(&cpi->rtcd.fdct, walsh_short4x4);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 2d6dee139..70cf122fa 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1028,7 +1028,7 @@ static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels
 
             vp8_build_inter_predictors_b(bd, 16, x->e_mbd.subpixel_predict);
             ENCODEMB_INVOKE(rtcd, subb)(be, bd, 16);
-            x->short_fdct4x4rd(be->src_diff, be->coeff, 32);
+            x->vp8_short_fdct4x4(be->src_diff, be->coeff, 32);
 
             // set to 0 no way to account for 2nd order DC so discount
             //be->coeff[0] = 0;
@@ -1056,7 +1056,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     // Fdct and building the 2nd order block
     for (beptr = mb->block; beptr < mb->block + 16; beptr += 2)
     {
-        mb->short_fdct8x4rd(beptr->src_diff, beptr->coeff, 32);
+        mb->vp8_short_fdct8x4(beptr->src_diff, beptr->coeff, 32);
         *Y2DCPtr++ = beptr->coeff[0];
         *Y2DCPtr++ = beptr->coeff[16];
     }
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index 6aeac508f..bf12fee54 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -181,10 +181,17 @@ void vp8_cmachine_specific_config(void)
         // Willamette instruction set available:
         vp8_mbuverror                = vp8_mbuverror_xmm;
         vp8_fast_quantize_b            = vp8_fast_quantize_b_sse;
+#if 0 //new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_wmt;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
+        vp8_fast_fdct8x4              = vp8_short_fdct8x4_wmt;
+#else
+        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
+        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
+        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
+#endif
         vp8_subtract_b                = vp8_subtract_b_mmx;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
         vp8_variance4x4              = vp8_variance4x4_mmx;
@@ -218,10 +225,17 @@ void vp8_cmachine_specific_config(void)
         // MMX instruction set available:
         vp8_mbuverror                = vp8_mbuverror_mmx;
         vp8_fast_quantize_b            = vp8_fast_quantize_b_mmx;
+#if 0 // new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_mmx;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
+        vp8_fast_fdct8x4              = vp8_short_fdct8x4_mmx;
+#else
+        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
+        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
+        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
+#endif
         vp8_subtract_b                = vp8_subtract_b_mmx;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
         vp8_variance4x4              = vp8_variance4x4_mmx;
@@ -254,10 +268,10 @@ void vp8_cmachine_specific_config(void)
     {
         // Pure C:
         vp8_mbuverror                = vp8_mbuverror_c;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
+        vp8_fast_quantize_b          = vp8_fast_quantize_b_c;
         vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
-        vp8_fast_fdct4x4              = vp8_fast_fdct4x4_c;
+        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
         vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
         vp8_subtract_b                = vp8_subtract_b_c;
         vp8_subtract_mbuv             = vp8_subtract_mbuv_c;
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index 32d6610aa..ff96c49f3 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -13,8 +13,7 @@
 
 section .text
     global sym(vp8_short_fdct4x4_mmx)
-    global sym(vp8_fast_fdct4x4_mmx)
-    global sym(vp8_fast_fdct8x4_wmt)
+    global sym(vp8_short_fdct8x4_wmt)
 
 
 %define         DCTCONSTANTSBITS         (16)
@@ -24,339 +23,8 @@ section .text
 %define         x_c3                      (25080)          ; cos(pi*3/8) * (1<<15)
 
 
-%define _1STSTAGESHIFT           14
-%define _2NDSTAGESHIFT           16
-
-; using matrix multiply with source and destbuffer has a pitch
 ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
 sym(vp8_short_fdct4x4_mmx):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
-
-        mov         rsi,    arg(0) ;input
-        mov         rdi,    arg(1) ;output
-
-        movsxd      rax,    dword ptr arg(2) ;pitch
-        lea         rdx,    [dct_matrix GLOBAL]
-
-        movq        mm0,    [rsi   ]
-        movq        mm1,    [rsi + rax]
-
-        movq        mm2,    [rsi + rax*2]
-        lea         rsi,    [rsi + rax*2]
-
-        movq        mm3,    [rsi + rax]
-
-        ; first column
-        movq        mm4,    mm0
-        movq        mm7,    [rdx]
-
-        pmaddwd     mm4,    mm7
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    mm7
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-
-        pmaddwd     mm5,    mm7
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    mm7
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi],  mm4
-
-        ;second column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+8]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+8]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+8],  mm4
-
-
-        ;third column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+16]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+16]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _1STSTAGESHIFT
-        psrad       mm5,    _1STSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+16],  mm4
-
-        ;fourth column (this is the last column, so we do not have save the source any more)
-
-        pmaddwd     mm0,    [rdx+24]
-
-        pmaddwd     mm1,    [rdx+24]
-        movq        mm6,    mm0
-
-        punpckldq   mm0,    mm1
-        punpckhdq   mm6,    mm1
-
-        paddd       mm0,    mm6
-
-        pmaddwd     mm2,    [rdx+24]
-
-        pmaddwd     mm3,    [rdx+24]
-        movq        mm7,    mm2
-
-        punpckldq   mm2,    mm3
-        punpckhdq   mm7,    mm3
-
-        paddd       mm2,    mm7
-        movq        mm6,    [dct1st_stage_rounding_mmx GLOBAL]
-
-        paddd       mm0,    mm6
-        paddd       mm2,    mm6
-
-        psrad       mm0,    _1STSTAGESHIFT
-        psrad       mm2,    _1STSTAGESHIFT
-
-        packssdw    mm0,    mm2
-
-        movq        mm3,    mm0
-
-        ; done with one pass
-        ; now start second pass
-        movq        mm0,    [rdi   ]
-        movq        mm1,    [rdi+ 8]
-        movq        mm2,    [rdi+ 16]
-
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi],  mm4
-
-        ;second column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+8]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+8]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+8]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+8],  mm4
-
-
-        ;third column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+16]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+16]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+16]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+16],  mm4
-
-        ;fourth column
-        movq        mm4,    mm0
-
-        pmaddwd     mm4,    [rdx+24]
-        movq        mm5,    mm1
-
-        pmaddwd     mm5,    [rdx+24]
-        movq        mm6,    mm4
-
-        punpckldq   mm4,    mm5
-        punpckhdq   mm6,    mm5
-
-        paddd       mm4,    mm6
-        movq        mm5,    mm2
-
-        pmaddwd     mm5,    [rdx+24]
-        movq        mm6,    mm3
-
-        pmaddwd     mm6,    [rdx+24]
-        movq        mm7,    mm5
-
-        punpckldq   mm5,    mm6
-        punpckhdq   mm7,    mm6
-
-        paddd       mm5,    mm7
-        movq        mm6,    [dct2nd_stage_rounding_mmx GLOBAL]
-
-        paddd       mm4,    mm6
-        paddd       mm5,    mm6
-
-        psrad       mm4,    _2NDSTAGESHIFT
-        psrad       mm5,    _2NDSTAGESHIFT
-
-        packssdw    mm4,    mm5
-        movq        [rdi+24],  mm4
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-;void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch)
-sym(vp8_fast_fdct4x4_mmx):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 3
@@ -379,11 +47,11 @@ sym(vp8_fast_fdct4x4_mmx):
         movq    mm3,    [rcx + rax]
         ; get the constants
         ;shift to left by 1 for prescision
-        paddw   mm0,    mm0
-        paddw   mm1,    mm1
+        psllw   mm0,    3
+        psllw   mm1,    3
 
-        psllw   mm2,    1
-        psllw   mm3,    1
+        psllw   mm2,    3
+        psllw   mm3,    3
 
         ; transpose for the second stage
         movq    mm4,    mm0         ; 00 01 02 03
@@ -531,20 +199,23 @@ sym(vp8_fast_fdct4x4_mmx):
         movq    mm3,    mm5
         ; done with vertical
 
-		pcmpeqw	mm4,	mm4
-		pcmpeqw	mm5,	mm5
-		psrlw	mm4,	15
-		psrlw	mm5,	15
+        pcmpeqw mm4,    mm4
+        pcmpeqw mm5,    mm5
+        psrlw   mm4,    15
+        psrlw   mm5,    15
+
+        psllw   mm4,    2
+        psllw   mm5,    2
 
         paddw   mm0,    mm4
         paddw   mm1,    mm5
         paddw   mm2,    mm4
         paddw   mm3,    mm5
 
-        psraw   mm0, 1
-        psraw   mm1, 1
-        psraw   mm2, 1
-        psraw   mm3, 1
+        psraw   mm0, 3
+        psraw   mm1, 3
+        psraw   mm2, 3
+        psraw   mm3, 3
 
         movq        [rdi   ],   mm0
         movq        [rdi+ 8],   mm1
@@ -560,8 +231,8 @@ sym(vp8_fast_fdct4x4_mmx):
     ret
 
 
-;void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch)
-sym(vp8_fast_fdct8x4_wmt):
+;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
+sym(vp8_short_fdct8x4_wmt):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 3
@@ -584,11 +255,11 @@ sym(vp8_fast_fdct8x4_wmt):
         movdqa      xmm3,       [rcx + rax]
         ; get the constants
         ;shift to left by 1 for prescision
-        psllw       xmm0,        1
-        psllw       xmm2,        1
+        psllw       xmm0,        3
+        psllw       xmm2,        3
 
-        psllw       xmm4,        1
-        psllw       xmm3,        1
+        psllw       xmm4,        3
+        psllw       xmm3,        3
 
         ; transpose for the second stage
         movdqa      xmm1,       xmm0         ; 00 01 02 03 04 05 06 07
@@ -758,20 +429,23 @@ sym(vp8_fast_fdct8x4_wmt):
         ; done with vertical
 
 
-        pcmpeqw		xmm4,		xmm4
-        pcmpeqw		xmm5,		xmm5;
-        psrlw		xmm4,		15
-        psrlw		xmm5,		15
+        pcmpeqw     xmm4,       xmm4
+        pcmpeqw     xmm5,       xmm5;
+        psrlw       xmm4,       15
+        psrlw       xmm5,       15
+
+        psllw       xmm4,       2
+        psllw       xmm5,       2
 
         paddw       xmm0,       xmm4
         paddw       xmm1,       xmm5
         paddw       xmm2,       xmm4
         paddw       xmm3,       xmm5
 
-        psraw       xmm0,       1
-        psraw       xmm1,       1
-        psraw       xmm2,       1
-        psraw       xmm3,       1
+        psraw       xmm0,       3
+        psraw       xmm1,       3
+        psraw       xmm2,       3
+        psraw       xmm3,       3
 
         movq        QWORD PTR[rdi   ],   xmm0
         movq        QWORD PTR[rdi+ 8],   xmm1
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index 05d018043..ada16d34f 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -22,31 +22,22 @@
 #if HAVE_MMX
 extern prototype_fdct(vp8_short_fdct4x4_mmx);
 extern prototype_fdct(vp8_short_fdct8x4_mmx);
-extern prototype_fdct(vp8_fast_fdct4x4_mmx);
-extern prototype_fdct(vp8_fast_fdct8x4_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
+#if 0 new c version,
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
 
 #undef  vp8_fdct_short8x4
 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
-
-#undef  vp8_fdct_fast4x4
-#define vp8_fdct_fast4x4 vp8_fast_fdct4x4_mmx
-
-#undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_mmx
+#endif
 
 #endif
 #endif
 
 
 #if HAVE_SSE2
-extern prototype_fdct(vp8_short_fdct4x4_wmt);
 extern prototype_fdct(vp8_short_fdct8x4_wmt);
-extern prototype_fdct(vp8_fast_fdct8x4_wmt);
-
 extern prototype_fdct(vp8_short_walsh4x4_sse2);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index f3750455b..0fb82e60e 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -18,15 +18,10 @@
 #if HAVE_MMX
 void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
-    vp8_short_fdct4x4_mmx(input,   output,    pitch);
-    vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_c(input,   output,    pitch);
+    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
 }
 
-void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch)
-{
-    vp8_fast_fdct4x4_mmx(input,   output   , pitch);
-    vp8_fast_fdct4x4_mmx(input + 4, output + 16, pitch);
-}
 
 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
@@ -87,11 +82,6 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 #endif
 
 #if HAVE_SSE2
-void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
-{
-    vp8_short_fdct4x4_wmt(input,   output,    pitch);
-    vp8_short_fdct4x4_wmt(input + 4, output + 16, pitch);
-}
 
 int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
@@ -221,11 +211,19 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;
         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
-
+#if 0 // new fdct
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
-        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_mmx;
-        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_mmx;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
+#else
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
+
+#endif
+
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_mmx;
@@ -270,13 +268,13 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;
         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;
 
-#if 0
+#if 0 //new fdct
         /* short SSE2 DCT currently disabled, does not match the MMX version */
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_wmt;
         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_wmt;
-#endif
         /* cpi->rtcd.fdct.fast4x4  not implemented for wmt */;
-        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_wmt;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_wmt;
+#endif
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index f09f25852..f86a0b2aa 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -96,7 +96,6 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
-VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm

From f1a3b1e0d94dec2d40008f36fdfad99338484b9a Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Thu, 24 Jun 2010 13:11:30 -0400
Subject: [PATCH 062/307] Added first-pass sse2 version of Yaowu's new fdct.

Change-Id: Ib479210067510162879c368428b92690591120b2
---
 vp8/encoder/x86/dct_sse2.asm           | 362 ++++++++++---------------
 vp8/encoder/x86/dct_x86.h              |  16 +-
 vp8/encoder/x86/x86_csystemdependent.c |  17 +-
 vp8/vp8cx.mk                           |   1 +
 4 files changed, 166 insertions(+), 230 deletions(-)

diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 1cd137d15..0e8cfcfc3 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -11,251 +11,179 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-global sym(vp8_short_fdct4x4_wmt)
-
-%define         DCTCONSTANTSBITS         (16)
-%define         DCTROUNDINGVALUE         (1<< (DCTCONSTANTSBITS-1))
-%define         x_c1                      (60547)          ; cos(pi  /8) * (1<<15)
-%define         x_c2                      (46341)          ; cos(pi*2/8) * (1<<15)
-%define         x_c3                      (25080)          ; cos(pi*3/8) * (1<<15)
-
-%define _1STSTAGESHIFT           14
-%define _2NDSTAGESHIFT           16
-
-
-;; using matrix multiply
-;void vp8_short_fdct4x4_wmt(short *input, short *output)
-sym(vp8_short_fdct4x4_wmt):
+;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
+global sym(vp8_short_fdct4x4_sse2)
+sym(vp8_short_fdct4x4_sse2):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 2
+    SHADOW_ARGS_TO_STACK 3
+;;    SAVE_XMM
     GET_GOT     rbx
+    push        rsi
+    push        rdi
     ; end prolog
 
-        mov         rax,        arg(0) ;input
-        mov         rcx,        arg(1) ;output
+    mov         rsi, arg(0)
+    movsxd      rax, DWORD PTR arg(2)
+    lea         rdi, [rsi + rax*2]
 
-        lea         rdx,        [dct_matrix_sse2 GLOBAL]
+    movq        xmm0, MMWORD PTR[rsi   ]        ;03 02 01 00
+    movq        xmm2, MMWORD PTR[rsi + rax]     ;13 12 11 10
+    movq        xmm1, MMWORD PTR[rsi + rax*2]   ;23 22 21 20
+    movq        xmm3, MMWORD PTR[rdi + rax]     ;33 32 31 30
 
-        movdqu      xmm0,       [rax   ]
-        movdqu      xmm1,       [rax+16]
+    punpcklqdq  xmm0, xmm2                      ;13 12 11 10 03 02 01 00
+    punpcklqdq  xmm1, xmm3                      ;33 32 31 30 23 22 21 20
 
-        ; first column
-        movdqa      xmm2,       xmm0
-        movdqa      xmm7,       [rdx]
+    mov         rdi, arg(1)
 
-        pmaddwd     xmm2,       xmm7
-        movdqa      xmm3,       xmm1
+    movdqa      xmm2, xmm0
+    punpckldq   xmm0, xmm1                      ;23 22 03 02 21 20 01 00
+    punpckhdq   xmm2, xmm1                      ;33 32 13 12 31 30 11 10
+    movdqa      xmm1, xmm0
+    punpckldq   xmm0, xmm2                      ;31 21 30 20 11 10 01 00
+    pshufhw     xmm1, xmm1, 0b1h                ;22 23 02 03 xx xx xx xx
+    pshufhw     xmm2, xmm2, 0b1h                ;32 33 12 13 xx xx xx xx
 
-        pmaddwd     xmm3,       xmm7
-        movdqa      xmm4,       xmm2
+    punpckhdq   xmm1, xmm2                      ;32 33 22 23 12 13 02 03
+    movdqa      xmm3, xmm0
+    paddw       xmm0, xmm1                      ;b1 a1 b1 a1 b1 a1 b1 a1
+    psubw       xmm3, xmm1                      ;c1 d1 c1 d1 c1 d1 c1 d1
+    psllw       xmm0, 3                         ;b1 <<= 3 a1 <<= 3
+    psllw       xmm3, 3                         ;c1 <<= 3 d1 <<= 3
+    movdqa      xmm1, xmm0
+    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL]     ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL]     ;a1 - b1
+    movdqa      xmm4, xmm3
+    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
 
-        punpckldq   xmm2,       xmm3
-        punpckhdq   xmm4,       xmm3
+    paddd       xmm3, XMMWORD PTR[_14500 GLOBAL]
+    paddd       xmm4, XMMWORD PTR[_7500 GLOBAL]
+    psrad       xmm3, 12            ;(c1 * 2217 + d1 * 5352 +  14500)>>12
+    psrad       xmm4, 12            ;(d1 * 2217 - c1 * 5352 +   7500)>>12
 
-        movdqa      xmm3,       xmm2
-        punpckldq   xmm2,       xmm4
+    packssdw    xmm0, xmm1                      ;op[2] op[0]
+    packssdw    xmm3, xmm4                      ;op[3] op[1]
+    ; 23 22 21 20 03 02 01 00
+    ;
+    ; 33 32 31 30 13 12 11 10
+    ;
+    movdqa      xmm2, xmm0
+    punpcklqdq  xmm0, xmm3                      ;13 12 11 10 03 02 01 00
+    punpckhqdq  xmm2, xmm3                      ;23 22 21 20 33 32 31 30
 
-        punpckhdq   xmm3,       xmm4
-        paddd       xmm2,       xmm3
+    movdqa      xmm3, xmm0
+    punpcklwd   xmm0, xmm2                      ;32 30 22 20 12 10 02 00
+    punpckhwd   xmm3, xmm2                      ;33 31 23 21 13 11 03 01
+    movdqa      xmm2, xmm0
+    punpcklwd   xmm0, xmm3                      ;13 12 11 10 03 02 01 00
+    punpckhwd   xmm2, xmm3                      ;33 32 31 30 23 22 21 20
 
+    movdqa      xmm5, XMMWORD PTR[_7 GLOBAL]
+    pshufd      xmm2, xmm2, 04eh
+    movdqa      xmm3, xmm0
+    paddw       xmm0, xmm2                      ;b1 b1 b1 b1 a1 a1 a1 a1
+    psubw       xmm3, xmm2                      ;c1 c1 c1 c1 d1 d1 d1 d1
 
-        paddd       xmm2,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-        psrad       xmm2,       _1STSTAGESHIFT
-        ;second column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+16]
+    pshufd      xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 b1 a1 a1
+    movdqa      xmm2, xmm3                      ;save d1 for compare
+    pshufd      xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 c1 d1 d1
+    pshuflw     xmm0, xmm0, 0d8h                ;b1 b1 a1 a1 b1 a1 b1 a1
+    pshuflw     xmm3, xmm3, 0d8h                ;c1 c1 d1 d1 c1 d1 c1 d1
+    pshufhw     xmm0, xmm0, 0d8h                ;b1 a1 b1 a1 b1 a1 b1 a1
+    pshufhw     xmm3, xmm3, 0d8h                ;c1 d1 c1 d1 c1 d1 c1 d1
+    movdqa      xmm1, xmm0
+    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1
 
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+16]
+    pxor        xmm4, xmm4                      ;zero out for compare
+    paddd       xmm0, xmm5
+    paddd       xmm1, xmm5
+    pcmpeqw     xmm2, xmm4
+    psrad       xmm0, 4                         ;(a1 + b1 + 7)>>4
+    psrad       xmm1, 4                         ;(a1 - b1 + 7)>>4
+    pandn       xmm2, XMMWORD PTR[_cmp_mask GLOBAL] ;clear upper,
+                                                    ;and keep bit 0 of lower
 
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
+    movdqa      xmm4, xmm3
+    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
+    paddd       xmm3, XMMWORD PTR[_12000 GLOBAL]
+    paddd       xmm4, XMMWORD PTR[_51000 GLOBAL]
+    packssdw    xmm0, xmm1                      ;op[8] op[0]
+    psrad       xmm3, 16                ;(c1 * 2217 + d1 * 5352 +  12000)>>16
+    psrad       xmm4, 16                ;(d1 * 2217 - c1 * 5352 +  51000)>>16
 
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
+    packssdw    xmm3, xmm4                      ;op[12] op[4]
+    movdqa      xmm1, xmm0
+    paddw       xmm3, xmm2                      ;op[4] += (d1!=0)
+    punpcklqdq  xmm0, xmm3                      ;op[4] op[0]
+    punpckhqdq  xmm1, xmm3                      ;op[12] op[8]
 
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
+    movdqa      XMMWORD PTR[rdi + 0], xmm0
+    movdqa      XMMWORD PTR[rdi + 16], xmm1
 
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
-        psrad       xmm3,       _1STSTAGESHIFT
-        packssdw    xmm2,       xmm3
-
-        ;third column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+32]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+32]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _1STSTAGESHIFT
-
-        ;fourth column (this is the last column, so we do not have save the source any more)
-        pmaddwd     xmm0,       [rdx+48]
-        pmaddwd     xmm1,       [rdx+48]
-
-        movdqa      xmm4,       xmm0
-        punpckldq   xmm0,       xmm1
-
-        punpckhdq   xmm4,       xmm1
-        movdqa      xmm1,       xmm0
-
-        punpckldq   xmm0,       xmm4
-        punpckhdq   xmm1,       xmm4
-
-        paddd       xmm0,       xmm1
-        paddd       xmm0,       XMMWORD PTR [dct1st_stage_rounding_sse2 GLOBAL]
-
-
-        psrad       xmm0,       _1STSTAGESHIFT
-        packssdw    xmm3,       xmm0
-        ; done with one pass
-        ; now start second pass
-        movdqa      xmm0,       xmm2
-        movdqa      xmm1,       xmm3
-
-        pmaddwd     xmm2,       xmm7
-        pmaddwd     xmm3,       xmm7
-
-        movdqa      xmm4,       xmm2
-        punpckldq   xmm2,       xmm3
-
-        punpckhdq   xmm4,       xmm3
-        movdqa      xmm3,       xmm2
-
-        punpckldq   xmm2,       xmm4
-        punpckhdq   xmm3,       xmm4
-
-        paddd       xmm2,       xmm3
-        paddd       xmm2,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm2,       _2NDSTAGESHIFT
-
-        ;second column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+16]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+16]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _2NDSTAGESHIFT
-        packssdw    xmm2,       xmm3
-
-        movdqu      [rcx],      xmm2
-        ;third column
-        movdqa      xmm3,       xmm0
-        pmaddwd     xmm3,       [rdx+32]
-
-        movdqa      xmm4,       xmm1
-        pmaddwd     xmm4,       [rdx+32]
-
-        movdqa      xmm5,       xmm3
-        punpckldq   xmm3,       xmm4
-
-        punpckhdq   xmm5,       xmm4
-        movdqa      xmm4,       xmm3
-
-        punpckldq   xmm3,       xmm5
-        punpckhdq   xmm4,       xmm5
-
-        paddd       xmm3,       xmm4
-        paddd       xmm3,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm3,       _2NDSTAGESHIFT
-        ;fourth column
-        pmaddwd     xmm0,       [rdx+48]
-        pmaddwd     xmm1,       [rdx+48]
-
-        movdqa      xmm4,       xmm0
-        punpckldq   xmm0,       xmm1
-
-        punpckhdq   xmm4,       xmm1
-        movdqa      xmm1,       xmm0
-
-        punpckldq   xmm0,       xmm4
-        punpckhdq   xmm1,       xmm4
-
-        paddd       xmm0,       xmm1
-        paddd       xmm0,       XMMWORD PTR [dct2nd_stage_rounding_sse2 GLOBAL]
-
-        psrad       xmm0,       _2NDSTAGESHIFT
-        packssdw    xmm3,       xmm0
-
-        movdqu     [rcx+16],   xmm3
-
-    mov rsp, rbp
     ; begin epilog
+    pop rdi
+    pop rsi
     RESTORE_GOT
+;;    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
 
-
 SECTION_RODATA
-;static unsigned int dct1st_stage_rounding_sse2[4] =
 align 16
-dct1st_stage_rounding_sse2:
-    times 4 dd 8192
-
-
-;static unsigned int dct2nd_stage_rounding_sse2[4] =
+_5352_2217:
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
 align 16
-dct2nd_stage_rounding_sse2:
-    times 4 dd 32768
-
-;static short dct_matrix_sse2[4][8]=
+_2217_neg5352:
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
 align 16
-dct_matrix_sse2:
-    times 8 dw 23170
+_mult_add:
+    times 8 dw 1
+align 16
+_cmp_mask:
+    times 4 dw 1
+    times 4 dw 0
 
-    dw  30274
-    dw  12540
-    dw -12540
-    dw -30274
-    dw  30274
-    dw  12540
-    dw -12540
-    dw -30274
-
-    dw  23170
-    times 2 dw -23170
-    times 2 dw  23170
-    times 2 dw -23170
-    dw  23170
-
-    dw  12540
-    dw -30274
-    dw  30274
-    dw -12540
-    dw  12540
-    dw -30274
-    dw  30274
-    dw -12540
+align 16
+_mult_sub:
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+    dw 1
+    dw -1
+align 16
+_7:
+    times 4 dd 7
+align 16
+_14500:
+    times 4 dd 14500
+align 16
+_7500:
+    times 4 dd 7500
+align 16
+_12000:
+    times 4 dd 12000
+align 16
+_51000:
+    times 4 dd 51000
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index ada16d34f..bff52e129 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -24,7 +24,7 @@ extern prototype_fdct(vp8_short_fdct4x4_mmx);
 extern prototype_fdct(vp8_short_fdct8x4_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
-#if 0 new c version,
+#if 0
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
 
@@ -40,19 +40,23 @@ extern prototype_fdct(vp8_short_fdct8x4_mmx);
 extern prototype_fdct(vp8_short_fdct8x4_wmt);
 extern prototype_fdct(vp8_short_walsh4x4_sse2);
 
-#if !CONFIG_RUNTIME_CPU_DETECT
+extern prototype_fdct(vp8_short_fdct4x4_sse2);
 
-#if 0
+#if !CONFIG_RUNTIME_CPU_DETECT
+#if 1
 /* short SSE2 DCT currently disabled, does not match the MMX version */
 #undef  vp8_fdct_short4x4
-#define vp8_fdct_short4x4 vp8_short_fdct4x4_wmt
+#define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
 
 #undef  vp8_fdct_short8x4
-#define vp8_fdct_short8x4 vp8_short_fdct8x4_wmt
+#define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
 #endif
 
+#undef  vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
+
 #undef  vp8_fdct_fast8x4
-#define vp8_fdct_fast8x4 vp8_fast_fdct8x4_wmt
+#define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
 
 #undef vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_sse2
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 0fb82e60e..4d0515662 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -82,6 +82,11 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 #endif
 
 #if HAVE_SSE2
+void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
+{
+    vp8_short_fdct4x4_sse2(input,   output,    pitch);
+    vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
+}
 
 int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
@@ -268,13 +273,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_sse2;
         /* cpi->rtcd.variance.get4x4sse_cs  not implemented for wmt */;
 
-#if 0 //new fdct
-        /* short SSE2 DCT currently disabled, does not match the MMX version */
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_wmt;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_wmt;
-        /* cpi->rtcd.fdct.fast4x4  not implemented for wmt */;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_wmt;
-#endif
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_sse2;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_sse2;
+        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
+        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
+
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index f86a0b2aa..c88df4705 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -93,6 +93,7 @@ VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/sad_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/dct_mmx.asm
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/subtract_mmx.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_sse2.c
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm

From aa8fe0d269ada1f32fcf9f6941388de6e8767c65 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Mon, 28 Jun 2010 12:00:11 +0100
Subject: [PATCH 063/307] Fixed buffer selection for UV in AltRef filtering

Corrected setting of "which_buffer" for U & V cases to match that
used for Y, i.e. to refer to the temporally most recent frame of
those to be filtered.

Change-Id: Idf94b287ef47a05f060da3e61134a0b616adcb6b
---
 vp8/encoder/onyx_if.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 60d807c03..f331a4ba2 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3231,7 +3231,7 @@ static void vp8cx_temp_blur1_c
     unsigned char block_size
 )
 {
-    int byte = 0;           // Buffer offset for the current pixel value being filtered
+    int byte = 0;         // Buffer offset for current pixel being filtered
     int frame = 0;
     int modifier = 0;
     int i, j, k;
@@ -3264,9 +3264,9 @@ static void vp8cx_temp_blur1_c
                     for (frame = 0; frame < frame_count; frame++)
                     {
                         // get current frame pixel value
-                        int pixel_value = frames[frame][byte];       // int pixel_value = *frameptr;
+                        int pixel_value = frames[frame][byte];
 
-                        modifier   = src_byte;                       // modifier   = s[byte];
+                        modifier   = src_byte;
                         modifier  -= pixel_value;
                         modifier  *= modifier;
                         modifier >>= strength;
@@ -3283,10 +3283,10 @@ static void vp8cx_temp_blur1_c
                     }
 
                     accumulator += (count >> 1);
-                    accumulator *= fixed_divide[count];          // accumulator *= ppi->fixed_divide[count];
+                    accumulator *= fixed_divide[count];
                     accumulator >>= 16;
 
-                    dst[byte] = accumulator;        // d[byte] = accumulator;
+                    dst[byte] = accumulator;
 
                     // move to next pixel
                     byte++;
@@ -3392,7 +3392,8 @@ static void vp8cx_temp_filter_c
         {
             if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames)
             {
-                frames_to_blur_backward = max_frames - frames_to_blur_forward - 1;
+                frames_to_blur_backward 
+                    = max_frames - frames_to_blur_forward - 1;
             }
         }
         else
@@ -3449,7 +3450,7 @@ static void vp8cx_temp_filter_c
 
     for (frame = 0; frame < frames_to_blur; frame++)
     {
-        int which_buffer =  cpi->last_alt_ref_sei - frame;
+        int which_buffer =  start_frame - frame;
 
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;
@@ -3473,7 +3474,7 @@ static void vp8cx_temp_filter_c
 
     for (frame = 0; frame < frames_to_blur; frame++)
     {
-        int which_buffer =  cpi->last_alt_ref_sei - frame;
+        int which_buffer =  start_frame - frame;
 
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;

From b62d093efa8bc100462995ffd8d067fe1f49612c Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Mon, 28 Jun 2010 22:03:43 -0700
Subject: [PATCH 064/307] Improve the accuracy of forward walsh-hadamard
 transform

Besides the slight improvement in round trip error. This
also fixes a sign bias in the forward transform, so the
round trip errors are evenly distributed between +1s and
-1s. The old bias seemed to work well with the dc sign bias
in old fdct,  which no longer exist in the improved fdct.

Change-Id: I8635e7be16c69e69a8669eca5438550d23089cef
---
 vp8/encoder/dct.c                      | 49 +++++++++++++-------------
 vp8/encoder/x86/x86_csystemdependent.c |  2 +-
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 58e36109c..2827aa5a4 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -69,17 +69,18 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
     short *ip = input;
     short *op = output;
 
+
     for (i = 0; i < 4; i++)
     {
-        a1 = ip[0] + ip[3];
-        b1 = ip[1] + ip[2];
-        c1 = ip[1] - ip[2];
-        d1 = ip[0] - ip[3];
+        a1 = ((ip[0] + ip[2])<<2);
+        d1 = ((ip[1] + ip[3])<<2);
+        c1 = ((ip[1] - ip[3])<<2);
+        b1 = ((ip[0] - ip[2])<<2);
 
-        op[0] = a1 + b1;
-        op[1] = c1 + d1;
-        op[2] = a1 - b1;
-        op[3] = d1 - c1;
+        op[0] = a1 + d1 + (a1!=0);
+        op[1] = b1 + c1;
+        op[2] = b1 - c1;
+        op[3] = a1 - d1;
         ip += pitch / 2;
         op += 4;
     }
@@ -89,25 +90,25 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)
 
     for (i = 0; i < 4; i++)
     {
-        a1 = ip[0] + ip[12];
-        b1 = ip[4] + ip[8];
-        c1 = ip[4] - ip[8];
-        d1 = ip[0] - ip[12];
+        a1 = ip[0] + ip[8];
+        d1 = ip[4] + ip[12];
+        c1 = ip[4] - ip[12];
+        b1 = ip[0] - ip[8];
 
-        a2 = a1 + b1;
-        b2 = c1 + d1;
-        c2 = a1 - b1;
-        d2 = d1 - c1;
+        a2 = a1 + d1;
+        b2 = b1 + c1;
+        c2 = b1 - c1;
+        d2 = a1 - d1;
 
-        a2 += (a2 > 0);
-        b2 += (b2 > 0);
-        c2 += (c2 > 0);
-        d2 += (d2 > 0);
+        a2 += a2<0;
+        b2 += b2<0;
+        c2 += c2<0;
+        d2 += d2<0;
 
-        op[0] = (a2) >> 1;
-        op[4] = (b2) >> 1;
-        op[8] = (c2) >> 1;
-        op[12] = (d2) >> 1;
+        op[0] = (a2+3) >> 3;
+        op[4] = (b2+3) >> 3;
+        op[8] = (c2+3) >> 3;
+        op[12]= (d2+3) >> 3;
 
         ip++;
         op++;
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 4d0515662..11ef4197b 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -278,7 +278,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
 
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c ;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;

From 1ca39bf26dd114f224ce67f1f3f85076cdafaacc Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Tue, 29 Jun 2010 12:15:54 +0100
Subject: [PATCH 065/307] Further adjustment of RD behaviour with Q and Zbin.

Following conversations with Tim T (Derf) I ran a large number of
tests comparing the existing polynomial expression with a simpler
^2 variant. Though the polynomial was sometimes a little better at
the extremes of Q it was possible to get close for most clips and
even a little better on some.

This code also changes the way the RD multiplier is calculated
when the ZBIN is extended to use a variant of the same ^2
expression.

I hope that this simpler expression will be easier to tune further
as we expand our test set and consider adjustments based on content.

Change-Id: I73b2564346e74d1332c33e2c1964ae093437456c
---
 vp8/encoder/rdopt.c | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 70cf122fa..65dbd8d8e 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -231,18 +231,29 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
     int i;
     int *thresh;
     int threshmult;
-
-    int capped_q = (Qvalue < 160) ? Qvalue : 160;
+    double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0;
+    double rdconst = 3.00;
 
     vp8_clear_system_state();  //__asm emms;
 
-    cpi->RDMULT = (int)( (0.0001 * (capped_q * capped_q * capped_q * capped_q))
-                        -(0.015 * (capped_q * capped_q * capped_q))
-                        +(3.25 * (capped_q * capped_q))
-                        -(17.5 * capped_q) + 125.0);
+    // Further tests required to see if optimum is different
+    // for key frames, golden frames and arf frames.
+    // if (cpi->common.refresh_golden_frame ||
+    //     cpi->common.refresh_alt_ref_frame)
+    cpi->RDMULT = (int)(rdconst * (capped_q * capped_q));
 
-    if (cpi->RDMULT < 125)
-        cpi->RDMULT = 125;
+    // Extend rate multiplier along side quantizer zbin increases
+    if (cpi->zbin_over_quant  > 0)
+    {
+        double oq_factor;
+        double modq;
+
+        // Experimental code using the same basic equation as used for Q above
+        // The units of cpi->zbin_over_quant are 1/128 of Q bin size
+        oq_factor = 1.0 + ((double)0.0015625 * cpi->zbin_over_quant);
+        modq = (int)((double)capped_q * oq_factor);
+        cpi->RDMULT = (int)(rdconst * (modq * modq));
+    }
 
     if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME))
     {
@@ -252,17 +263,8 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, int Qvalue)
             cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->next_iiratio]) >> 4;
     }
 
-
-    // Extend rate multiplier along side quantizer zbin increases
-    if (cpi->zbin_over_quant  > 0)
-    {
-        double oq_factor = pow(1.006,  cpi->zbin_over_quant);
-
-        if (oq_factor > (1.0 + ((double)cpi->zbin_over_quant / 64.0)))
-            oq_factor = (1.0 + (double)cpi->zbin_over_quant / 64.0);
-
-        cpi->RDMULT = (int)(oq_factor * cpi->RDMULT);
-    }
+    if (cpi->RDMULT < 125)
+        cpi->RDMULT = 125;
 
     cpi->mb.errorperbit = (cpi->RDMULT / 100);
 

From a23ec527afe7b64445be083e9f77f12b15b77f51 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Tue, 29 Jun 2010 12:02:19 -0400
Subject: [PATCH 066/307] ARM WinCE VS8 build update

The generated project is vpx.vcproj, change vpx_decoder references to
match. Remove .rules file dependency as it will be pulled from the
source tree.

Change-Id: I679db2748b37adae3bafd764dba8575fc3abde72
---
 build/arm-wince-vs8/{vpx_decoder.sln => vpx.sln} |  2 +-
 build/make/gen_msvs_proj.sh                      | 10 +++++-----
 libs.mk                                          |  1 -
 3 files changed, 6 insertions(+), 7 deletions(-)
 rename build/arm-wince-vs8/{vpx_decoder.sln => vpx.sln} (98%)

diff --git a/build/arm-wince-vs8/vpx_decoder.sln b/build/arm-wince-vs8/vpx.sln
similarity index 98%
rename from build/arm-wince-vs8/vpx_decoder.sln
rename to build/arm-wince-vs8/vpx.sln
index 226205761..3e49929f2 100644
--- a/build/arm-wince-vs8/vpx_decoder.sln
+++ b/build/arm-wince-vs8/vpx.sln
@@ -8,7 +8,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "example", "example.vcproj",
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "obj_int_extract", "obj_int_extract.vcproj", "{E1360C65-D375-4335-8057-7ED99CC3F9B2}"
 EndProject
-Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx_decoder", "vpx_decoder.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}"
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "vpx", "vpx.vcproj", "{DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74}"
 	ProjectSection(ProjectDependencies) = postProject
 		{E1360C65-D375-4335-8057-7ED99CC3F9B2} = {E1360C65-D375-4335-8057-7ED99CC3F9B2}
 	EndProjectSection
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 7dd0ad123..63d537630 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -347,7 +347,7 @@ generate_vcproj() {
         x86*) $uses_asm && tag ToolFile RelativePath="$self_dirname/../x86-msvs/yasm.rules"
         ;;
         arm*|iwmmx*)
-            if [ "$name" == "vpx_decoder" ];then
+            if [ "$name" == "vpx" ];then
             case "$target" in
                 armv5*)
                     tag ToolFile RelativePath="$self_dirname/../arm-wince-vs8/armasmv5.rules"
@@ -376,7 +376,7 @@ generate_vcproj() {
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag Tool \
+                vpx)         tag Tool \
                              Name="VCPreBuildEventTool" \
                              CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
                              tag Tool \
@@ -510,7 +510,7 @@ generate_vcproj() {
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag DeploymentTool \
+                vpx)         tag DeploymentTool \
                              ForceDirty="-1" \
                              RegisterOutput="0"
                                 ;;
@@ -534,7 +534,7 @@ generate_vcproj() {
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag Tool \
+                vpx)         tag Tool \
                                      Name="VCPreBuildEventTool" \
                                      CommandLine="call obj_int_extract.bat \$(ConfigurationName)"
                              tag Tool \
@@ -672,7 +672,7 @@ generate_vcproj() {
 
         if [ "$target" == "armv6-wince-vs8" ] || [ "$target" == "armv5te-wince-vs8" ] || [ "$target" == "iwmmxt-wince-vs8" ] || [ "$target" == "iwmmxt2-wince-vs8" ];then
             case "$name" in
-                vpx_decoder) tag DeploymentTool \
+                vpx)         tag DeploymentTool \
                              ForceDirty="-1" \
                              RegisterOutput="0"
                 ;;
diff --git a/libs.mk b/libs.mk
index 21c25c0c4..4bc5dd76c 100644
--- a/libs.mk
+++ b/libs.mk
@@ -144,7 +144,6 @@ obj_int_extract.vcproj: $(SRC_PATH_BARE)/build/make/obj_int_extract.c
 
 PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.vcproj
 PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.bat
-PROJECTS-$(BUILD_LIBVPX) += armasm$(ARM_ARCH).rules
 endif
 
 vpx.def: $(call enabled,CODEC_EXPORTS)

From bead039d4d316092bca20e62df001f92a86067d2 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Fri, 25 Jun 2010 09:18:11 -0400
Subject: [PATCH 067/307] Improve SSE2 loopfilter functions

Restructured and rewrote SSE2 loopfilter functions. Combined u and
v into one function to take advantage of SSE2 128-bit registers.
Tests on test clips showed a 4% decoder performance improvement on
Linux desktop.

Change-Id: Iccc6669f09e17f2224da715f7547d6f93b0a4987
---
 vp8/common/arm/loopfilter_arm.c    |   10 -
 vp8/common/loopfilter.h            |    9 +
 vp8/common/x86/loopfilter_sse2.asm | 2647 +++++++++++++++-------------
 vp8/common/x86/loopfilter_x86.c    |   25 +-
 4 files changed, 1399 insertions(+), 1292 deletions(-)

diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index bb4af2205..12e56abd0 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -14,16 +14,6 @@
 #include "loopfilter.h"
 #include "onyxc_int.h"
 
-typedef void loop_filter_uvfunction
-(
-    unsigned char *u,   // source pointer
-    int p,              // pitch
-    const signed char *flimit,
-    const signed char *limit,
-    const signed char *thresh,
-    unsigned char *v
-);
-
 extern prototype_loopfilter(vp8_loop_filter_horizontal_edge_armv6);
 extern prototype_loopfilter(vp8_loop_filter_vertical_edge_armv6);
 extern prototype_loopfilter(vp8_mbloop_filter_horizontal_edge_armv6);
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index f051a3151..66185d1e7 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -117,5 +117,14 @@ typedef struct
 #define LF_INVOKE(ctx,fn) vp8_lf_##fn
 #endif
 
+typedef void loop_filter_uvfunction
+(
+    unsigned char *u,   // source pointer
+    int p,              // pitch
+    const signed char *flimit,
+    const signed char *limit,
+    const signed char *thresh,
+    unsigned char *v
+);
 
 #endif
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index f11fcadec..ad2f36c9e 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -12,6 +12,283 @@
 %include "vpx_ports/x86_abi_support.asm"
 
 
+%macro LFH_FILTER_MASK 1
+%if %1
+        movdqa      xmm2,                   [rdi+2*rax]       ; q3
+        movdqa      xmm1,                   [rsi+2*rax]       ; q2
+%else
+        movq        xmm0,                   [rsi + rcx*2]     ; q3
+        movq        xmm2,                   [rdi + rcx*2]
+        pslldq      xmm2,                   8
+        por         xmm2,                   xmm0
+        movq        xmm1,                   [rsi + rcx]       ; q2
+        movq        xmm3,                   [rdi + rcx]
+        pslldq      xmm3,                   8
+        por         xmm1,                   xmm3
+        movdqa      XMMWORD PTR [rsp],      xmm1              ; store q2
+%endif
+
+        movdqa      xmm6,                   xmm1              ; q2
+        psubusb     xmm1,                   xmm2              ; q2-=q3
+        psubusb     xmm2,                   xmm6              ; q3-=q2
+        por         xmm1,                   xmm2              ; abs(q3-q2)
+
+        psubusb     xmm1,                   xmm7
+
+%if %1
+        movdqa      xmm4,                   [rsi+rax]         ; q1
+%else
+        movq        xmm0,                   [rsi]             ; q1
+        movq        xmm4,                   [rdi]
+        pslldq      xmm4,                   8
+        por         xmm4,                   xmm0
+        movdqa      XMMWORD PTR [rsp + 16], xmm4              ; store q1
+%endif
+
+        movdqa      xmm3,                   xmm4              ; q1
+        psubusb     xmm4,                   xmm6              ; q1-=q2
+        psubusb     xmm6,                   xmm3              ; q2-=q1
+        por         xmm4,                   xmm6              ; abs(q2-q1)
+        psubusb     xmm4,                   xmm7
+
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm4,                   [rsi]             ; q0
+%else
+        movq        xmm4,                   [rsi + rax]       ; q0
+        movq        xmm0,                   [rdi + rax]
+        pslldq      xmm0,                   8
+        por         xmm4,                   xmm0
+%endif
+
+        movdqa      xmm0,                   xmm4              ; q0
+        psubusb     xmm4,                   xmm3              ; q0-=q1
+        psubusb     xmm3,                   xmm0              ; q1-=q0
+        por         xmm4,                   xmm3              ; abs(q0-q1)
+        movdqa      t0,                     xmm4              ; save to t0
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        neg         rax                     ; negate pitch to deal with above border
+
+        movdqa      xmm2,                   [rsi+4*rax]       ; p3
+        movdqa      xmm4,                   [rdi+4*rax]       ; p2
+%else
+        lea         rsi,                    [rsi + rax*4]
+        lea         rdi,                    [rdi + rax*4]
+
+        movq        xmm2,                   [rsi + rax]       ; p3
+        movq        xmm3,                   [rdi + rax]
+        pslldq      xmm3,                   8
+        por         xmm2,                   xmm3
+        movq        xmm4,                   [rsi]             ; p2
+        movq        xmm5,                   [rdi]
+        pslldq      xmm5,                   8
+        por         xmm4,                   xmm5
+        movdqa      XMMWORD PTR [rsp + 32], xmm4              ; store p2
+%endif
+
+        movdqa      xmm5,                   xmm4              ; p2
+        psubusb     xmm4,                   xmm2              ; p2-=p3
+        psubusb     xmm2,                   xmm5              ; p3-=p2
+        por         xmm4,                   xmm2              ; abs(p3 - p2)
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm4,                   [rsi+2*rax]       ; p1
+%else
+        movq        xmm4,                   [rsi + rcx]       ; p1
+        movq        xmm3,                   [rdi + rcx]
+        pslldq      xmm3,                   8
+        por         xmm4,                   xmm3
+        movdqa      XMMWORD PTR [rsp + 48], xmm4              ; store p1
+%endif
+
+        movdqa      xmm3,                   xmm4              ; p1
+        psubusb     xmm4,                   xmm5              ; p1-=p2
+        psubusb     xmm5,                   xmm3              ; p2-=p1
+        por         xmm4,                   xmm5              ; abs(p2 - p1)
+        psubusb     xmm4,                   xmm7
+
+        por         xmm1,                   xmm4
+        movdqa      xmm2,                   xmm3              ; p1
+
+%if %1
+        movdqa      xmm4,                   [rsi+rax]         ; p0
+%else
+        movq        xmm4,                   [rsi + rcx*2]     ; p0
+        movq        xmm5,                   [rdi + rcx*2]
+        pslldq      xmm5,                   8
+        por         xmm4,                   xmm5
+%endif
+
+        movdqa      xmm5,                   xmm4              ; p0
+        psubusb     xmm4,                   xmm3              ; p0-=p1
+        psubusb     xmm3,                   xmm5              ; p1-=p0
+        por         xmm4,                   xmm3              ; abs(p1 - p0)
+        movdqa        t1,                   xmm4              ; save to t1
+
+        psubusb     xmm4,                   xmm7
+        por         xmm1,                   xmm4
+
+%if %1
+        movdqa      xmm3,                   [rdi]             ; q1
+%else
+        movdqa      xmm3,                   q1                ; q1
+%endif
+
+        movdqa      xmm4,                   xmm3              ; q1
+        psubusb     xmm3,                   xmm2              ; q1-=p1
+        psubusb     xmm2,                   xmm4              ; p1-=q1
+        por         xmm2,                   xmm3              ; abs(p1-q1)
+        pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
+        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
+
+        movdqa      xmm6,                   xmm5              ; p0
+        movdqa      xmm3,                   xmm0              ; q0
+        psubusb     xmm5,                   xmm3              ; p0-=q0
+        psubusb     xmm3,                   xmm6              ; q0-=p0
+        por         xmm5,                   xmm3              ; abs(p0 - q0)
+        paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2
+        paddusb     xmm5,                   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+        mov         rdx,                    arg(2)            ; get flimit
+        movdqa      xmm2,                   XMMWORD PTR [rdx]
+        paddb       xmm2,                   xmm2              ; flimit*2 (less than 255)
+        paddb       xmm7,                   xmm2              ; flimit * 2 + limit (less than 255)
+
+        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        por         xmm1,                   xmm5
+        pxor        xmm5,                   xmm5
+        pcmpeqb     xmm1,                   xmm5              ; mask mm1
+%endmacro
+
+%macro LFH_HEV_MASK 0
+        mov         rdx,                    arg(4)            ; get thresh
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        movdqa      xmm4,                   t0                ; get abs (q1 - q0)
+        psubusb     xmm4,                   xmm7
+        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
+        psubusb     xmm3,                   xmm7
+        paddb       xmm4,                   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+        pcmpeqb     xmm4,                   xmm5
+
+        pcmpeqb     xmm5,                   xmm5
+        pxor        xmm4,                   xmm5
+%endmacro
+
+%macro BH_FILTER 1
+%if %1
+        movdqa      xmm2,                   [rsi+2*rax]       ; p1
+        movdqa      xmm7,                   [rdi]             ; q1
+%else
+        movdqa      xmm2,                   p1                ; p1
+        movdqa      xmm7,                   q1                ; q1
+%endif
+
+        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
+        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
+
+        psubsb      xmm2,                   xmm7              ; p1 - q1
+        pand        xmm2,                   xmm4              ; high var mask (hvm)(p1 - q1)
+        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
+
+        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
+        movdqa      xmm3,                   xmm0              ; q0
+
+        psubsb      xmm0,                   xmm6              ; q0 - p0
+        paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + hvm(p1 - q1)
+        pand        xmm1,                   xmm2              ; mask filter values we don't care about
+        movdqa      xmm2,                   xmm1
+        paddsb      xmm1,                   [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+        paddsb      xmm2,                   [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+
+        pxor        xmm0,                   xmm0
+        pxor        xmm5,                   xmm5
+        punpcklbw   xmm0,                   xmm2
+        punpckhbw   xmm5,                   xmm2
+        psraw       xmm0,                   11
+        psraw       xmm5,                   11
+        packsswb    xmm0,                   xmm5
+        movdqa      xmm2,                   xmm0              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+        pxor        xmm0,                   xmm0              ; 0
+        movdqa      xmm5,                   xmm1              ; abcdefgh
+        punpcklbw   xmm0,                   xmm1              ; e0f0g0h0
+        psraw       xmm0,                   11                ; sign extended shift right by 3
+        pxor        xmm1,                   xmm1              ; 0
+        punpckhbw   xmm1,                   xmm5              ; a0b0c0d0
+        psraw       xmm1,                   11                ; sign extended shift right by 3
+        movdqa      xmm5,                   xmm0              ; save results
+
+        packsswb    xmm0,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+        paddsw      xmm5,                   [ones GLOBAL]
+        paddsw      xmm1,                   [ones GLOBAL]
+        psraw       xmm5,                   1                 ; partial shifted one more time for 2nd tap
+        psraw       xmm1,                   1                 ; partial shifted one more time for 2nd tap
+        packsswb    xmm5,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+        pandn       xmm4,                   xmm5              ; high edge variance additive
+%endmacro
+
+%macro BH_WRITEBACK 1
+        paddsb      xmm6,                   xmm2              ; p0+= p0 add
+        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi+rax],              xmm6              ; write back
+%else
+        lea         rsi,                    [rsi + rcx*2]
+        lea         rdi,                    [rdi + rcx*2]
+        movq        MMWORD PTR [rsi],       xmm6              ; p0
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi],       xmm6
+%endif
+
+%if %1
+        movdqa      xmm6,                   [rsi+2*rax]       ; p1
+%else
+        movdqa      xmm6,                   p1                ; p1
+%endif
+        pxor        xmm6,                   [t80 GLOBAL]      ; reoffset
+        paddsb      xmm6,                   xmm4               ; p1+= p1 add
+        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi+2*rax],            xmm6              ; write back
+%else
+        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi + rax], xmm6
+%endif
+
+        psubsb      xmm3,                   xmm0              ; q0-= q0 add
+        pxor        xmm3,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rsi],                  xmm3              ; write back
+%else
+        movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx], xmm3
+%endif
+
+        psubsb      xmm7,                   xmm4              ; q1-= q1 add
+        pxor        xmm7,                   [t80 GLOBAL]      ; unoffset
+%if %1
+        movdqa      [rdi],                  xmm7              ; write back
+%else
+        movq        MMWORD PTR [rsi + rcx*2],xmm7             ; q1
+        psrldq      xmm7,                   8
+        movq        MMWORD PTR [rdi + rcx*2],xmm7
+%endif
+%endmacro
+
+
 ;void vp8_loop_filter_horizontal_edge_sse2
 ;(
 ;    unsigned char *src_ptr,
@@ -33,179 +310,28 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     ; end prolog
 
     ALIGN_STACK 16, rax
-    sub         rsp, 32                         ; reserve 32 bytes
+    sub         rsp, 32     ; reserve 32 bytes
     %define t0 [rsp + 0]    ;__declspec(align(16)) char t0[16];
     %define t1 [rsp + 16]   ;__declspec(align(16)) char t1[16];
 
-        mov         rsi, arg(0) ;src_ptr
-        movsxd      rax, dword ptr arg(1) ;src_pixel_step     ; destination pitch?
+        mov         rsi,                    arg(0)           ;src_ptr
+        movsxd      rax,                    dword ptr arg(1) ;src_pixel_step
 
-        mov         rdx,    arg(3) ;limit
-        movdqa      xmm7,   XMMWORD PTR [rdx]
-        mov         rdi,    rsi           ; rdi points to row +1 for indirect addressing
-        add         rdi,    rax
+        mov         rdx,                    arg(3)           ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rdi,                    [rsi+rax]        ; rdi points to row +1 for indirect addressing
 
         ; calculate breakout conditions
-        movdqu      xmm2,   [rdi+2*rax]      ; q3
-        movdqu      xmm1,   [rsi+2*rax]      ; q2
-        movdqa      xmm6,   xmm1              ; q2
-        psubusb     xmm1,   xmm2              ; q2-=q3
-        psubusb     xmm2,   xmm6              ; q3-=q2
-        por         xmm1,   xmm2              ; abs(q3-q2)
-        psubusb     xmm1,   xmm7              ;
-
-
-        movdqu      xmm4,   [rsi+rax]         ; q1
-        movdqa      xmm3,   xmm4              ; q1
-        psubusb     xmm4,   xmm6              ; q1-=q2
-        psubusb     xmm6,   xmm3              ; q2-=q1
-        por         xmm4,   xmm6              ; abs(q2-q1)
-
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        movdqu      xmm4,   [rsi]             ; q0
-        movdqa      xmm0,   xmm4              ; q0
-        psubusb     xmm4,   xmm3              ; q0-=q1
-        psubusb     xmm3,   xmm0              ; q1-=q0
-        por         xmm4,   xmm3              ; abs(q0-q1)
-        movdqa        t0,       xmm4                  ; save to t0
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        neg         rax                   ; negate pitch to deal with above border
-        movdqu      xmm2,   [rsi+4*rax]      ; p3
-        movdqu      xmm4,   [rdi+4*rax]      ; p2
-        movdqa      xmm5,   xmm4              ; p2
-        psubusb     xmm4,   xmm2              ; p2-=p3
-        psubusb     xmm2,   xmm5              ; p3-=p2
-        por         xmm4,   xmm2              ; abs(p3 - p2)
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-
-        movdqu      xmm4,   [rsi+2*rax]      ; p1
-        movdqa      xmm3,   xmm4              ; p1
-        psubusb     xmm4,   xmm5              ; p1-=p2
-        psubusb     xmm5,   xmm3              ; p2-=p1
-        por         xmm4,   xmm5              ; abs(p2 - p1)
-        psubusb     xmm4,   xmm7
-        por         xmm1,   xmm4
-
-        movdqa      xmm2,   xmm3              ; p1
-
-        movdqu      xmm4,   [rsi+rax]         ; p0
-        movdqa      xmm5,   xmm4              ; p0
-        psubusb     xmm4,   xmm3              ; p0-=p1
-        psubusb     xmm3,   xmm5              ; p1-=p0
-        por         xmm4,   xmm3              ; abs(p1 - p0)
-        movdqa      t1,     xmm4                  ; save to t1
-        psubusb     xmm4,   xmm7
-        por         xmm1,    xmm4
-
-        movdqu      xmm3,   [rdi]             ; q1
-        movdqa      xmm4,   xmm3              ; q1
-        psubusb     xmm3,   xmm2              ; q1-=p1
-        psubusb     xmm2,   xmm4              ; p1-=q1
-        por         xmm2,   xmm3              ; abs(p1-q1)
-        pand        xmm2,   [tfe GLOBAL]      ; set lsb of each byte to zero
-        psrlw       xmm2,   1                 ; abs(p1-q1)/2
-
-        movdqa      xmm6,   xmm5              ; p0
-        movdqu      xmm3,   [rsi]             ; q0
-        psubusb     xmm5,   xmm3              ; p0-=q0
-        psubusb     xmm3,   xmm6              ; q0-=p0
-        por         xmm5,   xmm3              ; abs(p0 - q0)
-        paddusb     xmm5,   xmm5              ; abs(p0-q0)*2
-        paddusb     xmm5,   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        mov         rdx,    arg(2) ;flimit            ; get flimit
-        movdqa      xmm2,   [rdx]             ;
-
-        paddb       xmm2,   xmm2              ; flimit*2 (less than 255)
-        paddb       xmm7,   xmm2              ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm5,    xmm7             ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,    xmm5
-        pxor        xmm5,    xmm5
-        pcmpeqb     xmm1,    xmm5             ; mask mm1
-
+        LFH_FILTER_MASK 1
 
         ; calculate high edge variance
-        mov         rdx,    arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,   [rdx]             ;
-        movdqa      xmm4,   t0                ; get abs (q1 - q0)
-        psubusb     xmm4,   xmm7
-        movdqa      xmm3,   t1                ; get abs (p1 - p0)
-        psubusb     xmm3,   xmm7
-        paddb       xmm4,   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,        xmm5
-        pcmpeqb     xmm5,        xmm5
-        pxor        xmm4,        xmm5
-
+        LFH_HEV_MASK
 
         ; start work on filters
-        movdqu      xmm2, [rsi+2*rax]     ; p1
-        movdqu      xmm7, [rdi]           ; q1
-        pxor        xmm2, [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7, [t80 GLOBAL]    ; q1 offset to convert to signed values
-        psubsb      xmm2, xmm7            ; p1 - q1
-        pand        xmm2, xmm4            ; high var mask (hvm)(p1 - q1)
-        pxor        xmm6, [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0, [t80 GLOBAL]    ; offset to convert to signed values
-        movdqa      xmm3, xmm0            ; q0
-        psubsb      xmm0, xmm6            ; q0 - p0
-        paddsb      xmm2, xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2, xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2, xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
-        pand        xmm1, xmm2            ; mask filter values we don't care about
-        movdqa      xmm2, xmm1
-        paddsb      xmm1, [t4 GLOBAL]         ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-        paddsb      xmm2, [t3 GLOBAL]         ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-
-        pxor        xmm0, xmm0           ;
-        pxor        xmm5, xmm5
-        punpcklbw   xmm0, xmm2          ;
-        punpckhbw   xmm5, xmm2          ;
-        psraw       xmm0, 11                ;
-        psraw       xmm5, 11
-        packsswb    xmm0, xmm5
-        movdqa      xmm2, xmm0          ;  (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
-        pxor        xmm0, xmm0            ; 0
-        movdqa      xmm5, xmm1            ; abcdefgh
-        punpcklbw   xmm0, xmm1            ; e0f0g0h0
-        psraw       xmm0, 11                  ; sign extended shift right by 3
-        pxor        xmm1, xmm1            ; 0
-        punpckhbw   xmm1, xmm5            ; a0b0c0d0
-        psraw       xmm1, 11                  ; sign extended shift right by 3
-        movdqa      xmm5, xmm0              ; save results
-
-        packsswb    xmm0, xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5, [ones GLOBAL]
-        paddsw      xmm1, [ones GLOBAL]
-        psraw       xmm5, 1               ; partial shifted one more time for 2nd tap
-        psraw       xmm1, 1               ; partial shifted one more time for 2nd tap
-        packsswb    xmm5, xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-        pandn       xmm4, xmm5            ; high edge variance additive
-
-        paddsb      xmm6, xmm2            ; p0+= p0 add
-        pxor        xmm6, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi+rax], xmm6       ; write back
-
-        movdqu      xmm6, [rsi+2*rax]     ; p1
-        pxor        xmm6, [t80 GLOBAL]    ; reoffset
-        paddsb      xmm6, xmm4            ; p1+= p1 add
-        pxor        xmm6, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi+2*rax], xmm6     ; write back
-
-        psubsb      xmm3, xmm0            ; q0-= q0 add
-        pxor        xmm3, [t80 GLOBAL]    ; unoffset
-        movdqu      [rsi], xmm3           ; write back
-
-        psubsb      xmm7, xmm4            ; q1-= q1 add
-        pxor        xmm7, [t80 GLOBAL]    ; unoffset
-        movdqu      [rdi], xmm7           ; write back
+        BH_FILTER 1
+        ; write back the result
+        BH_WRITEBACK 1
 
     add rsp, 32
     pop rsp
@@ -219,7 +345,7 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
     ret
 
 
-;void vp8_loop_filter_vertical_edge_sse2
+;void vp8_loop_filter_horizontal_edge_uv_sse2
 ;(
 ;    unsigned char *src_ptr,
 ;    int            src_pixel_step,
@@ -228,8 +354,8 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
 ;    const char    *thresh,
 ;    int            count
 ;)
-global sym(vp8_loop_filter_vertical_edge_sse2)
-sym(vp8_loop_filter_vertical_edge_sse2):
+global sym(vp8_loop_filter_horizontal_edge_uv_sse2)
+sym(vp8_loop_filter_horizontal_edge_uv_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 6
@@ -240,414 +366,35 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     ; end prolog
 
     ALIGN_STACK 16, rax
-    sub          rsp, 96      ; reserve 96 bytes
-    %define t0   [rsp + 0]    ;__declspec(align(16)) char t0[16];
-    %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
-    %define srct [rsp + 32]   ;__declspec(align(16)) char srct[64];
-
-        mov         rsi,        arg(0) ;src_ptr
-        movsxd      rax,        dword ptr arg(1) ;src_pixel_step     ; destination pitch?
-
-        lea         rsi,        [rsi + rax*4 - 4]
-        mov         rdi,        rsi           ; rdi points to row +1 for indirect addressing
-
-        add         rdi,        rax
-        lea         rcx,        [rdi + rax *8]
-
-        ;transpose
-        movq        xmm7,       QWORD PTR [rsi+2*rax]                 ; 67 66 65 64 63 62 61 60
-        movq        xmm6,       QWORD PTR [rdi+2*rax]                 ; 77 76 75 74 73 72 71 70
-
-        punpcklbw   xmm7,       xmm6                        ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
-        movq        xmm5,       QWORD PTR [rsi]                       ; 47 46 45 44 43 42 41 40
-
-        movq        xmm4,       QWORD PTR [rsi+rax]                   ; 57 56 55 54 53 52 51 50
-        punpcklbw   xmm5,       xmm4                        ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-
-        movdqa      xmm3,       xmm5                        ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-        punpckhwd   xmm5,       xmm7                        ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-
-        lea         rsi,        [rsi+ rax*8]
-
-        punpcklwd   xmm3,       xmm7                        ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
-        movq        xmm6,       QWORD PTR [rsi + 2*rax]               ; e7 e6 e5 e4 e3 e2 e1 e0
-
-        movq        xmm7,       QWORD PTR [rcx + 2*rax]               ; f7 f6 f5 f4 f3 f2 f1 f0
-        punpcklbw   xmm6,       xmm7                        ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-
-        movq        xmm4,       QWORD PTR [rsi]                       ; c7 c6 c5 c4 c3 c2 c1 c0
-        movq        xmm7,       QWORD PTR [rsi + rax]                 ; d7 d6 d5 d4 d3 d2 d1 d0
-
-        punpcklbw   xmm4,       xmm7                        ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
-        movdqa      xmm7,       xmm4                        ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 c1 d0 c0
-
-        punpckhwd   xmm7,       xmm6                        ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
-        punpcklwd   xmm4,       xmm6                        ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-
-        ; xmm3 xmm4, xmm5 xmm7   in use
-        neg         rax
-
-        lea         rsi,        [rsi+rax*8]
-        movq        xmm6,       QWORD PTR [rsi+rax*2]                 ; 27 26 25 24 23 22 21 20
-
-        movq        xmm1,       QWORD PTR [rsi+rax  ]                 ; 37 36 35 34 33 32 31 30
-        punpcklbw   xmm6,       xmm1                        ; 37 27 36 26 35 25 34 24 33 23 32 22 31 21 30 20
-
-        movq        xmm2,       QWORD PTR [rsi+rax*4]                 ; 07 06 05 04 03 02 01 00
-        movq        xmm1,       QWORD PTR [rdi+rax*4]                 ; 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm2,       xmm1                        ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-        movdqa      xmm0,       xmm2
-
-        punpckhwd   xmm2,       xmm6                        ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-        punpcklwd   xmm0,       xmm6                        ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-
-        movdqa      xmm6,       xmm2
-        punpckldq   xmm2,       xmm5                        ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-
-        punpckhdq   xmm6,       xmm5                        ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        ;xmm0 xmm2 xmm3 xmm4, xmm6, xmm7
-
-        movdqa      xmm5,       xmm0                        ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-        punpckhdq   xmm5,       xmm3                        ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
-        punpckldq   xmm0,       xmm3                        ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-        lea         rsi,        [rcx+rax]
-        ; xmm1, xmm3 free
-        movq        xmm1,       QWORD PTR [rsi+rax*2]                 ; a7 a6 a5 a4 a3 a2 a1 a0
-        movq        xmm3,       QWORD PTR [rsi+rax]                   ; b7 b6 b5 b4 b3 b2 b1 b0
-
-        punpcklbw   xmm1,       xmm3                        ;
-        lea         rdx,        srct                        ;
-
-        movdqa      [rdx+16],   xmm1                        ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-        movq        xmm3,       QWORD PTR [rsi+rax*4]                 ; 87 86 85 84 83 82 81 80
-
-        movq        xmm1,       QWORD PTR [rcx+rax*4]
-        punpcklbw   xmm3,       xmm1                        ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        movdqa      [rdx],      xmm3                        ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        punpckhwd   xmm3,       [rdx+16]                    ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-        movdqa      xmm1,       xmm3                        ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        punpckhdq   xmm1,       xmm7                        ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
-        punpckldq   xmm3,       xmm7                        ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-
-        movdqa      xmm7,       xmm2                        ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-        punpcklqdq  xmm7,       xmm3                        ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-
-        punpckhqdq  xmm2,       xmm3                        ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-        movdqa      [rdx+32],   xmm7                        ; save 4s
-
-        movdqa      [rdx+48],   xmm2                        ; save 5s
-        movdqa      xmm7,       xmm6                        ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-
-        punpckhqdq  xmm7,       xmm1                        ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 = q3
-        punpcklqdq  xmm6,       xmm1                        ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 = q2
-
-        ; free 1, 3   xmm7-7s xmm6-6s, xmm2-5s
-        movq        xmm1,       QWORD PTR [rdx]                       ; 93 83 92 82 91 81 90 80
-        movq        xmm3,       QWORD PTR [rdx+16]                    ; b3 a3 b2 a2 b1 a1 b0 a0
-
-        punpcklwd   xmm1,       xmm3                        ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-        movdqa      xmm3,       xmm1                        ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-
-        punpckhdq   xmm3,       xmm4                        ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-        punpckldq   xmm1,       xmm4                        ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-
-        movdqa      xmm4,       xmm5                        ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-        punpcklqdq  xmm5,       xmm3                        ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        punpckhqdq  xmm4,       xmm3                        ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        movdqa      [rdx],      xmm5                        ; save 2s
-
-        movdqa      [rdx+16],   xmm4                        ; save 3s
-
-        movdqa      xmm3,       xmm6                        ;
-        psubusb     xmm3,       xmm7                        ; q3 - q2
-
-        psubusb     xmm7,       xmm6                        ; q2 - q3
-        por         xmm7,       xmm3                        ; abs(q3-q2)
-
-        movdqa      xmm3,       xmm2                        ; q1
-        psubusb     xmm3,       xmm6                        ; q1 - q2
-
-        psubusb     xmm6,       xmm2                        ; q2 - q1
-        por         xmm6,       xmm3                        ; abs(q2-q1)
-
-
-        movdqa      xmm3,       xmm0                        ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-        punpcklqdq  xmm0,       xmm1                        ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
-        punpckhqdq  xmm3,       xmm1                        ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        movdqa      xmm1,       xmm3
-
-        psubusb     xmm3,       xmm0                        ; p2-p3
-        psubusb     xmm0,       xmm1                        ; p3-p2
-
-        por         xmm0,       xmm3                        ; abs(p3-p2)
-        movdqa      xmm3,       xmm5                        ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        psubusb     xmm3,       xmm1                        ; p1-p2
-        psubusb     xmm1,       xmm5                        ; p2-p1
-
-        por         xmm1,       xmm3                        ; abs(p1-p2)
-        mov         rdx,        arg(3) ;limit
-
-        movdqa      xmm3,       [rdx]                       ; limit
-
-        psubusb     xmm7,       xmm3
-        psubusb     xmm0,       xmm3
-
-        psubusb     xmm1,       xmm3
-        psubusb     xmm6,       xmm3
-
-        por         xmm7,       xmm6
-        por         xmm0,       xmm1
-
-        por         xmm0,       xmm7                         ;   abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
-        movdqa      xmm1,       xmm5                         ; p1
-
-        movdqa      xmm7,       xmm4                        ; xmm4 xmm7 = p0
-
-        psubusb     xmm7,       xmm5                        ; p0 - p1
-        psubusb     xmm5,       xmm4                        ; p1 - p0
-
-        por         xmm5,       xmm7                        ; abs(p1-p0)
-        movdqa        t0,       xmm5                        ; save abs(p1-p0)
-
-        lea         rdx,        srct
-        psubusb     xmm5,       xmm3
-
-        por         xmm0,       xmm5                        ; xmm0=mask
-        movdqa      xmm5,       [rdx+32]                    ; xmm5=q0
-
-        movdqa      xmm7,       [rdx+48]                    ; xmm7=q1
-        movdqa      xmm6,       xmm5                        ; mm6=q0
-
-        movdqa      xmm2,       xmm7                        ; q1
-
-        psubusb     xmm5,       xmm7                        ; q0-q1
-        psubusb     xmm7,       xmm6                        ; q1-q0
-
-        por         xmm7,       xmm5                        ; abs(q1-q0)
-        movdqa        t1,       xmm7                        ; save abs(q1-q0)
-
-        psubusb     xmm7,       xmm3
-        por         xmm0,       xmm7                        ; mask
-
-        movdqa      xmm5,       xmm2                        ; q1
-        psubusb     xmm5,       xmm1                        ; q1-=p1
-        psubusb     xmm1,       xmm2                        ; p1-=q1
-        por         xmm5,       xmm1                        ; abs(p1-q1)
-        pand        xmm5,       [tfe GLOBAL]                ; set lsb of each byte to zero
-        psrlw       xmm5,       1                           ; abs(p1-q1)/2
-
-        mov         rdx,        arg(2) ;flimit                      ;
-        movdqa        xmm2,       [rdx]                       ;flimit  xmm2
-
-        movdqa      xmm1,       xmm4                        ; xmm1=xmm4=p0
-
-        movdqa      xmm7,       xmm6                        ; xmm7=xmm6=q0
-        psubusb     xmm1,       xmm7                        ; p0-q0
-
-        psubusb     xmm7,       xmm4                        ; q0-p0
-        por         xmm1,       xmm7                        ; abs(q0-p0)
-        paddusb     xmm1,       xmm1                        ; abs(q0-p0)*2
-        paddusb     xmm1,       xmm5                        ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        paddb       xmm2,       xmm2                        ; flimit*2 (less than 255)
-        paddb       xmm3,       xmm2                        ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm1,       xmm3                         ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-
-        por         xmm1,       xmm0;                       ; mask
-
-        pxor        xmm0,       xmm0
-        pcmpeqb     xmm1,       xmm0
+    sub         rsp, 96       ; reserve 96 bytes
+    %define q2  [rsp + 0]     ;__declspec(align(16)) char q2[16];
+    %define q1  [rsp + 16]    ;__declspec(align(16)) char q1[16];
+    %define p2  [rsp + 32]    ;__declspec(align(16)) char p2[16];
+    %define p1  [rsp + 48]    ;__declspec(align(16)) char p1[16];
+    %define t0  [rsp + 64]    ;__declspec(align(16)) char t0[16];
+    %define t1  [rsp + 80]    ;__declspec(align(16)) char t1[16];
+
+        mov         rsi,                    arg(0)             ; u
+        mov         rdi,                    arg(5)             ; v
+        movsxd      rax,                    dword ptr arg(1)   ; src_pixel_step
+        mov         rcx,                    rax
+        neg         rax                     ; negate pitch to deal with above border
+
+        mov         rdx,                    arg(3)             ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
+
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 0
         ; calculate high edge variance
-        mov         rdx,        arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,       [rdx]
-
-        ;
-        movdqa      xmm4,       t0              ; get abs (q1 - q0)
-        psubusb     xmm4,       xmm7
-
-        movdqa      xmm3,       t1              ; get abs (p1 - p0)
-        psubusb     xmm3,       xmm7
-
-        por         xmm4,       xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,       xmm0
-
-        pcmpeqb     xmm0,       xmm0
-        pxor        xmm4,       xmm0
+        LFH_HEV_MASK
 
         ; start work on filters
-        lea         rdx,        srct
-
-        movdqa      xmm2,       [rdx]           ; p1
-        movdqa      xmm7,       [rdx+48]        ; q1
-
-        movdqa      xmm6,       [rdx+16]        ; p0
-        movdqa      xmm0,       [rdx+32]        ; q0
-
-        pxor        xmm2,       [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7,       [t80 GLOBAL]    ; q1 offset to convert to signed values
-
-        psubsb      xmm2,       xmm7            ; p1 - q1
-        pand        xmm2,       xmm4            ; high var mask (hvm)(p1 - q1)
-
-        pxor        xmm6,       [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0,       [t80 GLOBAL]    ; offset to convert to signed values
-
-        movdqa      xmm3,       xmm0            ; q0
-        psubsb      xmm0,       xmm6            ; q0 - p0
-
-        paddsb      xmm2,       xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2,       xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
-
-        paddsb      xmm2,       xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
-        pand        xmm1,       xmm2            ; mask filter values we don't care about
-
-        movdqa      xmm2,       xmm1
-        paddsb      xmm1,       [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-
-        paddsb      xmm2,       [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-        pxor        xmm0,       xmm0             ;
-
-        pxor        xmm5,       xmm5
-        punpcklbw   xmm0,       xmm2            ;
-
-        punpckhbw   xmm5,       xmm2            ;
-        psraw       xmm0,       11              ;
-
-        psraw       xmm5,       11
-        packsswb    xmm0,       xmm5
-
-        movdqa      xmm2,       xmm0            ;  (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
-        pxor        xmm0,       xmm0              ; 0
-        movdqa      xmm5,       xmm1              ; abcdefgh
-
-        punpcklbw   xmm0,       xmm1              ; e0f0g0h0
-        psraw       xmm0,       11                ; sign extended shift right by 3
-
-        pxor        xmm1,       xmm1              ; 0
-        punpckhbw   xmm1,       xmm5              ; a0b0c0d0
-
-        psraw       xmm1,       11                ; sign extended shift right by 3
-        movdqa      xmm5,       xmm0              ; save results
-
-        packsswb    xmm0,       xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5,       [ones GLOBAL]
-
-        paddsw      xmm1,       [ones GLOBAL]
-        psraw       xmm5,       1                 ; partial shifted one more time for 2nd tap
-
-        psraw       xmm1,       1                 ; partial shifted one more time for 2nd tap
-        packsswb    xmm5,       xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-
-        pandn       xmm4,       xmm5            ; high edge variance additive
-
-        paddsb      xmm6,       xmm2            ; p0+= p0 add
-        pxor        xmm6,       [t80 GLOBAL]    ; unoffset
-
-        ; mm6=p0                               ;
-        movdqa      xmm1,       [rdx]           ; p1
-        pxor        xmm1,       [t80 GLOBAL]    ; reoffset
-
-        paddsb      xmm1,       xmm4            ; p1+= p1 add
-        pxor        xmm1,       [t80 GLOBAL]    ; unoffset
-        ; mm6 = p0 mm1 = p1
-
-        psubsb      xmm3,       xmm0            ; q0-= q0 add
-        pxor        xmm3,       [t80 GLOBAL]    ; unoffset
-
-        ; mm3 = q0
-        psubsb      xmm7,       xmm4            ; q1-= q1 add
-        pxor        xmm7,       [t80 GLOBAL]    ; unoffset
-        ; mm7 = q1
-
-        ; tranpose and write back
-        ; xmm1 =    f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        ; xmm6 =    f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        ; xmm3 =    f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        ; xmm7 =    f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-        movdqa      xmm2,       xmm1            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        punpcklbw   xmm2,       xmm6            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-
-        movdqa      xmm4,       xmm3            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpckhbw   xmm1,       xmm6            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
-        punpcklbw   xmm4,       xmm7            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-        punpckhbw   xmm3,       xmm7            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
-
-        movdqa      xmm6,       xmm2            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-        punpcklwd   xmm2,       xmm4            ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
-
-        punpckhwd   xmm6,       xmm4            ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
-        movdqa      xmm5,       xmm1            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-
-        punpcklwd   xmm1,       xmm3            ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
-        punpckhwd   xmm5,       xmm3            ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
-
-        ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
-        ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
-        ; xmm5 = f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
-        ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
-        lea         rsi,           [rsi+rax*8]
-
-        movd        [rsi+rax*4+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rdi+rax*4+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rsi+rax*2+2], xmm2
-        psrldq      xmm2,   4
-
-        movd        [rdi+rax*2+2], xmm2
-        movd        [rsi+2],       xmm6
-
-        psrldq      xmm6,   4
-        movd        [rdi+2],       xmm6
-
-        psrldq      xmm6,   4
-        neg         rax
-
-        movd        [rdi+rax+2],    xmm6
-        psrldq      xmm6,   4
-
-        movd        [rdi+rax*2+2],  xmm6
-        lea         rsi,       [rsi+rax*8]
-
-        neg         rax
-        ;;;;;;;;;;;;;;;;;;;;/
-        movd        [rsi+rax*4+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rcx+rax*4+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rsi+rax*2+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rcx+rax*2+2], xmm1
-        psrldq      xmm1,   4
-
-        movd        [rsi+2],       xmm5
-        psrldq      xmm5,   4
-
-        movd        [rcx+2],        xmm5
-        psrldq      xmm5,   4
-
-        neg         rax
-        movd        [rcx+rax+2],    xmm5
-
-        psrldq      xmm5,   4
-        movd        [rcx+rax*2+2],  xmm5
+        BH_FILTER 0
+        ; write back the result
+        BH_WRITEBACK 0
 
     add rsp, 96
     pop rsp
@@ -661,233 +408,58 @@ sym(vp8_loop_filter_vertical_edge_sse2):
     ret
 
 
-;void vp8_mbloop_filter_horizontal_edge_sse2
-;(
-;    unsigned char *src_ptr,
-;    int            src_pixel_step,
-;    const char    *flimit,
-;    const char    *limit,
-;    const char    *thresh,
-;    int            count
-;)
-global sym(vp8_mbloop_filter_horizontal_edge_sse2)
-sym(vp8_mbloop_filter_horizontal_edge_sse2):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    SAVE_XMM
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    ALIGN_STACK 16, rax
-    sub         rsp, 32                         ; reserve 32 bytes
-    %define t0  [rsp + 0]    ;__declspec(align(16)) char t0[8];
-    %define t1  [rsp + 16]   ;__declspec(align(16)) char t1[8];
-
-        mov         rsi,                    arg(0) ;src_ptr
-        movsxd      rax,                    dword ptr arg(1) ;src_pixel_step     ; destination pitch?
-
-        mov         rdx,                    arg(3) ;limit
-        movdqa      xmm7,                   XMMWORD PTR [rdx]
-
-        mov         rdi,                    rsi           ; rdi points to row +1 for indirect addressing
-        add         rdi,                    rax
-
-        ; calculate breakout conditions
-        movdqa      xmm2,                   XMMWORD PTR [rdi+2*rax]      ; q3
-        movdqa      xmm1,                   XMMWORD PTR [rsi+2*rax]      ; q2
-
-        movdqa      xmm6,                   xmm1              ; q2
-        psubusb     xmm1,                   xmm2              ; q2-=q3
-
-
-        psubusb     xmm2,                   xmm6              ; q3-=q2
-        por         xmm1,                   xmm2              ; abs(q3-q2)
-
-        psubusb     xmm1,                   xmm7
-
-        ; mm1 = abs(q3-q2), mm6 =q2, mm7 = limit
-        movdqa      xmm4,                   XMMWORD PTR [rsi+rax]         ; q1
-        movdqa      xmm3,                   xmm4              ; q1
-
-        psubusb     xmm4,                   xmm6              ; q1-=q2
-        psubusb     xmm6,                   xmm3              ; q2-=q1
-
-        por         xmm4,                   xmm6              ; abs(q2-q1)
-        psubusb     xmm4,                   xmm7
-
-        por         xmm1,                   xmm4
-        ; mm1 = mask,      mm3=q1, mm7 = limit
-
-        movdqa      xmm4,                   XMMWORD PTR [rsi]             ; q0
-        movdqa      xmm0,                   xmm4              ; q0
-
-        psubusb     xmm4,                   xmm3              ; q0-=q1
-        psubusb     xmm3,                   xmm0              ; q1-=q0
-
-        por         xmm4,                   xmm3              ; abs(q0-q1)
-        movdqa      t0,                     xmm4                  ; save to t0
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        neg         rax                   ; negate pitch to deal with above border
-
-        movdqa      xmm2,                   XMMWORD PTR [rsi+4*rax]      ; p3
-        movdqa      xmm4,                   XMMWORD PTR [rdi+4*rax]      ; p2
-
-        movdqa      xmm5,                   xmm4              ; p2
-        psubusb     xmm4,                   xmm2              ; p2-=p3
-
-        psubusb     xmm2,                   xmm5              ; p3-=p2
-        por         xmm4,                   xmm2              ; abs(p3 - p2)
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        movdqa      xmm4,                   XMMWORD PTR [rsi+2*rax]      ; p1
-        movdqa      xmm3,                   xmm4              ; p1
-
-        psubusb     xmm4,                   xmm5              ; p1-=p2
-        psubusb     xmm5,                   xmm3              ; p2-=p1
-
-        por         xmm4,                   xmm5              ; abs(p2 - p1)
-        psubusb     xmm4,                   xmm7
-
-        por         xmm1,                   xmm4
-
-        movdqa      xmm2,                   xmm3              ; p1
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1)
-        movdqa      xmm4,                   XMMWORD PTR [rsi+rax]         ; p0
-        movdqa      xmm5,                   xmm4              ; p0
-
-        psubusb     xmm4,                   xmm3              ; p0-=p1
-        psubusb     xmm3,                   xmm5              ; p1-=p0
-
-        por         xmm4,                   xmm3              ; abs(p1 - p0)
-        movdqa        t1,                   xmm4                  ; save to t1
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
-
-        ; mm1 = mask, mm0=q0,  mm7 = limit, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm5 = p0
-        movdqa      xmm3,                   XMMWORD PTR [rdi] ; q1
-        movdqa      xmm4,                   xmm3              ; q1
-        psubusb     xmm3,                   xmm2              ; q1-=p1
-        psubusb     xmm2,                   xmm4              ; p1-=q1
-        por         xmm2,                   xmm3              ; abs(p1-q1)
-        pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
-        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
-
-        movdqa      xmm6,                   xmm5              ; p0
-        movdqa      xmm3,                   xmm0              ; q0
-
-        psubusb     xmm5,                   xmm3              ; p0-=q0
-        psubusb     xmm3,                   xmm6              ; q0-=p0
-
-        por         xmm5,                   xmm3              ; abs(p0 - q0)
-        paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2
-        paddusb     xmm5,                   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        mov         rdx,                    arg(2) ;flimit            ; get flimit
-        movdqa      xmm2,                   XMMWORD PTR [rdx]             ;
-        paddb       xmm2,                   xmm2              ; flimit*2 (less than 255)
-        paddb       xmm7,                   xmm2              ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,                   xmm5
-        pxor        xmm5,                   xmm5
-        pcmpeqb     xmm1,                   xmm5               ; mask mm1
-        ; mm1 = mask, mm0=q0,  mm7 = flimit, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm6 = p0,
-
-        ; calculate high edge variance
-        mov         rdx,                    arg(4) ;thresh            ; get thresh
-        movdqa      xmm7,                   XMMWORD PTR [rdx]             ;
-
-        movdqa      xmm4,                   t0                ; get abs (q1 - q0)
-        psubusb     xmm4,                   xmm7
-
-        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
-        psubusb     xmm3,                   xmm7
-
-        paddb       xmm4,                   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,                   xmm5
-
-        pcmpeqb     xmm5,                   xmm5
-        pxor        xmm4,                   xmm5
-        ; mm1 = mask, mm0=q0,  mm7 = thresh, t0 = abs(q0-q1) t1 = abs(p1-p0)
-        ; mm6 = p0, mm4=hev
-        ; start work on filters
-        movdqa      xmm2,                   XMMWORD PTR [rsi+2*rax]   ; p1
-        movdqa      xmm7,                   XMMWORD PTR [rdi]             ; q1
-
-        pxor        xmm2,                   [t80 GLOBAL]  ; p1 offset to convert to signed values
-        pxor        xmm7,                   [t80 GLOBAL]  ; q1 offset to convert to signed values
+%macro MBH_FILTER 1
+%if %1
+        movdqa      xmm2,                   [rsi+2*rax]       ; p1
+        movdqa      xmm7,                   [rdi]             ; q1
+%else
+        movdqa      xmm2,                   p1                ; p1
+        movdqa      xmm7,                   q1                ; q1
+%endif
+        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
+        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
 
         psubsb      xmm2,                   xmm7              ; p1 - q1
-        pxor        xmm6,                   [t80 GLOBAL]  ; offset to convert to signed values
-
-        pxor        xmm0,                   [t80 GLOBAL]  ; offset to convert to signed values
+        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
+        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
         movdqa      xmm3,                   xmm0              ; q0
-
         psubsb      xmm0,                   xmm6              ; q0 - p0
         paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + (p1 - q1)
-
         paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0)
         paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + (p1 - q1)
 
         pand        xmm1,                   xmm2              ; mask filter values we don't care about
-        ; mm1 = vp8_filter, mm4=hev, mm6=ps0, mm3=qs0
         movdqa      xmm2,                   xmm1              ; vp8_filter
         pand        xmm2,                   xmm4;             ; Filter2 = vp8_filter & hev
 
-
-        movdqa      xmm5,                   xmm2          ;
-        paddsb      xmm5,                   [t3 GLOBAL];
+        movdqa      xmm5,                   xmm2
+        paddsb      xmm5,                   [t3 GLOBAL]
 
         pxor        xmm0,                   xmm0              ; 0
         pxor        xmm7,                   xmm7              ; 0
-
         punpcklbw   xmm0,                   xmm5              ; e0f0g0h0
         psraw       xmm0,                   11                ; sign extended shift right by 3
-
         punpckhbw   xmm7,                   xmm5              ; a0b0c0d0
         psraw       xmm7,                   11                ; sign extended shift right by 3
-
         packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
         movdqa      xmm5,                   xmm0              ; Filter2
-
         paddsb      xmm2,                   [t4 GLOBAL]      ; vp8_signed_char_clamp(Filter2 + 4)
-        pxor        xmm0,                   xmm0              ; 0
 
+        pxor        xmm0,                   xmm0              ; 0
         pxor        xmm7,                   xmm7              ; 0
         punpcklbw   xmm0,                   xmm2              ; e0f0g0h0
-
         psraw       xmm0,                   11                ; sign extended shift right by 3
         punpckhbw   xmm7,                   xmm2              ; a0b0c0d0
-
         psraw       xmm7,                   11                ; sign extended shift right by 3
         packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
 
-        ; mm0= filter2 mm1 = vp8_filter,  mm3 =qs0 mm5=s mm4 =hev mm6=ps0
         psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
         paddsb      xmm6,                   xmm5              ; ps0 =ps0 + Fitler2
 
-        ; mm1=vp8_filter, mm3=qs0, mm4 =hev mm6=ps0
-        ; vp8_filter &= ~hev;
-        ; Filter2 = vp8_filter;
         pandn       xmm4,                   xmm1              ; vp8_filter&=~hev
+%endmacro
 
-
-        ; mm3=qs0, mm4=filter2, mm6=ps0
-
+%macro MBH_WRITEBACK 1
         ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
         ; s = vp8_signed_char_clamp(qs0 - u);
         ; *oq0 = s^0x80;
@@ -917,8 +489,20 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
         pxor        xmm3,                   [t80 GLOBAL]
         pxor        xmm6,                   [t80 GLOBAL]
 
+%if %1
         movdqa      XMMWORD PTR [rsi+rax],  xmm6
         movdqa      XMMWORD PTR [rsi],      xmm3
+%else
+        lea         rsi,                    [rsi + rcx*2]
+        lea         rdi,                    [rdi + rcx*2]
+
+        movq        MMWORD PTR [rsi],       xmm6              ; p0
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi],       xmm6
+        movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx], xmm3
+%endif
 
         ; roughly 2/7th difference across boundary
         ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
@@ -943,8 +527,13 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
 
         packsswb    xmm1,                   xmm2
 
+%if %1
         movdqa      xmm3,                   XMMWORD PTR [rdi]
-        movdqa      xmm6,                   XMMWORD PTR [rsi+rax*2]       ; p1
+        movdqa      xmm6,                   XMMWORD PTR [rsi+rax*2] ; p1
+%else
+        movdqa      xmm3,                   q1                ; q1
+        movdqa      xmm6,                   p1                ; p1
+%endif
 
         pxor        xmm3,                   [t80 GLOBAL]
         pxor        xmm6,                   [t80 GLOBAL]
@@ -955,9 +544,18 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
 
+%if %1
         movdqa      XMMWORD PTR [rdi],      xmm3
         movdqa      XMMWORD PTR [rsi+rax*2],xmm6
+%else
+        movq        MMWORD PTR [rsi + rcx*2],xmm3             ; q1
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi + rcx*2],xmm3
 
+        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi + rax], xmm6
+%endif
         ; roughly 1/7th difference across boundary
         ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
         ; s = vp8_signed_char_clamp(qs2 - u);
@@ -981,11 +579,15 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
 
         packsswb    xmm1,                   xmm2
 
-
+%if %1
         movdqa      xmm6,                   XMMWORD PTR [rdi+rax*4]
         neg         rax
 
-        movdqa      xmm3,                   XMMWORD PTR [rdi+rax  ]
+        movdqa      xmm3,                   XMMWORD PTR [rdi+rax]
+%else
+        movdqa      xmm6,                   p2                ; p2
+        movdqa      xmm3,                   q2                ; q2
+%endif
 
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
@@ -995,11 +597,68 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
 
         pxor        xmm6,                   [t80 GLOBAL]
         pxor        xmm3,                   [t80 GLOBAL]
-
-        movdqa      XMMWORD PTR [rdi+rax  ], xmm3
+%if %1
+        movdqa      XMMWORD PTR [rdi+rax  ],xmm3
         neg         rax
 
-        movdqa      XMMWORD PTR [rdi+rax*4], xmm6
+        movdqa      XMMWORD PTR [rdi+rax*4],xmm6
+%else
+        movq        MMWORD PTR [rsi+rax*2], xmm6              ; p2
+        psrldq      xmm6,                   8
+        movq        MMWORD PTR [rdi+rax*2], xmm6
+
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
+        movq        MMWORD PTR [rsi+rcx*2  ],xmm3             ; q2
+        psrldq      xmm3,                   8
+        movq        MMWORD PTR [rdi+rcx*2  ],xmm3
+%endif
+%endmacro
+
+
+;void vp8_mbloop_filter_horizontal_edge_sse2
+;(
+;    unsigned char *src_ptr,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    int            count
+;)
+global sym(vp8_mbloop_filter_horizontal_edge_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 32     ; reserve 32 bytes
+    %define t0 [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1 [rsp + 16]   ;__declspec(align(16)) char t1[16];
+
+        mov         rsi,                    arg(0)            ;src_ptr
+        movsxd      rax,                    dword ptr arg(1)  ;src_pixel_step
+
+        mov         rdx,                    arg(3)            ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rdi,                    [rsi+rax]         ; rdi points to row +1 for indirect addressing
+
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 1
+
+        ; calculate high edge variance
+        LFH_HEV_MASK
+
+        ; start work on filters
+        MBH_FILTER 1
+        ; write back the result
+        MBH_WRITEBACK 1
 
     add rsp, 32
     pop rsp
@@ -1013,6 +672,877 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
     ret
 
 
+;void vp8_mbloop_filter_horizontal_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2)
+sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 96       ; reserve 96 bytes
+    %define q2  [rsp + 0]     ;__declspec(align(16)) char q2[16];
+    %define q1  [rsp + 16]    ;__declspec(align(16)) char q1[16];
+    %define p2  [rsp + 32]    ;__declspec(align(16)) char p2[16];
+    %define p1  [rsp + 48]    ;__declspec(align(16)) char p1[16];
+    %define t0  [rsp + 64]    ;__declspec(align(16)) char t0[16];
+    %define t1  [rsp + 80]    ;__declspec(align(16)) char t1[16];
+
+        mov         rsi,                    arg(0)             ; u
+        mov         rdi,                    arg(5)             ; v
+        movsxd      rax,                    dword ptr arg(1)   ; src_pixel_step
+        mov         rcx,                    rax
+        neg         rax                     ; negate pitch to deal with above border
+
+        mov         rdx,                    arg(3)             ;limit
+        movdqa      xmm7,                   XMMWORD PTR [rdx]
+
+        lea         rsi,                    [rsi + rcx]
+        lea         rdi,                    [rdi + rcx]
+
+        ; calculate breakout conditions
+        LFH_FILTER_MASK 0
+
+        ; calculate high edge variance
+        LFH_HEV_MASK
+
+        ; start work on filters
+        MBH_FILTER 0
+        ; write back the result
+        MBH_WRITEBACK 0
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+%macro TRANSPOSE_16X8_1 0
+        movq        xmm0,               QWORD PTR [rdi+rcx*2]   ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
+        movq        xmm7,               QWORD PTR [rsi+rcx*2]   ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+        punpcklbw   xmm7,               xmm0            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
+        movq        xmm0,               QWORD PTR [rsi+rcx]
+
+        movq        xmm5,               QWORD PTR [rsi] ;
+        punpcklbw   xmm5,               xmm0            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+
+        movdqa      xmm6,               xmm5            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+        punpcklwd   xmm5,               xmm7            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
+
+        punpckhwd   xmm6,               xmm7            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
+        movq        xmm7,               QWORD PTR [rsi + rax]   ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+
+        movq        xmm0,               QWORD PTR [rsi + rax*2] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
+        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
+
+        movq        xmm4,               QWORD PTR [rsi + rax*4] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+        movq        xmm7,               QWORD PTR [rdi + rax*4] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
+
+        punpcklbw   xmm4,               xmm7            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+
+        punpcklwd   xmm3,               xmm0            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+        punpckhwd   xmm4,               xmm0            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+
+        movdqa      xmm7,               xmm4            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
+        movdqa      xmm2,               xmm3            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+
+        punpckhdq   xmm7,               xmm6            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+        punpckldq   xmm4,               xmm6            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+
+        punpckhdq   xmm3,               xmm5            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+        punpckldq   xmm2,               xmm5            ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
+
+        movdqa      t0,                 xmm2            ; save to free XMM2
+%endmacro
+
+%macro TRANSPOSE_16X8_2 1
+        movq        xmm6,               QWORD PTR [rdi+rcx*2] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
+        movq        xmm5,               QWORD PTR [rsi+rcx*2] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+
+        punpcklbw   xmm5,               xmm6            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
+        movq        xmm6,               QWORD PTR [rsi+rcx]   ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
+
+        movq        xmm1,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
+        punpcklbw   xmm1,               xmm6            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+
+        movdqa      xmm6,               xmm1            ;
+        punpckhwd   xmm6,               xmm5            ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
+
+        punpcklwd   xmm1,               xmm5            ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
+        movq        xmm5,               QWORD PTR [rsi+rax]   ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+
+        movq        xmm0,               QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
+        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+
+        movq        xmm2,               QWORD PTR [rsi+rax*4] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+        movq        xmm5,               QWORD PTR [rdi+rax*4] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
+
+        punpcklbw   xmm2,               xmm5            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+        movdqa      xmm5,               xmm2            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+
+        punpcklwd   xmm5,               xmm0            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+        punpckhwd   xmm2,               xmm0            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+        movdqa      xmm0,               xmm5
+        punpckldq   xmm0,               xmm1            ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
+
+
+        punpckhdq   xmm5,               xmm1            ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
+        movdqa      xmm1,               xmm2            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
+
+        punpckldq   xmm1,               xmm6            ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
+        punpckhdq   xmm2,               xmm6            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
+
+        movdqa      xmm6,               xmm7            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+        punpcklqdq  xmm6,               xmm2            ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+
+        punpckhqdq  xmm7,               xmm2            ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
+%if %1
+        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+        punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        movdqa      [rdx],              xmm2            ; save 2
+
+        movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+        punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+        movdqa      [rdx+16],           xmm3            ; save 3
+        punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+        movdqa      [rdx+32],           xmm4            ; save 4
+        movdqa      [rdx+48],           xmm5            ; save 5
+
+        movdqa      xmm1,               t0              ; get
+        movdqa      xmm2,               xmm1            ;
+
+        punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+%else
+        movdqa      [rdx+112],          xmm7            ; save 7
+        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+
+        movdqa      [rdx+96],           xmm6            ; save 6
+        punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        movdqa      [rdx+32],           xmm2            ; save 2
+
+        movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
+        punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+
+        movdqa      [rdx+48],           xmm3            ; save 3
+        punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+
+        movdqa      [rdx+64],           xmm4            ; save 4
+        movdqa      [rdx+80],           xmm5            ; save 5
+
+        movdqa      xmm1,               t0              ; get
+        movdqa      xmm2,               xmm1
+
+        punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+
+        movdqa      [rdx+16],           xmm1
+        movdqa      [rdx],              xmm2
+%endif
+%endmacro
+
+%macro LFV_FILTER_MASK 1
+        movdqa      xmm0,               xmm6            ; q2
+        psubusb     xmm0,               xmm7            ; q2-q3
+
+        psubusb     xmm7,               xmm6            ; q3-q2
+        por         xmm7,               xmm0            ; abs (q3-q2)
+
+        movdqa      xmm4,               xmm5            ; q1
+        psubusb     xmm4,               xmm6            ; q1-q2
+
+        psubusb     xmm6,               xmm5            ; q2-q1
+        por         xmm6,               xmm4            ; abs (q2-q1)
+
+        movdqa      xmm0,               xmm1
+
+        psubusb     xmm0,               xmm2            ; p2 - p3;
+        psubusb     xmm2,               xmm1            ; p3 - p2;
+
+        por         xmm0,               xmm2            ; abs(p2-p3)
+%if %1
+        movdqa      xmm2,               [rdx]           ; p1
+%else
+        movdqa      xmm2,               [rdx+32]        ; p1
+%endif
+        movdqa      xmm5,               xmm2            ; p1
+
+        psubusb     xmm5,               xmm1            ; p1-p2
+        psubusb     xmm1,               xmm2            ; p2-p1
+
+        por         xmm1,               xmm5            ; abs(p2-p1)
+
+        mov         rdx,                arg(3)          ; limit
+        movdqa      xmm4,               [rdx]           ; limit
+
+        psubusb     xmm7,               xmm4
+
+        psubusb     xmm0,               xmm4            ; abs(p3-p2) > limit
+        psubusb     xmm1,               xmm4            ; abs(p2-p1) > limit
+
+        psubusb     xmm6,               xmm4            ; abs(q2-q1) > limit
+        por         xmm7,               xmm6            ; or
+
+        por         xmm0,               xmm1
+        por         xmm0,               xmm7            ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
+
+        movdqa      xmm1,               xmm2            ; p1
+
+        movdqa      xmm7,               xmm3            ; p0
+        psubusb     xmm7,               xmm2            ; p0-p1
+
+        psubusb     xmm2,               xmm3            ; p1-p0
+        por         xmm2,               xmm7            ; abs(p1-p0)
+
+        movdqa      t0,                 xmm2            ; save abs(p1-p0)
+        lea         rdx,                srct
+
+        psubusb     xmm2,               xmm4            ; abs(p1-p0)>limit
+        por         xmm0,               xmm2            ; mask
+%if %1
+        movdqa      xmm5,               [rdx+32]        ; q0
+        movdqa      xmm7,               [rdx+48]        ; q1
+%else
+        movdqa      xmm5,               [rdx+64]        ; q0
+        movdqa      xmm7,               [rdx+80]        ; q1
+%endif
+        movdqa      xmm6,               xmm5            ; q0
+        movdqa      xmm2,               xmm7            ; q1
+        psubusb     xmm5,               xmm7            ; q0-q1
+
+        psubusb     xmm7,               xmm6            ; q1-q0
+        por         xmm7,               xmm5            ; abs(q1-q0)
+
+        movdqa      t1,                 xmm7            ; save abs(q1-q0)
+        psubusb     xmm7,               xmm4            ; abs(q1-q0)> limit
+
+        por         xmm0,               xmm7            ; mask
+
+        movdqa      xmm5,               xmm2            ; q1
+        psubusb     xmm5,               xmm1            ; q1-=p1
+        psubusb     xmm1,               xmm2            ; p1-=q1
+        por         xmm5,               xmm1            ; abs(p1-q1)
+        pand        xmm5,               [tfe GLOBAL]    ; set lsb of each byte to zero
+        psrlw       xmm5,               1               ; abs(p1-q1)/2
+
+        mov         rdx,                arg(2)          ; flimit
+        movdqa      xmm2,               [rdx]           ; flimit
+
+        movdqa      xmm1,               xmm3            ; p0
+        movdqa      xmm7,               xmm6            ; q0
+        psubusb     xmm1,               xmm7            ; p0-q0
+        psubusb     xmm7,               xmm3            ; q0-p0
+        por         xmm1,               xmm7            ; abs(q0-p0)
+        paddusb     xmm1,               xmm1            ; abs(q0-p0)*2
+        paddusb     xmm1,               xmm5            ; abs (p0 - q0) *2 + abs(p1-q1)/2
+
+        paddb       xmm2,               xmm2            ; flimit*2 (less than 255)
+        paddb       xmm4,               xmm2            ; flimit * 2 + limit (less than 255)
+
+        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        por         xmm1,               xmm0;           ; mask
+        pxor        xmm0,               xmm0
+        pcmpeqb     xmm1,               xmm0
+%endmacro
+
+%macro LFV_HEV_MASK 0
+        mov         rdx,                arg(4)          ; get thresh
+        movdqa      xmm7,               XMMWORD PTR [rdx]
+
+        movdqa      xmm4,               t0              ; get abs (q1 - q0)
+        psubusb     xmm4,               xmm7            ; abs(q1 - q0) > thresh
+
+        movdqa      xmm3,               t1              ; get abs (p1 - p0)
+        psubusb     xmm3,               xmm7            ; abs(p1 - p0)> thresh
+
+        por         xmm4,               xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+        pcmpeqb     xmm4,               xmm0
+
+        pcmpeqb     xmm0,               xmm0
+        pxor        xmm4,               xmm0
+%endmacro
+
+%macro BV_FILTER 0
+        lea         rdx,                srct
+
+        movdqa      xmm2,               [rdx]           ; p1        lea         rsi,       [rsi+rcx*8]
+        lea         rdi,                [rsi+rcx]
+        movdqa      xmm7,               [rdx+48]        ; q1
+        movdqa      xmm6,               [rdx+16]        ; p0
+        movdqa      xmm0,               [rdx+32]        ; q0
+
+        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
+        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
+
+        psubsb      xmm2,               xmm7            ; p1 - q1
+        pand        xmm2,               xmm4            ; high var mask (hvm)(p1 - q1)
+
+        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
+
+        movdqa      xmm3,               xmm0            ; q0
+        psubsb      xmm0,               xmm6            ; q0 - p0
+
+        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
+        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
+
+        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
+        pand        xmm1,               xmm2            ; mask filter values we don't care about
+
+        movdqa      xmm2,               xmm1
+        paddsb      xmm1,               [t4 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+
+        paddsb      xmm2,               [t3 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+        pxor        xmm0,               xmm0
+
+        pxor        xmm5,               xmm5
+        punpcklbw   xmm0,               xmm2
+
+        punpckhbw   xmm5,               xmm2
+        psraw       xmm0,               11
+
+        psraw       xmm5,               11
+        packsswb    xmm0,               xmm5
+
+        movdqa      xmm2,               xmm0            ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+        pxor        xmm0,               xmm0            ; 0
+        movdqa      xmm5,               xmm1            ; abcdefgh
+
+        punpcklbw   xmm0,               xmm1            ; e0f0g0h0
+        psraw       xmm0,               11              ; sign extended shift right by 3
+
+        pxor        xmm1,               xmm1            ; 0
+        punpckhbw   xmm1,               xmm5            ; a0b0c0d0
+
+        psraw       xmm1,               11              ; sign extended shift right by 3
+        movdqa      xmm5,               xmm0            ; save results
+
+        packsswb    xmm0,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+        paddsw      xmm5,               [ones GLOBAL]
+
+        paddsw      xmm1,               [ones GLOBAL]
+        psraw       xmm5,               1               ; partial shifted one more time for 2nd tap
+
+        psraw       xmm1,               1               ; partial shifted one more time for 2nd tap
+        packsswb    xmm5,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+
+        pandn       xmm4,               xmm5            ; high edge variance additive
+
+        paddsb      xmm6,               xmm2            ; p0+= p0 add
+        pxor        xmm6,               [t80 GLOBAL]    ; unoffset
+
+        movdqa      xmm1,               [rdx]           ; p1
+        pxor        xmm1,               [t80 GLOBAL]    ; reoffset
+
+        paddsb      xmm1,               xmm4            ; p1+= p1 add
+        pxor        xmm1,               [t80 GLOBAL]    ; unoffset
+
+        psubsb      xmm3,               xmm0            ; q0-= q0 add
+        pxor        xmm3,               [t80 GLOBAL]    ; unoffset
+
+        psubsb      xmm7,               xmm4            ; q1-= q1 add
+        pxor        xmm7,               [t80 GLOBAL]    ; unoffset
+%endmacro
+
+%macro BV_TRANSPOSE 0
+        ; xmm1 =    f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        ; xmm6 =    f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+        ; xmm3 =    f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        ; xmm7 =    f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
+        movdqa      xmm2,               xmm1            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        punpcklbw   xmm2,               xmm6            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+
+        movdqa      xmm4,               xmm3            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpckhbw   xmm1,               xmm6            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        punpcklbw   xmm4,               xmm7            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+        punpckhbw   xmm3,               xmm7            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+
+        movdqa      xmm6,               xmm2            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+        punpcklwd   xmm2,               xmm4            ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+
+        punpckhwd   xmm6,               xmm4            ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+        movdqa      xmm5,               xmm1            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        punpcklwd   xmm1,               xmm3            ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+        punpckhwd   xmm5,               xmm3            ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+        ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
+        ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
+        ; xmm1 = b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+        ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
+%endmacro
+
+%macro BV_WRITEBACK 2
+        movd        [rsi+rax*4+2],      %1
+        psrldq      %1,                 4
+
+        movd        [rdi+rax*4+2],      %1
+        psrldq      %1,                 4
+
+        movd        [rsi+rax*2+2],      %1
+        psrldq      %1,                 4
+
+        movd        [rdi+rax*2+2],      %1
+
+        movd        [rsi+2],            %2
+        psrldq      %2,                 4
+
+        movd        [rdi+2],            %2
+        psrldq      %2,                 4
+
+        movd        [rdi+rcx+2],        %2
+        psrldq      %2,                 4
+
+        movd        [rdi+rcx*2+2],      %2
+%endmacro
+
+
+;void vp8_loop_filter_vertical_edge_sse2
+;(
+;    unsigned char *src_ptr,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    int            count
+;)
+global sym(vp8_loop_filter_vertical_edge_sse2)
+sym(vp8_loop_filter_vertical_edge_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub             rsp, 96      ; reserve 96 bytes
+    %define t0      [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1      [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct    [rsp + 32]   ;__declspec(align(16)) char srct[64];
+
+        mov         rsi,        arg(0)                  ; src_ptr
+        movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
+
+        lea         rsi,        [rsi + rax*4 - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+        mov         rcx,        rax
+        neg         rax
+
+        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+        TRANSPOSE_16X8_1
+
+        lea         rsi,        [rsi+rcx*8]
+        lea         rdi,        [rdi+rcx*8]
+        lea         rdx,        srct
+        TRANSPOSE_16X8_2 1
+
+        ; calculate filter mask
+        LFV_FILTER_MASK 1
+        ; calculate high edge variance
+        LFV_HEV_MASK
+
+        ; start work on filters
+        BV_FILTER
+
+        ; tranpose and write back - only work on q1, q0, p0, p1
+        BV_TRANSPOSE
+        ; store 16-line result
+        BV_WRITEBACK xmm1, xmm5
+
+        lea         rsi,        [rsi+rax*8]
+        lea         rdi,        [rsi+rcx]
+        BV_WRITEBACK xmm2, xmm6
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_loop_filter_vertical_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_loop_filter_vertical_edge_uv_sse2)
+sym(vp8_loop_filter_vertical_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub             rsp, 96      ; reserve 96 bytes
+    %define t0      [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1      [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct    [rsp + 32]   ;__declspec(align(16)) char srct[64];
+
+        mov         rsi,        arg(0)                  ; u_ptr
+        movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
+
+        lea         rsi,        [rsi + rax*4 - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+        mov         rcx,        rax
+        neg         rax
+
+        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+        TRANSPOSE_16X8_1
+
+        mov         rsi,        arg(5)                   ; v_ptr
+        lea         rsi,        [rsi + rcx*4 - 4]
+        lea         rdi,        [rsi + rcx]              ; rdi points to row +1 for indirect addressing
+
+        lea         rdx,        srct
+        TRANSPOSE_16X8_2 1
+
+        ; calculate filter mask
+        LFV_FILTER_MASK 1
+        ; calculate high edge variance
+        LFV_HEV_MASK
+
+        ; start work on filters
+        BV_FILTER
+
+        ; tranpose and write back - only work on q1, q0, p0, p1
+        BV_TRANSPOSE
+        ; store 16-line result
+        BV_WRITEBACK xmm1, xmm5
+
+        mov         rsi,        arg(0)                   ;u_ptr
+        lea         rsi,        [rsi + rcx*4 - 4]
+        lea         rdi,        [rsi + rcx]
+        BV_WRITEBACK xmm2, xmm6
+
+    add rsp, 96
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+%macro MBV_FILTER 0
+        lea         rdx,                srct
+
+        movdqa      xmm2,               [rdx+32]        ; p1
+        movdqa      xmm7,               [rdx+80]        ; q1
+        movdqa      xmm6,               [rdx+48]        ; p0
+        movdqa      xmm0,               [rdx+64]        ; q0
+
+        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
+        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
+        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
+
+        psubsb      xmm2,               xmm7            ; p1 - q1
+
+        movdqa      xmm3,               xmm0            ; q0
+
+        psubsb      xmm0,               xmm6            ; q0 - p0
+        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + (p1 - q1)
+
+        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0)
+        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0)+ (p1 - q1)
+
+        pand        xmm1,               xmm2            ; mask filter values we don't care about
+
+        movdqa      xmm2,               xmm1            ; vp8_filter
+        pand        xmm2,               xmm4;           ; Filter2 = vp8_filter & hev
+
+        movdqa      xmm5,               xmm2
+        paddsb      xmm5,               [t3 GLOBAL]
+
+        pxor        xmm0,               xmm0            ; 0
+        pxor        xmm7,               xmm7            ; 0
+
+        punpcklbw   xmm0,               xmm5            ; e0f0g0h0
+        psraw       xmm0,               11              ; sign extended shift right by 3
+
+        punpckhbw   xmm7,               xmm5            ; a0b0c0d0
+        psraw       xmm7,               11              ; sign extended shift right by 3
+
+        packsswb    xmm0,               xmm7            ; Filter2 >>=3;
+        movdqa      xmm5,               xmm0            ; Filter2
+
+        paddsb      xmm2,               [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
+        pxor        xmm0,               xmm0            ; 0
+
+        pxor        xmm7,               xmm7            ; 0
+        punpcklbw   xmm0,               xmm2            ; e0f0g0h0
+
+        psraw       xmm0,               11              ; sign extended shift right by 3
+        punpckhbw   xmm7,               xmm2            ; a0b0c0d0
+
+        psraw       xmm7,               11              ; sign extended shift right by 3
+        packsswb    xmm0,               xmm7            ; Filter2 >>=3;
+
+        psubsb      xmm3,               xmm0            ; qs0 =qs0 - filter1
+        paddsb      xmm6,               xmm5            ; ps0 =ps0 + Fitler2
+
+        ; vp8_filter &= ~hev;
+        ; Filter2 = vp8_filter;
+        pandn       xmm4,               xmm1            ; vp8_filter&=~hev
+
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
+        ; s = vp8_signed_char_clamp(qs0 - u);
+        ; *oq0 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps0 + u);
+        ; *op0 = s^0x80;
+        pxor        xmm0,               xmm0
+        pxor        xmm1,               xmm1
+
+        pxor        xmm2,               xmm2
+        punpcklbw   xmm1,               xmm4
+
+        punpckhbw   xmm2,               xmm4
+        pmulhw      xmm1,               [s27 GLOBAL]
+
+        pmulhw      xmm2,               [s27 GLOBAL]
+        paddw       xmm1,               [s63 GLOBAL]
+
+        paddw       xmm2,               [s63 GLOBAL]
+        psraw       xmm1,               7
+
+        psraw       xmm2,               7
+        packsswb    xmm1,               xmm2
+
+        psubsb      xmm3,               xmm1
+        paddsb      xmm6,               xmm1
+
+        pxor        xmm3,               [t80 GLOBAL]
+        pxor        xmm6,               [t80 GLOBAL]
+
+        movdqa      [rdx+48],           xmm6
+        movdqa      [rdx+64],           xmm3
+
+        ; roughly 2/7th difference across boundary
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
+        ; s = vp8_signed_char_clamp(qs1 - u);
+        ; *oq1 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps1 + u);
+        ; *op1 = s^0x80;
+        pxor        xmm1,               xmm1
+        pxor        xmm2,               xmm2
+
+        punpcklbw   xmm1,               xmm4
+        punpckhbw   xmm2,               xmm4
+
+        pmulhw      xmm1,               [s18 GLOBAL]
+        pmulhw      xmm2,               [s18 GLOBAL]
+
+        paddw       xmm1,               [s63 GLOBAL]
+        paddw       xmm2,               [s63 GLOBAL]
+
+        psraw       xmm1,               7
+        psraw       xmm2,               7
+
+        packsswb    xmm1,               xmm2
+
+        movdqa      xmm3,               [rdx + 80]              ; q1
+        movdqa      xmm6,               [rdx + 32]              ; p1
+
+        pxor        xmm3,               [t80 GLOBAL]
+        pxor        xmm6,               [t80 GLOBAL]
+
+        paddsb      xmm6,               xmm1
+        psubsb      xmm3,               xmm1
+
+        pxor        xmm6,               [t80 GLOBAL]
+        pxor        xmm3,               [t80 GLOBAL]
+
+        movdqa      [rdx + 80],         xmm3
+        movdqa      [rdx + 32],         xmm6
+
+        ; roughly 1/7th difference across boundary
+        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
+        ; s = vp8_signed_char_clamp(qs2 - u);
+        ; *oq2 = s^0x80;
+        ; s = vp8_signed_char_clamp(ps2 + u);
+        ; *op2 = s^0x80;
+        pxor        xmm1,               xmm1
+        pxor        xmm2,               xmm2
+
+        punpcklbw   xmm1,               xmm4
+        punpckhbw   xmm2,               xmm4
+
+        pmulhw      xmm1,               [s9 GLOBAL]
+        pmulhw      xmm2,               [s9 GLOBAL]
+
+        paddw       xmm1,               [s63 GLOBAL]
+        paddw       xmm2,               [s63 GLOBAL]
+
+        psraw       xmm1,               7
+        psraw       xmm2,               7
+
+        packsswb    xmm1,               xmm2
+
+        movdqa      xmm6,               [rdx+16]
+        movdqa      xmm3,               [rdx+96]
+
+        pxor        xmm6,               [t80 GLOBAL]
+        pxor        xmm3,               [t80 GLOBAL]
+
+        paddsb      xmm6,               xmm1
+        psubsb      xmm3,               xmm1
+
+        pxor        xmm6,               [t80 GLOBAL]        ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+        pxor        xmm3,               [t80 GLOBAL]        ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
+%endmacro
+
+%macro MBV_TRANSPOSE 0
+        movdqa      xmm0,               [rdx]               ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+        movdqa      xmm1,               xmm0                ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+
+        punpcklbw   xmm0,               xmm6                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        punpckhbw   xmm1,               xmm6                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+
+        movdqa      xmm2,               [rdx+32]            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        movdqa      xmm6,               xmm2                ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+
+        punpcklbw   xmm2,               [rdx+48]            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
+        punpckhbw   xmm6,               [rdx+48]            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+
+        movdqa      xmm5,               xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        punpcklwd   xmm0,               xmm2                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+
+        punpckhwd   xmm5,               xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        movdqa      xmm4,               xmm1                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+
+        punpcklwd   xmm1,               xmm6                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+        punpckhwd   xmm4,               xmm6                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+
+        movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpcklbw   xmm2,               [rdx+80]            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+
+        movdqa      xmm6,               xmm3                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+        punpcklbw   xmm6,               [rdx+112]           ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
+
+        movdqa      xmm7,               xmm2                ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+        punpcklwd   xmm2,               xmm6                ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
+
+        punpckhwd   xmm7,               xmm6                ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
+        movdqa      xmm6,               xmm0                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+
+        punpckldq   xmm0,               xmm2                ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
+        punpckhdq   xmm6,               xmm2                ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
+%endmacro
+
+%macro MBV_WRITEBACK_1 0
+        movq        QWORD PTR [rsi+rax*4], xmm0
+        psrldq      xmm0,               8
+
+        movq        QWORD PTR [rsi+rax*2], xmm6
+        psrldq      xmm6,               8
+
+        movq        QWORD PTR [rdi+rax*4], xmm0
+        movq        QWORD PTR [rsi+rax],   xmm6
+
+        movdqa      xmm0,               xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        punpckldq   xmm0,               xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
+
+        punpckhdq   xmm5,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
+
+        movq        QWORD PTR [rsi],    xmm0
+        psrldq      xmm0,               8
+
+        movq        QWORD PTR [rsi+rcx*2], xmm5
+        psrldq      xmm5,               8
+
+        movq        QWORD PTR [rsi+rcx],   xmm0
+        movq        QWORD PTR [rdi+rcx*2], xmm5
+
+        movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
+        punpckhbw   xmm2,               [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
+
+        punpckhbw   xmm3,               [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
+        movdqa      xmm0,               xmm2
+
+        punpcklwd   xmm0,               xmm3                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
+        punpckhwd   xmm2,               xmm3                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
+
+        movdqa      xmm3,               xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+        punpckldq   xmm1,               xmm0                ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
+
+        punpckhdq   xmm3,               xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
+%endmacro
+
+%macro MBV_WRITEBACK_2 0
+        movq        QWORD PTR [rsi+rax*4], xmm1
+        psrldq      xmm1,               8
+
+        movq        QWORD PTR [rsi+rax*2], xmm3
+        psrldq      xmm3,               8
+
+        movq        QWORD PTR [rdi+rax*4], xmm1
+        movq        QWORD PTR [rsi+rax],   xmm3
+
+        movdqa      xmm1,               xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+        punpckldq   xmm1,               xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
+
+        punpckhdq   xmm4,               xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
+        movq        QWORD PTR [rsi],    xmm1
+
+        psrldq      xmm1,               8
+
+        movq        QWORD PTR [rsi+rcx*2], xmm4
+        psrldq      xmm4,               8
+
+        movq        QWORD PTR [rsi+rcx], xmm1
+        movq        QWORD PTR [rdi+rcx*2], xmm4
+%endmacro
+
+
 ;void vp8_mbloop_filter_vertical_edge_sse2
 ;(
 ;    unsigned char *src_ptr,
@@ -1039,531 +1569,116 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
     %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
 
-
         mov         rsi,                arg(0) ;src_ptr
-        movsxd      rax,                dword ptr arg(1) ;src_pixel_step                    ; destination pitch?
+        movsxd      rax,                dword ptr arg(1)   ;src_pixel_step
 
         lea         rsi,                [rsi + rax*4 - 4]
-        lea         rdi,                [rsi + rax]                     ; rdi points to row +1 for indirect addressing
-
+        lea         rdi,                [rsi + rax]        ; rdi points to row +1 for indirect addressing
         mov         rcx,                rax
-        neg         rcx
+        neg         rax
 
         ; Transpose
-        movq        xmm0,               QWORD PTR [rdi+rax*2]           ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
-        movq        xmm7,               QWORD PTR [rsi+rax*2]           ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
-
-        punpcklbw   xmm7,               xmm0                            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
-        movq        xmm0,               QWORD PTR [rsi+rax]             ;
-
-        movq        xmm5,               QWORD PTR [rsi]                 ;
-        punpcklbw   xmm5,               xmm0                            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-
-        movdqa      xmm6,               xmm5                            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-        punpcklwd   xmm5,               xmm7                            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
-
-        punpckhwd   xmm6,               xmm7                            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-        movq        xmm7,               QWORD PTR [rsi + rcx]           ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
-
-        movq        xmm0,               QWORD PTR [rsi + rcx*2]         ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
-        punpcklbw   xmm0,               xmm7                            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
-
-        movq        xmm4,               QWORD PTR [rsi + rcx*4]         ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
-        movq        xmm7,               QWORD PTR [rdi + rcx*4]         ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm4,               xmm7                            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-        movdqa      xmm3,               xmm4                            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-
-        punpcklwd   xmm3,               xmm0                            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-        punpckhwd   xmm4,               xmm0                            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-
-        movdqa      xmm7,               xmm4                            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-        movdqa      xmm2,               xmm3                            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
-
-        punpckhdq   xmm7,               xmm6                            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        punpckldq   xmm4,               xmm6                            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-
-        punpckhdq   xmm3,               xmm5                            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-        punpckldq   xmm2,               xmm5                            ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
-
-        movdqa      t0,                 xmm2                            ; save to free XMM2
-        ;movdqa        t1,                 xmm3
-
-        ; XMM3 XMM4 XMM7 in use
-        lea         rsi,                [rsi+rax*8]
-        lea         rdi,                [rdi+rax*8]
-
-        movq        xmm6,               QWORD PTR [rdi+rax*2]           ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
-        movq        xmm5,               QWORD PTR [rsi+rax*2]           ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
-
-        punpcklbw   xmm5,               xmm6                            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-        movq        xmm6,               QWORD PTR [rsi+rax]             ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
-
-        movq        xmm1,               QWORD PTR [rsi]                 ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
-        punpcklbw   xmm1,               xmm6                            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
-
-        movdqa      xmm6,               xmm1                            ;
-        punpckhwd   xmm6,               xmm5                            ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
-
-        punpcklwd   xmm1,               xmm5                            ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-        movq        xmm5,               QWORD PTR [rsi+rcx]             ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
-
-        movq        xmm0,               QWORD PTR [rsi+rcx*2]           ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
-        punpcklbw   xmm0,               xmm5                            ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-
-        movq        xmm2,               QWORD PTR [rsi+rcx*4]           ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
-        movq        xmm5,               QWORD PTR [rdi+rcx*4]           ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
-
-        punpcklbw   xmm2,               xmm5                            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-        movdqa      xmm5,               xmm2                            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
-
-        punpcklwd   xmm5,               xmm0                            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
-        punpckhwd   xmm2,               xmm0                            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        movdqa      xmm0,               xmm5
-        punpckldq   xmm0,               xmm1                            ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
-
-
-        punpckhdq   xmm5,               xmm1                            ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
-        movdqa      xmm1,               xmm2                            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
-
-        punpckldq   xmm1,               xmm6                            ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-        punpckhdq   xmm2,               xmm6                            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
-
-        movdqa      xmm6,               xmm7                            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
-        punpcklqdq  xmm6,               xmm2                            ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
-
+        TRANSPOSE_16X8_1
 
+        lea         rsi,                [rsi+rcx*8]
+        lea         rdi,                [rdi+rcx*8]
         lea         rdx,                srct
-        punpckhqdq  xmm7,               xmm2                            ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
-
-        movdqa      [rdx+112],          xmm7                            ; save 7
-        movdqa      xmm2,               xmm3                            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
-        movdqa      [rdx+96],           xmm6                            ; save 6
-        punpcklqdq  xmm2,               xmm5                            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-
-        punpckhqdq  xmm3,               xmm5                            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
-        movdqa      [rdx+32],           xmm2                            ; save 2
-
-        movdqa      xmm5,               xmm4                            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
-        punpcklqdq  xmm4,               xmm1                            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-
-        movdqa      [rdx+48],           xmm3                            ; save 3
-        punpckhqdq  xmm5,               xmm1                            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
-
-        movdqa      [rdx+64],           xmm4                            ; save 4
-        movdqa      [rdx+80],           xmm5                            ; save 5
-
-        movdqa      xmm1,               t0                              ; get
-        movdqa      xmm2,               xmm1                            ;
-
-        punpckhqdq  xmm1,               xmm0                            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        punpcklqdq  xmm2,               xmm0                            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-
-        movdqa      [rdx+16],           xmm1
-        movdqa      [rdx],              xmm2
-
-        movdqa      xmm0,               xmm6                            ; q2
-        psubusb     xmm0,               xmm7                            ; q2-q3
-
-        psubusb     xmm7,               xmm6                            ; q3-q2
-        por         xmm7,               xmm0                            ; abs (q3-q2)
-
-        movdqa      xmm1,               xmm5                            ; q1
-        psubusb     xmm1,               xmm6                            ; q1-q2
-
-        psubusb     xmm6,               xmm5                            ; q2-q1
-        por         xmm6,               xmm1                            ; abs (q2-q1)
-
-        ;/*
-        ;movdqa      xmm0,               xmm4                            ; q0
-        ;psubusb     xmm0                xmm5                            ; q0-q1
-        ;
-        ;pusbusb     xmm5,               xmm4                            ; q1-q0
-        ;por         xmm5,               xmm0                            ; abs (q1-q0)
-        ;*/
-
-        movdqa      xmm1,               [rdx+16]                        ; p2
-        movdqa      xmm0,               xmm1
-
-        psubusb     xmm0,               xmm2                            ; p2 - p3;
-        psubusb     xmm2,               xmm1                            ; p3 - p2;
-
-        por         xmm0,               xmm2                            ; abs(p2-p3)
-
-        movdqa      xmm2,               [rdx+32]                        ; p1
-        movdqa      xmm5,               xmm2                            ; p1
-
-        psubusb     xmm5,               xmm1                            ; p1-p2
-        psubusb     xmm1,               xmm2                            ; p2-p1
-
-        por         xmm1,               xmm5                            ; abs(p2-p1)
-        mov         rdx,                arg(3) ;limit
-
-        movdqa      xmm4,               [rdx]                           ; limit
-        psubusb     xmm7,               xmm4                            ;
-
-
-        psubusb     xmm0,               xmm4                            ; abs(p3-p2) > limit
-        psubusb     xmm1,               xmm4                            ; abs(p2-p1) > limit
-
-        psubusb     xmm6,               xmm4                            ; abs(q2-q1) > limit
-        por         xmm7,               xmm6                            ; or
-
-        por         xmm0,               xmm1                            ;
-        por         xmm0,               xmm7                            ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
-        movdqa      xmm1,               xmm2                            ; p1
-
-        movdqa      xmm7,               xmm3                            ; p0
-        psubusb     xmm7,               xmm2                            ; p0-p1
-
-        psubusb     xmm2,               xmm3                            ; p1-p0
-        por         xmm2,               xmm7                            ; abs(p1-p0)
-
-        movdqa      t0,                 xmm2                            ; save abs(p1-p0)
-        lea         rdx,                srct
-
-        psubusb     xmm2,               xmm4                            ; abs(p1-p0)>limit
-        por         xmm0,               xmm2                            ; mask
-
-        movdqa      xmm5,               [rdx+64]                        ; q0
-        movdqa      xmm7,               [rdx+80]                        ; q1
-
-        movdqa      xmm6,               xmm5                            ; q0
-        movdqa      xmm2,               xmm7                            ; q1
-        psubusb     xmm5,               xmm7                            ; q0-q1
-
-        psubusb     xmm7,               xmm6                            ; q1-q0
-        por         xmm7,               xmm5                            ; abs(q1-q0)
-
-        movdqa      t1,                 xmm7                            ; save abs(q1-q0)
-        psubusb     xmm7,               xmm4                            ; abs(q1-q0)> limit
-
-        por         xmm0,               xmm7                            ; mask
-
-        movdqa      xmm5,                xmm2                           ; q1
-        psubusb     xmm5,                xmm1                           ; q1-=p1
-        psubusb     xmm1,                xmm2                           ; p1-=q1
-        por         xmm5,                xmm1                           ; abs(p1-q1)
-        pand        xmm5,                [tfe GLOBAL]                   ; set lsb of each byte to zero
-        psrlw       xmm5,                1                              ; abs(p1-q1)/2
-
-        mov         rdx,                arg(2) ;flimit                          ;
-        movdqa      xmm2,               [rdx]                           ; flimit
-
-        movdqa      xmm1,               xmm3                            ; p0
-        movdqa      xmm7,               xmm6                            ; q0
-        psubusb     xmm1,               xmm7                            ; p0-q0
-        psubusb     xmm7,               xmm3                            ; q0-p0
-        por         xmm1,               xmm7                            ; abs(q0-p0)
-        paddusb     xmm1,               xmm1                            ; abs(q0-p0)*2
-        paddusb     xmm1,               xmm5                            ; abs (p0 - q0) *2 + abs(p1-q1)/2
-
-        paddb       xmm2,               xmm2                            ; flimit*2 (less than 255)
-        paddb       xmm4,               xmm2                            ; flimit * 2 + limit (less than 255)
-
-        psubusb     xmm1,               xmm4                            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,               xmm0;                           ; mask
-        pxor        xmm0,               xmm0
-        pcmpeqb     xmm1,               xmm0
+        TRANSPOSE_16X8_2 0
 
+        ; calculate filter mask
+        LFV_FILTER_MASK 0
         ; calculate high edge variance
-        mov         rdx,                arg(4) ;thresh                          ; get thresh
-        movdqa      xmm7,               [rdx]
-
-        movdqa      xmm4,               t0                              ; get abs (q1 - q0)
-        psubusb     xmm4,               xmm7                            ; abs(q1 - q0) > thresh
-
-        movdqa      xmm3,               t1                              ; get abs (p1 - p0)
-        psubusb     xmm3,               xmm7                            ; abs(p1 - p0)> thresh
-
-        por         xmm4,               xmm3                            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,               xmm0
-
-        pcmpeqb     xmm0,               xmm0
-        pxor        xmm4,               xmm0
-
+        LFV_HEV_MASK
 
         ; start work on filters
-        lea         rdx,                srct
-
-        ; start work on filters
-        movdqa      xmm2,               [rdx+32]                        ; p1
-        movdqa      xmm7,               [rdx+80]                        ; q1
-
-        pxor        xmm2,               [t80 GLOBAL]                    ; p1 offset to convert to signed values
-        pxor        xmm7,               [t80 GLOBAL]                    ; q1 offset to convert to signed values
-
-        psubsb      xmm2,               xmm7                            ; p1 - q1
-        movdqa      xmm6,               [rdx+48]                        ; p0
-
-        movdqa      xmm0,               [rdx+64]                        ; q0
-        pxor        xmm6,               [t80 GLOBAL]                    ; offset to convert to signed values
-
-        pxor        xmm0,               [t80 GLOBAL]                    ; offset to convert to signed values
-        movdqa      xmm3,               xmm0                            ; q0
-
-        psubsb      xmm0,               xmm6                            ; q0 - p0
-        paddsb      xmm2,               xmm0                            ; 1 * (q0 - p0) + (p1 - q1)
-
-        paddsb      xmm2,               xmm0                            ; 2 * (q0 - p0)
-        paddsb      xmm2,               xmm0                            ; 3 * (q0 - p0)+ (p1 - q1)
-
-        pand        xmm1,               xmm2                            ; mask filter values we don't care about
-
-        ; xmm1 = vp8_filter, xmm4=hev, xmm6=ps0, xmm3=qs0
-        movdqa      xmm2,               xmm1                            ; vp8_filter
-        pand        xmm2,               xmm4;                           ; Filter2 = vp8_filter & hev
-
-        movdqa      xmm5,               xmm2
-        paddsb      xmm5,               [t3 GLOBAL]
-
-        pxor        xmm0,               xmm0                            ; 0
-        pxor        xmm7,               xmm7                            ; 0
-
-        punpcklbw   xmm0,               xmm5                            ; e0f0g0h0
-        psraw       xmm0,               11                              ; sign extended shift right by 3
-
-        punpckhbw   xmm7,               xmm5                            ; a0b0c0d0
-        psraw       xmm7,               11                              ; sign extended shift right by 3
-
-        packsswb    xmm0,               xmm7                            ; Filter2 >>=3;
-        movdqa      xmm5,               xmm0                            ; Filter2
-
-        paddsb      xmm2,               [t4 GLOBAL]                     ; vp8_signed_char_clamp(Filter2 + 4)
-        pxor        xmm0,               xmm0                            ; 0
-
-        pxor        xmm7,               xmm7                            ; 0
-        punpcklbw   xmm0,               xmm2                            ; e0f0g0h0
-
-        psraw       xmm0,               11                              ; sign extended shift right by 3
-        punpckhbw   xmm7,               xmm2                            ; a0b0c0d0
-
-        psraw       xmm7,               11                              ; sign extended shift right by 3
-        packsswb    xmm0,               xmm7                            ; Filter2 >>=3;
-
-        ; xmm0= filter2 xmm1 = vp8_filter,  xmm3 =qs0 xmm5=s xmm4 =hev xmm6=ps0
-        psubsb      xmm3,               xmm0                            ; qs0 =qs0 - filter1
-        paddsb      xmm6,               xmm5                            ; ps0 =ps0 + Fitler2
-
-
-        ; xmm1=vp8_filter, xmm3=qs0, xmm4 =hev xmm6=ps0
-        ; vp8_filter &= ~hev;
-        ; Filter2 = vp8_filter;
-        pandn       xmm4,                   xmm1                        ; vp8_filter&=~hev
-
-        ; xmm3=qs0, xmm4=filter2, xmm6=ps0
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
-        ; s = vp8_signed_char_clamp(qs0 - u);
-        ; *oq0 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps0 + u);
-        ; *op0 = s^0x80;
-        pxor        xmm0,                   xmm0
-        pxor        xmm1,                   xmm1
-
-        pxor        xmm2,                   xmm2
-        punpcklbw   xmm1,                   xmm4
-
-        punpckhbw   xmm2,                   xmm4
-        pmulhw      xmm1,                   [s27 GLOBAL]
-
-        pmulhw      xmm2,                   [s27 GLOBAL]
-        paddw       xmm1,                   [s63 GLOBAL]
-
-        paddw       xmm2,                   [s63 GLOBAL]
-        psraw       xmm1,                   7
-
-        psraw       xmm2,                   7
-        packsswb    xmm1,                   xmm2
-
-        psubsb      xmm3,                   xmm1
-        paddsb      xmm6,                   xmm1
-
-        pxor        xmm3,                   [t80 GLOBAL]
-        pxor        xmm6,                   [t80 GLOBAL]
-
-        movdqa      [rdx+48],               xmm6
-        movdqa      [rdx+64],               xmm3
-
-        ; roughly 2/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
-        ; s = vp8_signed_char_clamp(qs1 - u);
-        ; *oq1 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps1 + u);
-        ; *op1 = s^0x80;
-        pxor        xmm1,                       xmm1
-        pxor        xmm2,                       xmm2
-
-        punpcklbw   xmm1,                       xmm4
-        punpckhbw   xmm2,                       xmm4
-
-        pmulhw      xmm1,                       [s18 GLOBAL]
-        pmulhw      xmm2,                       [s18 GLOBAL]
-
-        paddw       xmm1,                       [s63 GLOBAL]
-        paddw       xmm2,                       [s63 GLOBAL]
-
-        psraw       xmm1,                       7
-        psraw       xmm2,                       7
-
-        packsswb    xmm1,                       xmm2
-
-        movdqa      xmm3,                       [rdx + 80]              ;/q1
-        movdqa      xmm6,                       [rdx + 32]              ; p1
-
-        pxor        xmm3,                       [t80 GLOBAL]
-        pxor        xmm6,                       [t80 GLOBAL]
-
-        paddsb      xmm6,                       xmm1
-        psubsb      xmm3,                       xmm1
-
-        pxor        xmm6,                       [t80 GLOBAL]
-        pxor        xmm3,                       [t80 GLOBAL]
-
-        movdqa      [rdx + 80],                 xmm3
-        movdqa      [rdx + 32],                 xmm6
-
-
-        ; roughly 1/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
-        ; s = vp8_signed_char_clamp(qs2 - u);
-        ; *oq2 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps2 + u);
-        ; *op2 = s^0x80;
-        pxor        xmm1,                       xmm1
-        pxor        xmm2,                       xmm2
-
-        punpcklbw   xmm1,                       xmm4
-        punpckhbw   xmm2,                       xmm4
-
-        pmulhw      xmm1,                       [s9 GLOBAL]
-        pmulhw      xmm2,                       [s9 GLOBAL]
-
-        paddw       xmm1,                       [s63 GLOBAL]
-        paddw       xmm2,                       [s63 GLOBAL]
-
-        psraw       xmm1,                       7
-        psraw       xmm2,                       7
-
-        packsswb    xmm1,                       xmm2
-
-        movdqa      xmm6,                       [rdx+16]
-        movdqa      xmm3,                       [rdx+96]
-
-        pxor        xmm6,                       [t80 GLOBAL]
-        pxor        xmm3,                       [t80 GLOBAL]
-
-        paddsb      xmm6,                       xmm1
-        psubsb      xmm3,                       xmm1
-
-        pxor        xmm6,                       [t80 GLOBAL]        ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        pxor        xmm3,                       [t80 GLOBAL]        ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
-
+        MBV_FILTER
 
         ; transpose and write back
-        movdqa      xmm0,                       [rdx]               ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
-        movdqa      xmm1,                       xmm0                ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
+        MBV_TRANSPOSE
 
-        punpcklbw   xmm0,                       xmm6                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
-        punpckhbw   xmm1,                       xmm6                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+        lea         rsi,                [rsi+rax*8]
+        lea         rdi,                [rdi+rax*8]
+        MBV_WRITEBACK_1
 
-        movdqa      xmm2,                       [rdx+32]            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
-        movdqa      xmm6,                       xmm2                ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
+        lea         rsi,                [rsi+rcx*8]
+        lea         rdi,                [rdi+rcx*8]
+        MBV_WRITEBACK_2
 
-        punpcklbw   xmm2,                       [rdx+48]            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
-        punpckhbw   xmm6,                       [rdx+48]            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
+    add rsp, 160
+    pop rsp
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
-        movdqa      xmm5,                       xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
-        punpcklwd   xmm0,                       xmm2                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
 
-        punpckhwd   xmm5,                       xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
-        movdqa      xmm4,                       xmm1                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+;void vp8_mbloop_filter_vertical_edge_uv_sse2
+;(
+;    unsigned char *u,
+;    int            src_pixel_step,
+;    const char    *flimit,
+;    const char    *limit,
+;    const char    *thresh,
+;    unsigned char *v
+;)
+global sym(vp8_mbloop_filter_vertical_edge_uv_sse2)
+sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
 
-        punpcklwd   xmm1,                       xmm6                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
-        punpckhwd   xmm4,                       xmm6                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
+    ALIGN_STACK 16, rax
+    sub          rsp, 160     ; reserve 160 bytes
+    %define t0   [rsp + 0]    ;__declspec(align(16)) char t0[16];
+    %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
+    %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
 
-        movdqa      xmm2,                       [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpcklbw   xmm2,                       [rdx+80]            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+        mov         rsi,                arg(0) ;u_ptr
+        movsxd      rax,                dword ptr arg(1)   ; src_pixel_step
 
-        movdqa      xmm6,                       xmm3                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
-        punpcklbw   xmm6,                       [rdx+112]           ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
+        lea         rsi,                [rsi + rax*4 - 4]
+        lea         rdi,                [rsi + rax]        ; rdi points to row +1 for indirect addressing
+        mov         rcx,                rax
+        neg         rax
 
-        movdqa      xmm7,                       xmm2                ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
-        punpcklwd   xmm2,                       xmm6                ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04
+        ; Transpose
+        TRANSPOSE_16X8_1
 
-        punpckhwd   xmm7,                       xmm6                ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44
-        movdqa      xmm6,                       xmm0                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
+        ; XMM3 XMM4 XMM7 in use
+        mov         rsi,                arg(5)             ;v_ptr
+        lea         rsi,                [rsi + rcx*4 - 4]
+        lea         rdi,                [rsi + rcx]
+        lea         rdx,                srct
+        TRANSPOSE_16X8_2 0
 
-        punpckldq   xmm0,                       xmm2                ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00
-        punpckhdq   xmm6,                       xmm2                ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20
+        ; calculate filter mask
+        LFV_FILTER_MASK 0
+        ; calculate high edge variance
+        LFV_HEV_MASK
 
-        lea         rsi,                        [rsi+rcx*8]
-        lea         rdi,                        [rdi+rcx*8]
+        ; start work on filters
+        MBV_FILTER
 
-        movq        QWORD PTR [rsi+rcx*4],      xmm0
-        psrldq      xmm0,                       8
+        ; transpose and write back
+        MBV_TRANSPOSE
 
-        movq        QWORD PTR [rsi+rcx*2],      xmm6
-        psrldq      xmm6,                       8
-
-        movq        QWORD PTR [rdi+rcx*4],      xmm0
-        movq        QWORD PTR [rsi+rcx],        xmm6
-
-        movdqa      xmm0,                       xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
-        punpckldq   xmm0,                       xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
-
-        punpckhdq   xmm5,                       xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
-
-        movq        QWORD PTR [rsi],            xmm0
-        psrldq      xmm0,                       8
-
-        movq        QWORD PTR [rsi+rax*2],      xmm5
-        psrldq      xmm5,                       8
-
-        movq        QWORD PTR [rsi+rax],        xmm0
-        movq        QWORD PTR [rdi+rax*2],      xmm5
-
-        movdqa      xmm2,                       [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
-        punpckhbw   xmm2,                       [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
-
-        punpckhbw   xmm3,                       [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
-        movdqa      xmm0,                       xmm2
-
-        punpcklwd   xmm0,                       xmm3                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
-        punpckhwd   xmm2,                       xmm3                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
-
-        movdqa      xmm3,                       xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
-        punpckldq   xmm1,                       xmm0                ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
-
-        punpckhdq   xmm3,                       xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
-
-        lea         rsi,                        [rsi+rax*8]
-        lea         rdi,                        [rdi+rax*8]
-
-        movq        QWORD PTR [rsi+rcx*4],      xmm1
-        psrldq      xmm1,                       8
-
-        movq        QWORD PTR [rsi+rcx*2],      xmm3
-        psrldq      xmm3,                       8
-
-        movq        QWORD PTR [rdi+rcx*4],      xmm1
-        movq        QWORD PTR [rsi+rcx],        xmm3
-
-        movdqa      xmm1,                       xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
-        punpckldq   xmm1,                       xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
-
-        punpckhdq   xmm4,                       xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
-        movq        QWORD PTR [rsi],            xmm1
-
-        psrldq      xmm1,                       8
-
-        movq        QWORD PTR [rsi+rax*2],      xmm4
-        psrldq      xmm4,                       8
-
-        movq        QWORD PTR [rsi+rax],        xmm1
-        movq        QWORD PTR [rdi+rax*2],      xmm4
+        mov         rsi,                arg(0)             ;u_ptr
+        lea         rsi,                [rsi + rcx*4 - 4]
+        lea         rdi,                [rsi + rcx]
+        MBV_WRITEBACK_1
+        mov         rsi,                arg(5)             ;v_ptr
+        lea         rsi,                [rsi + rcx*4 - 4]
+        lea         rdi,                [rsi + rcx]
+        MBV_WRITEBACK_2
 
     add rsp, 160
     pop rsp
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 3a9437e4d..16498abbd 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -34,6 +34,11 @@ prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_sse2);
 prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_sse2);
 prototype_loopfilter(vp8_fast_loop_filter_vertical_edges_sse2);
 
+extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
+extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
+
 #if HAVE_MMX
 // Horizontal MB filtering
 void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
@@ -157,10 +162,7 @@ void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
     vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 
     if (u_ptr)
-        vp8_mbloop_filter_horizontal_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
-    if (v_ptr)
-        vp8_mbloop_filter_horizontal_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+        vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
 }
 
 
@@ -183,10 +185,7 @@ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
     vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 
     if (u_ptr)
-        vp8_mbloop_filter_vertical_edge_mmx(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
-
-    if (v_ptr)
-        vp8_mbloop_filter_vertical_edge_mmx(v_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, 1);
+        vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->uvmbflim, lfi->uvlim, lfi->uvmbthr, v_ptr);
 }
 
 
@@ -211,10 +210,7 @@ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
     vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 
     if (u_ptr)
-        vp8_loop_filter_horizontal_edge_mmx(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
-    if (v_ptr)
-        vp8_loop_filter_horizontal_edge_mmx(v_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+        vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4 * uv_stride);
 }
 
 
@@ -241,10 +237,7 @@ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
     vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 
     if (u_ptr)
-        vp8_loop_filter_vertical_edge_mmx(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
-
-    if (v_ptr)
-        vp8_loop_filter_vertical_edge_mmx(v_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, 1);
+        vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->uvflim, lfi->uvlim, lfi->uvthr, v_ptr + 4);
 }
 
 

From 29d586b462f2c37bd468dd51cf32ef5982777056 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Wed, 30 Jun 2010 09:42:39 -0400
Subject: [PATCH 068/307] Add loopfilter initialization fix in multithreading
 code

Modified loopfilter initialization to avoid unnecessary operations.

Change-Id: I9fd1a5a49edc1cb8116c2a72a6908b1e437459ec
---
 vp8/common/onyxc_int.h  | 1 +
 vp8/decoder/threading.c | 7 +++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 4d5d9878b..33a543377 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -201,6 +201,7 @@ typedef struct VP8Common
 
 void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level);
 void vp8_init_loop_filter(VP8_COMMON *cm);
+void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type);
 extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
 
 #endif
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 38d604210..18c8da077 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -281,11 +281,11 @@ THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
 
                 YV12_BUFFER_CONFIG *post = &cm->new_frame;
                 loop_filter_info *lfi = cm->lf_info;
+                int frame_type = cm->frame_type;
 
                 int mb_row;
                 int mb_col;
 
-
                 int baseline_filter_level[MAX_MB_SEGMENTS];
                 int filter_level;
                 int alt_flt_enabled = mbd->segmentation_enabled;
@@ -319,7 +319,10 @@ THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
                 }
 
                 // Initialize the loop filter for this frame.
-                vp8_init_loop_filter(cm);
+                if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+                    vp8_init_loop_filter(cm);
+                else if (frame_type != cm->last_frame_type)
+                    vp8_frame_init_loop_filter(lfi, frame_type);
 
                 // Set up the buffer pointers
                 y_ptr = post->y_buffer;

From 308e867f91ec1b76d59009060ada99f52a73c602 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 30 Jun 2010 10:22:40 -0400
Subject: [PATCH 069/307] Update loopfilter frame/filter/sharp info for
 multithread

Change I9fd1a5a4 updated the multithreaded loopfilter to avoid
reinitializing several parameteres if they haven't changed from the
last frame, but the code to update the last frame's parameters wasn't
invoked in the multithreaded case.

Change-Id: Ia23d937af625c01dd739608e02d110f742b7e1f2
---
 vp8/decoder/onyxd_if.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 3a237de1f..60ca74a7f 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -293,17 +293,16 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         // Apply the loop filter if appropriate.
 
         if (cm->filter_level > 0)
-        {
             vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-            cm->last_frame_type = cm->frame_type;
-            cm->last_filter_type = cm->filter_type;
-            cm->last_sharpness_level = cm->sharpness_level;
-
-        }
 
         vpx_usec_timer_mark(&lpftimer);
         pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
     }
+    if (cm->filter_level > 0) {
+        cm->last_frame_type = cm->frame_type;
+        cm->last_filter_type = cm->filter_type;
+        cm->last_sharpness_level = cm->sharpness_level;
+    }
 
     vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
 

From 0618ff14d6eeca27d6cca6b3999e4cd10fe7b096 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Thu, 1 Jul 2010 14:17:04 +0100
Subject: [PATCH 070/307] Fix bug in 1st pass motion compensation

In the case where the best reference mv is not (0,0) a secondary
search is carried out centered on (0,0). However, rather than
sending tmp_err into the search function, motion_error was
inadvertently passed.

As a result tmp_err remains set at INT_MAX and the (0,0)-based
search result will never be selected, even if it is better.

Change-Id: I3c82b246c8c82ba887b9d3fb4c9e0a0f2fe5a76c
---
 vp8/encoder/firstpass.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 74feca314..0a33feb1c 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -639,14 +639,18 @@ void vp8_first_pass(VP8_COMP *cpi)
                 d->bmi.mv.as_mv.row = 0;
                 d->bmi.mv.as_mv.col = 0;
 
-                // Test last reference frame using the previous best mv as the starting point (best reference) for the search
-                vp8_first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, &cm->last_frame, &motion_error, recon_yoffset);
+                // Test last reference frame using the previous best mv as the
+                // starting point (best reference) for the search
+                vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
+                                        &d->bmi.mv.as_mv, &cm->last_frame,
+                                        &motion_error, recon_yoffset);
 
                 // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
                 if ((best_ref_mv.col != 0) || (best_ref_mv.row != 0))
                 {
                    tmp_err = INT_MAX;
-                   vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->last_frame, &motion_error, recon_yoffset);
+                   vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
+                                     &cm->last_frame, &tmp_err, recon_yoffset);
 
                    if ( tmp_err < motion_error )
                    {

From 1e23f45119c86482f9d8e384e7e093ed3b741cbc Mon Sep 17 00:00:00 2001
From: Michael Kohler <michaelkohler@live.com>
Date: Wed, 7 Jul 2010 19:48:12 +0200
Subject: [PATCH 071/307] Fix misspelled "skiped" in onyxc_int.h to "skipped".

Signed-off-by: Michael Kohler <michaelkohler@live.com>
---
 vp8/common/onyxc_int.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 33a543377..d2fbc8686 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -102,7 +102,7 @@ typedef struct VP8Common
     YV12_BUFFER_CONFIG post_proc_buffer;
     YV12_BUFFER_CONFIG temp_scale_frame;
 
-    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skiped.
+    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skipped.
     FRAME_TYPE frame_type;
 
     int show_frame;

From efbfaf6d114186f05662ffa82c2edd4fb8ef6f95 Mon Sep 17 00:00:00 2001
From: Michael Kohler <michaelkohler@live.com>
Date: Wed, 7 Jul 2010 19:49:58 +0200
Subject: [PATCH 072/307] Fix misspelled "paramter" in vpx_codec_internal.h" to
 "parameter".

Signed-off-by: Michael Kohler <michaelkohler@live.com>
---
 vpx/internal/vpx_codec_internal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 7d970275c..4053df036 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -138,7 +138,7 @@ typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t    *ctx,
  * provide type safety for the exchanged data or assign meanings to the
  * control codes. Those details should be specified in the algorithm's
  * header file. In particular, the ctrl_id parameter is guaranteed to exist
- * in the algorithm's control mapping table, and the data paramter may be NULL.
+ * in the algorithm's control mapping table, and the data parameter may be NULL.
  *
  *
  * \param[in]     ctx              Pointer to this instance's context

From 3d0a1edadd0befc7a6ebe3fb746a11ce3dfc6485 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 7 Jul 2010 10:26:30 -0700
Subject: [PATCH 073/307] Fix a compiling error on armv6

The issue was caused by a bad merge in Change I5559d1e8

Change-Id: I6563f652bc1500202de361f8f51d11cc6ddf3331
---
 vp8/encoder/dct.h | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index 0ab40b310..cd8faedef 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -32,6 +32,15 @@ extern prototype_fdct(vp8_fdct_short4x4);
 #endif
 extern prototype_fdct(vp8_fdct_short8x4);
 
+// There is no fast4x4 (for now)
+#ifndef vp8_fdct_fast4x4
+#define vp8_fdct_fast4x4  vp8_short_fdct4x4_c
+#endif
+
+#ifndef vp8_fdct_fast8x4
+#define vp8_fdct_fast8x4  vp8_short_fdct8x4_c
+#endif
+
 #ifndef vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_c
 #endif

From fd0d7ff4c155b94d3f322addc7b66234b6908cc6 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 22 Jun 2010 09:45:43 -0400
Subject: [PATCH 074/307] msvs: disable CRT deprecation warnings

Disables the warnings produced for so-called insecure standard C
functions.

Change-Id: I0e6f448e27f899a0eaefc1151185945fbe15718e
---
 build/make/gen_msvs_proj.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 63d537630..1398bfba9 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -437,7 +437,7 @@ generate_vcproj() {
                 Name="VCCLCompilerTool" \
                 Optimization="0" \
                 AdditionalIncludeDirectories="$incs" \
-                PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \
+                PreprocessorDefinitions="WIN32;_DEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
                 RuntimeLibrary="$debug_runtime" \
                 UsePrecompiledHeader="0" \
                 WarningLevel="3" \
@@ -595,7 +595,7 @@ generate_vcproj() {
         x86*) tag       Tool \
                       Name="VCCLCompilerTool" \
                       AdditionalIncludeDirectories="$incs" \
-                      PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;$defines" \
+                      PreprocessorDefinitions="WIN32;NDEBUG;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE;$defines" \
                       RuntimeLibrary="$release_runtime" \
                       UsePrecompiledHeader="0" \
                       WarningLevel="3" \

From 80f0e7a7d0e0b3637e71dcfa55089feabdff6b59 Mon Sep 17 00:00:00 2001
From: Michael Kohler <michaelkohler@live.com>
Date: Mon, 12 Jul 2010 18:41:45 +0200
Subject: [PATCH 075/307] limit range checking code for L[k] to CONFIG_DEBUG.
 patch by timeless@gmail.com

---
 vp8/decoder/decodemv.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 44b98e773..4f24b445f 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -226,13 +226,14 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                         int mv_contz;
 
                         while (j != L[++k])
-                            if (k >= 16)
+                        {
 #if CONFIG_DEBUG
+                            if (k >= 16)
+                            {
                                 assert(0);
-
-#else
-                                ;
+                            }
 #endif
+                        }
 
                         mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
 

From 7c938f4d3cfebf68e93b0bfa4debc89a202d267a Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 16 Jul 2010 15:57:17 +0100
Subject: [PATCH 076/307] Fix: Incorrect 'cols' calculation in temporal filter.

Change-Id: I37f10fbe4fbb505c1d34980a59af3e817c287e22
---
 vp8/encoder/onyx_if.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index f331a4ba2..a51f754a3 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3218,6 +3218,7 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
 #if VP8_TEMPORAL_ALT_REF
 static void vp8cx_temp_blur1_c
 (
+    VP8_COMP *cpi,
     unsigned char **frames,
     int frame_count,
     unsigned char *src,
@@ -3236,17 +3237,16 @@ static void vp8cx_temp_blur1_c
     int modifier = 0;
     int i, j, k;
     int block_ofset;
-    int Cols, Rows;
+    int cols;
     unsigned char Shift = (block_size == 16) ? 4 : 3;
 
-    Cols = width / block_size;
-    Rows = height / block_size;
+    cols = cpi->common.mb_cols;
 
     for (i = 0; i < height; i++)
     {
-        block_ofset = (i >> Shift) * Cols;
+        block_ofset = (i >> Shift) * cols;
 
-        for (j = 0; j < Cols; j ++)
+        for (j = 0; j < cols; j ++)
         {
             if (motion_map_ptr[block_ofset] > 2)
             {
@@ -3436,6 +3436,7 @@ static void vp8cx_temp_filter_c
 
     // Blur Y
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->y_buffer,  // cpi->Source->y_buffer,
@@ -3460,6 +3461,7 @@ static void vp8cx_temp_filter_c
 
     // Blur U
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->u_buffer,
@@ -3484,6 +3486,7 @@ static void vp8cx_temp_filter_c
 
     // Blur V
     vp8cx_temp_blur1_c(
+        cpi,
         cpi->frames,
         frames_to_blur,
         temp_source_buffer->v_buffer,

From bf18069cebd34326da5f479143f46c994ee41114 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 19 Jul 2010 14:10:07 +0100
Subject: [PATCH 077/307] Rate control fix for ARNR filtered frames.

Previously we had assumed that it was necessary to give a full frame's
bit allocation to the alt ref frame if it has been created through temporal
filtering. This is not the case. The active max quantizer control
insures that sufficient bits are allocated if needed and allocating a
full frame's worth of bits creates an excessive overhead for the ARF.

Change-Id: I83c95ed7bc7ce0e53ccae6ff32db5a97f145937a
---
 vp8/encoder/onyx_if.c  | 13 -------------
 vp8/encoder/ratectrl.c |  8 +++++---
 2 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index a51f754a3..c654eb394 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4960,20 +4960,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
             {
                 if (cpi->source_encode_index ==  cpi->last_alt_ref_sei)
                 {
-#if VP8_TEMPORAL_ALT_REF
-
-                    if (cpi->oxcf.arnr_max_frames == 0)
-                    {
-                        cpi->is_src_frame_alt_ref = 1; // copy alt ref
-                    }
-                    else
-                    {
-                        cpi->is_src_frame_alt_ref = 0;
-                    }
-
-#else
                     cpi->is_src_frame_alt_ref = 1;
-#endif
                     cpi->last_alt_ref_sei    = -1;
                 }
                 else
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index b309d5364..582c617ef 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1120,10 +1120,12 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
 
             }
             // If there is an active ARF at this location use the minimum
-            // bits on this frame unless it was a contructed arf.
-            else if (cpi->oxcf.arnr_max_frames == 0)
+            // bits on this frame even if it is a contructed arf.
+            // The active maximum quantizer insures that an appropriate
+            // number of bits will be spent if needed for contstructed ARFs.
+            else
             {
-                cpi->this_frame_target = 0;           // Minimial spend on gf that is replacing an arf
+                cpi->this_frame_target = 0;
             }
 
             cpi->current_gf_interval = cpi->frames_till_gf_update_due;

From 02277b8aa34a007ab211475605926b5942257658 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 19 Jul 2010 11:32:09 +0100
Subject: [PATCH 078/307] Parameter limit change.

Change maximum ARNR filter width to 15.

Change-Id: I3b72450ea08e96287445ec18810630ee2292954c
---
 vp8/vp8_cx_iface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 666432751..d0c47b35a 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -178,7 +178,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 
     RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
     RANGE_CHECK(vp8_cfg, Sharpness,         0, 7);
-    RANGE_CHECK(vp8_cfg, arnr_max_frames,    0, 25);
+    RANGE_CHECK(vp8_cfg, arnr_max_frames,    0, 15);
     RANGE_CHECK(vp8_cfg, arnr_strength,     0, 6);
     RANGE_CHECK(vp8_cfg, arnr_type,         0, 0xffffffff);
 

From 0ba32632cd0ba6a11248141acb9747279c71679d Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 19 Jul 2010 13:28:34 +0100
Subject: [PATCH 079/307] ARNR Lookup Table.

Change submitted for Adrian Grange. Convert threshold
calculation in ARNR filter to a lookup table.

Change-Id: I12a4bbb96b9ce6231ce2a6ecc2d295610d49e7ec
---
 vp8/encoder/onyx_if.c | 27 ++++++++++++++++++++++++---
 1 file changed, 24 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index c654eb394..e88d705a3 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3214,8 +3214,21 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
 }
 #endif
 // return of 0 means drop frame
-
+#define USE_FILTER_LUT 1
 #if VP8_TEMPORAL_ALT_REF
+
+#if USE_FILTER_LUT
+static int modifier_lut[7][19] =
+{
+16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // Strength=0
+16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=1
+16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=2
+16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,      // Strength=3
+16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // Strength=4
+16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0,  // Strength=5
+16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1// Strength=6
+};
+#endif
 static void vp8cx_temp_blur1_c
 (
     VP8_COMP *cpi,
@@ -3239,6 +3252,9 @@ static void vp8cx_temp_blur1_c
     int block_ofset;
     int cols;
     unsigned char Shift = (block_size == 16) ? 4 : 3;
+#if USE_FILTER_LUT
+    int *lut = modifier_lut[strength];
+#endif
 
     cols = cpi->common.mb_cols;
 
@@ -3265,7 +3281,12 @@ static void vp8cx_temp_blur1_c
                     {
                         // get current frame pixel value
                         int pixel_value = frames[frame][byte];
-
+#if USE_FILTER_LUT
+                        // LUT implementation --
+                        // improves precision of filter
+                        modifier = abs(src_byte-pixel_value);
+                        modifier = modifier>18 ? 0 : lut[modifier];
+#else
                         modifier   = src_byte;
                         modifier  -= pixel_value;
                         modifier  *= modifier;
@@ -3276,7 +3297,7 @@ static void vp8cx_temp_blur1_c
                             modifier = 16;
 
                         modifier = 16 - modifier;
-
+#endif
                         accumulator += modifier * pixel_value;
 
                         count += modifier;

From 72d4ba92f0712bb415b070fc919b4e99a36317b7 Mon Sep 17 00:00:00 2001
From: Tom Finegan <tomfinegan@google.com>
Date: Thu, 22 Jul 2010 13:34:25 -0400
Subject: [PATCH 080/307] Add vs9 targets.

Add targets x86-win32-vs9 and x86_64-win64-vs9 for support of Visual
Studio 2008-- this removes the need to convert the vs8 projects before
using them within the IDE.

Change-Id: Idb83e2ae701e07d98db1be71638280a493d770a2
---
 build/make/gen_msvs_proj.sh | 4 +++-
 build/make/gen_msvs_sln.sh  | 9 ++++++---
 configure                   | 2 ++
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 1398bfba9..5181d3252 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -206,7 +206,7 @@ for opt in "$@"; do
     ;;
     --ver=*) vs_ver="$optval"
              case $optval in
-             [78])
+             [789])
              ;;
              *) die Unrecognized Visual Studio Version in $opt
              ;;
@@ -248,6 +248,8 @@ case "${vs_ver:-8}" in
     ;;
     8) vs_ver_id="8.00"
     ;;
+    9) vs_ver_id="9.00"
+    ;;
 esac
 
 [ -n "$name" ] || die "Project name (--name) must be specified!"
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index b0904f99e..f377aa824 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -25,7 +25,7 @@ files.
 Options:
     --help                      Print this message
     --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8) of visual studio to generate for
+    --ver=version               Version (7,8,9) of visual studio to generate for
     --target=isa-os-cc          Target specifier
 EOF
     exit 1
@@ -224,7 +224,7 @@ for opt in "$@"; do
     ;;
     --ver=*) vs_ver="$optval"
              case $optval in
-             [78])
+             [789])
              ;;
              *) die Unrecognized Visual Studio Version in $opt
              ;;
@@ -235,7 +235,7 @@ for opt in "$@"; do
              7) sln_vers="8.00"
                 sln_vers_str="Visual Studio .NET 2003"
              ;;
-             8)
+             [89])
              ;;
              *) die "Unrecognized Visual Studio Version '$optval' in $opt"
              ;;
@@ -257,6 +257,9 @@ case "${vs_ver:-8}" in
     8) sln_vers="9.00"
        sln_vers_str="Visual Studio 2005"
     ;;
+    9) sln_vers="10.00"
+       sln_vers_str="Visual Studio 2008"
+    ;;
 esac
 
 for f in "${file_list[@]}"; do
diff --git a/configure b/configure
index d2dfb6b35..85230d967 100755
--- a/configure
+++ b/configure
@@ -107,11 +107,13 @@ all_platforms="${all_platforms} x86-solaris-gcc"
 all_platforms="${all_platforms} x86-win32-gcc"
 all_platforms="${all_platforms} x86-win32-vs7"
 all_platforms="${all_platforms} x86-win32-vs8"
+all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"
 all_platforms="${all_platforms} x86_64-win64-vs8"
+all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} universal-darwin8-gcc"
 all_platforms="${all_platforms} universal-darwin9-gcc"
 all_platforms="${all_platforms} generic-gnu"

From b791dca979d4fb26c6d3814e4375b676518bc5f2 Mon Sep 17 00:00:00 2001
From: Tom Finegan <tomfinegan@google.com>
Date: Thu, 22 Jul 2010 17:51:17 -0400
Subject: [PATCH 081/307] Change devenv.com command line.

Change /build to -build to avoid problems when builds are run within
msys bash shells.

Change-Id: Ie68d72f702adad00d99be8a01c7a388c3af7657d
---
 build/make/gen_msvs_sln.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index f377aa824..24d6f5d27 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -193,11 +193,11 @@ ${TAB}rm -rf "$platform"/"$config"
 ifneq (\$(found_devenv),)
   ifeq (\$(CONFIG_VS_VERSION),7)
 $nows_sln_config: $outfile
-${TAB}devenv.com $outfile /build "$config"
+${TAB}devenv.com $outfile -build "$config"
 
   else
 $nows_sln_config: $outfile
-${TAB}devenv.com $outfile /build "$sln_config"
+${TAB}devenv.com $outfile -build "$sln_config"
 
   endif
 else

From 4d86ef3534cd95f5b437c1bb658b442d58cca60e Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 22 Jun 2010 08:44:48 -0400
Subject: [PATCH 082/307] msvs: fix install of codec sources

The libs.mk file must be installed for the vpx.vcproj file to be
generated. It was being installed, but not in the src/ directory as
expected.

Also missed include files yasm.rules, quantize_x86.h

Change-Id: Ic1a6f836e953bfc954d6e42a18c102a0114821eb
---
 build/make/Makefile | 1 +
 libs.mk             | 3 ++-
 vp8/vp8cx.mk        | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/build/make/Makefile b/build/make/Makefile
index 2da8b4789..20a48671e 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -334,6 +334,7 @@ ifneq ($(call enabled,DIST-SRCS),)
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_def.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_proj.sh
     DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/yasm.rules
     DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
     #
     # This isn't really ARCH_ARM dependent, it's dependant on whether we're
diff --git a/libs.mk b/libs.mk
index 4bc5dd76c..32f8a34bc 100644
--- a/libs.mk
+++ b/libs.mk
@@ -11,6 +11,8 @@
 
 ASM:=$(if $(filter yes,$(CONFIG_GCC)),.asm.s,.asm)
 
+CODEC_SRCS-yes += libs.mk
+
 include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
 CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
 
@@ -59,7 +61,6 @@ CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
 # This variable uses deferred expansion intentionally, since the results of
 # $(wildcard) may change during the course of the Make.
 VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d))))
-CODEC_SRCS-yes += $(SRC_PATH_BARE)/libs.mk # to show up in the msvs workspace
 endif
 
 # The following pairs define a mapping of locations in the distribution
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index c88df4705..c0a58ae29 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -87,6 +87,7 @@ VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodemb_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/dct_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/mcomp_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/variance_x86.h
+VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_x86.h
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/x86_csystemdependent.c
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_mmx.c
 VP8_CX_SRCS-$(HAVE_MMX) += encoder/x86/variance_impl_mmx.asm

From 08eed049d4f08943079483cdd5d5d9f865457a67 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 22 Jul 2010 09:46:54 -0400
Subject: [PATCH 083/307] Remove CONFIG_NEW_TOKENS files.

These files were out of date and no longer maintained.
Token decoding has implemented the no-crash code which
is incompatible with this arm assembly code.

Change-Id: Ibf729886c56fca48181af60b44bda896c30023fc
---
 configure                            |   2 -
 vp8/decoder/arm/detokenizearm_sjl.c  | 731 ---------------------------
 vp8/decoder/arm/detokenizearm_v6.asm | 365 -------------
 vp8/decoder/onyxd_if_sjl.c           | 399 ---------------
 vp8/vp8_dx_iface.c                   |   3 -
 vp8/vp8dx_arm.mk                     |  11 -
 6 files changed, 1511 deletions(-)
 delete mode 100644 vp8/decoder/arm/detokenizearm_sjl.c
 delete mode 100644 vp8/decoder/arm/detokenizearm_v6.asm
 delete mode 100644 vp8/decoder/onyxd_if_sjl.c

diff --git a/configure b/configure
index 85230d967..9be0624fb 100755
--- a/configure
+++ b/configure
@@ -230,7 +230,6 @@ CONFIG_LIST="
 
     dequant_tokens
     dc_recon
-    new_tokens
     runtime_cpu_detect
     postproc
     multithread
@@ -269,7 +268,6 @@ CMDLINE_SELECT="
 
     dequant_tokens
     dc_recon
-    new_tokens
     postproc
     multithread
     psnr
diff --git a/vp8/decoder/arm/detokenizearm_sjl.c b/vp8/decoder/arm/detokenizearm_sjl.c
deleted file mode 100644
index e9917c175..000000000
--- a/vp8/decoder/arm/detokenizearm_sjl.c
+++ /dev/null
@@ -1,731 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "type_aliases.h"
-#include "blockd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/mem.h"
-
-#define BR_COUNT 8
-#define BOOL_DATA UINT8
-
-#define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-//ALIGN16 UINT16 onyx_coef_bands_x[16] = { 0, 1*OCB_X, 2*OCB_X, 3*OCB_X, 6*OCB_X, 4*OCB_X, 5*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 6*OCB_X, 7*OCB_X};
-DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
-
-#define EOB_CONTEXT_NODE            0
-#define ZERO_CONTEXT_NODE           1
-#define ONE_CONTEXT_NODE            2
-#define LOW_VAL_CONTEXT_NODE        3
-#define TWO_CONTEXT_NODE            4
-#define THREE_CONTEXT_NODE          5
-#define HIGH_LOW_CONTEXT_NODE       6
-#define CAT_ONE_CONTEXT_NODE        7
-#define CAT_THREEFOUR_CONTEXT_NODE  8
-#define CAT_THREE_CONTEXT_NODE      9
-#define CAT_FIVE_CONTEXT_NODE       10
-
-
-
-
-DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
-{
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //ZERO_TOKEN
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //DCT_VAL_CATEGORY1
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY2
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY3
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY4
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY5
-    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, //DCT_VAL_CATEGORY6
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  // EOB TOKEN
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
-{
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
-    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0 //end of vp8_block2above
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
-{
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    for (i = 0; i < 24; i++)
-    {
-
-        a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-        l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-
-        *a = *l = 0;
-    }
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        a = A[Y2CONTEXT] + vp8_block2above[24];
-        l = L[Y2CONTEXT] + vp8_block2left[24];
-        *a = *l = 0;
-    }
-
-
-}
-
-#define ONYXBLOCK2CONTEXT_OFFSET    0
-#define ONYXBLOCK2LEFT_OFFSET       25
-#define ONYXBLOCK2ABOVE_OFFSET 50
-
-DECLARE_ALIGNED(16, const static unsigned char, norm[128]) =
-{
-    0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
-};
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-void init_detokenizer(VP8D_COMP *dx)
-{
-    const VP8_COMMON *const oc = & dx->common;
-    MACROBLOCKD *x = & dx->mb;
-
-    dx->detoken.norm_ptr = (unsigned char *)norm;
-    dx->detoken.vp8_coef_tree_ptr = (vp8_tree_index *)vp8_coef_tree;
-    dx->detoken.ptr_onyxblock2context_leftabove = (UINT8 *)vp8_block2context_leftabove;
-    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
-    dx->detoken.scan = (int *)vp8_default_zig_zag1d;
-    dx->detoken.teb_base_ptr = (TOKENEXTRABITS *)vp8d_token_extra_bits2;
-
-    dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-
-    dx->detoken.coef_probs[0] = (unsigned char *)(oc->fc.coef_probs [0] [ 0 ] [0]);
-    dx->detoken.coef_probs[1] = (unsigned char *)(oc->fc.coef_probs [1] [ 0 ] [0]);
-    dx->detoken.coef_probs[2] = (unsigned char *)(oc->fc.coef_probs [2] [ 0 ] [0]);
-    dx->detoken.coef_probs[3] = (unsigned char *)(oc->fc.coef_probs [3] [ 0 ] [0]);
-
-}
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-
-//shift = norm[range]; \
-//      shift = norm_ptr[range]; \
-
-#define NORMALIZE \
-    /*if(range < 0x80)*/                            \
-    { \
-        shift = detoken->norm_ptr[range]; \
-        range <<= shift; \
-        value <<= shift; \
-        count -= shift; \
-        if(count <= 0) \
-        { \
-            count += BR_COUNT ; \
-            value |= (*bufptr) << (BR_COUNT-count); \
-            bufptr++; \
-        } \
-    }
-#if 1
-#define DECODE_AND_APPLYSIGN(value_to_sign) \
-    split = (range + 1) >> 1; \
-    if ( (value >> 24) < split ) \
-    { \
-        range = split; \
-        v= value_to_sign; \
-    } \
-    else \
-    { \
-        range = range-split; \
-        value = value-(split<<24); \
-        v = -value_to_sign; \
-    } \
-    range +=range;                   \
-    value +=value;                   \
-    if (!--count) \
-    { \
-        count = BR_COUNT; \
-        value |= *bufptr; \
-        bufptr++; \
-    }
-
-#define DECODE_AND_BRANCH_IF_ZERO(probability,branch) \
-    { \
-        split = 1 +  ((( probability*(range-1) ) )>> 8); \
-        if ( (value >> 24) < split ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            goto branch; \
-        } \
-        value -= (split<<24); \
-        range = range - split; \
-        NORMALIZE \
-    }
-
-#define DECODE_AND_LOOP_IF_ZERO(probability,branch) \
-    { \
-        split = 1 + ((( probability*(range-1) ) ) >> 8); \
-        if ( (value >> 24) < split ) \
-        { \
-            range = split; \
-            NORMALIZE \
-            Prob = coef_probs; \
-            ++c; \
-            Prob += vp8_coef_bands_x[c]; \
-            goto branch; \
-        } \
-        value -= (split<<24); \
-        range = range - split; \
-        NORMALIZE \
-    }
-
-#define DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val) \
-    DECODE_AND_APPLYSIGN(val) \
-    Prob = coef_probs + (ENTROPY_NODES*2); \
-    if(c < 15){\
-        qcoeff_ptr [ scan[c] ] = (INT16) v; \
-        ++c; \
-        goto DO_WHILE; }\
-    qcoeff_ptr [ scan[15] ] = (INT16) v; \
-    goto BLOCK_FINISHED;
-
-
-#define DECODE_EXTRABIT_AND_ADJUST_VAL(t,bits_count)\
-    split = 1 +  (((range-1) * vp8d_token_extra_bits2[t].Probs[bits_count]) >> 8); \
-    if(value >= (split<<24))\
-    {\
-        range = range-split;\
-        value = value-(split<<24);\
-        val += ((UINT16)1<<bits_count);\
-    }\
-    else\
-    {\
-        range = split;\
-    }\
-    NORMALIZE
-#endif
-
-#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-    const VP8_COMMON *const oc = & dx->common;
-
-    BOOL_DECODER *bc = x->current_bc;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    int eobtotal = 0;
-
-    register int count;
-
-    BOOL_DATA *bufptr;
-    register unsigned int range;
-    register unsigned int value;
-    const int *scan;
-    register unsigned int shift;
-    UINT32 split;
-    INT16 *qcoeff_ptr;
-
-    UINT8 *coef_probs;
-    int type;
-    int stop;
-    INT16 val, bits_count;
-    INT16 c;
-    INT16 t;
-    INT16 v;
-    vp8_prob *Prob;
-
-    //int *scan;
-    type = 3;
-    i = 0;
-    stop = 16;
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        i = 24;
-        stop = 24;
-        type = 1;
-        qcoeff_ptr = &x->qcoeff[24*16];
-        scan = vp8_default_zig_zag1d;
-        eobtotal -= 16;
-    }
-    else
-    {
-        scan = vp8_default_zig_zag1d;
-        qcoeff_ptr = &x->qcoeff[0];
-    }
-
-    count   = bc->count;
-    range   = bc->range;
-    value   = bc->value;
-    bufptr  = &bc->buffer[bc->pos];
-
-
-    coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
-    a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-    l = L[ vp8_block2context[i] ] + vp8_block2left[i];
-    c = (INT16)(!type);
-
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-    Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
-
-DO_WHILE:
-    Prob += vp8_coef_bands_x[c];
-    DECODE_AND_BRANCH_IF_ZERO(Prob[EOB_CONTEXT_NODE], BLOCK_FINISHED);
-
-CHECK_0_:
-    DECODE_AND_LOOP_IF_ZERO(Prob[ZERO_CONTEXT_NODE], CHECK_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[ONE_CONTEXT_NODE], ONE_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[LOW_VAL_CONTEXT_NODE], LOW_VAL_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[HIGH_LOW_CONTEXT_NODE], HIGH_LOW_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREEFOUR_CONTEXT_NODE], CAT_THREEFOUR_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_FIVE_CONTEXT_NODE], CAT_FIVE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].min_val;
-    bits_count = vp8d_token_extra_bits2[DCT_VAL_CATEGORY6].Length;
-
-    do
-    {
-        DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY6, bits_count);
-        bits_count -- ;
-    }
-    while (bits_count >= 0);
-
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_FIVE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY5].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 4);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY5, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREEFOUR_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_THREE_CONTEXT_NODE], CAT_THREE_CONTEXT_NODE_0_);
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY4].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 3);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY4, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_THREE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY3].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 2);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY3, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-HIGH_LOW_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[CAT_ONE_CONTEXT_NODE], CAT_ONE_CONTEXT_NODE_0_);
-
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY2].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 1);
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY2, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-CAT_ONE_CONTEXT_NODE_0_:
-    val = vp8d_token_extra_bits2[DCT_VAL_CATEGORY1].min_val;
-    DECODE_EXTRABIT_AND_ADJUST_VAL(DCT_VAL_CATEGORY1, 0);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(val);
-
-LOW_VAL_CONTEXT_NODE_0_:
-    DECODE_AND_BRANCH_IF_ZERO(Prob[TWO_CONTEXT_NODE], TWO_CONTEXT_NODE_0_);
-    DECODE_AND_BRANCH_IF_ZERO(Prob[THREE_CONTEXT_NODE], THREE_CONTEXT_NODE_0_);
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(4);
-
-THREE_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(3);
-
-TWO_CONTEXT_NODE_0_:
-    DECODE_SIGN_WRITE_COEFF_AND_CHECK_EXIT(2);
-
-ONE_CONTEXT_NODE_0_:
-    DECODE_AND_APPLYSIGN(1);
-    Prob = coef_probs + ENTROPY_NODES;
-
-    if (c < 15)
-    {
-        qcoeff_ptr [ scan[c] ] = (INT16) v;
-        ++c;
-        goto DO_WHILE;
-    }
-
-    qcoeff_ptr [ scan[15] ] = (INT16) v;
-BLOCK_FINISHED:
-    t = ((x->Block[i].eob = c) != !type);   // any nonzero data?
-    eobtotal += x->Block[i].eob;
-    *a = *l = t;
-    qcoeff_ptr += 16;
-
-    i++;
-
-    if (i < stop)
-        goto BLOCK_LOOP;
-
-    if (i == 25)
-    {
-        scan = vp8_default_zig_zag1d;//x->scan_order1d;
-        type = 0;
-        i = 0;
-        stop = 16;
-        coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        qcoeff_ptr = &x->qcoeff[0];
-        goto BLOCK_LOOP;
-    }
-
-    if (i == 16)
-    {
-        type = 2;
-        coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        stop = 24;
-        goto BLOCK_LOOP;
-    }
-
-    bc->count = count;
-    bc->value = value;
-    bc->range = range;
-    bc->pos  = bufptr - bc->buffer;
-    return eobtotal;
-
-}
-//#endif
-#else
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-#if 0
-//uses relative offsets
-
-const vp8_tree_index vp8_coef_tree_x[ 22] =   /* corresponding _CONTEXT_NODEs */
-{
-    -DCT_EOB_TOKEN, 1,                             /* 0 = EOB */
-    -ZERO_TOKEN, 1,                               /* 1 = ZERO */
-    -ONE_TOKEN, 1,                               /* 2 = ONE */
-    2, 5,                                       /* 3 = LOW_VAL */
-    -TWO_TOKEN, 1,                         /* 4 = TWO */
-    -THREE_TOKEN, -FOUR_TOKEN,                /* 5 = THREE */
-    2, 3,                                  /* 6 = HIGH_LOW */
-    -DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2,   /* 7 = CAT_ONE */
-    2, 3,                                 /* 8 = CAT_THREEFOUR */
-    -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4,  /* 9 = CAT_THREE */
-    -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6   /* 10 = CAT_FIVE */
-};
-#endif
-
-#define _SCALEDOWN 8 //16 //8
-
-int vp8_decode_mb_tokens_v5(DETOK *detoken, int type);
-
-int vp8_decode_mb_tokens_v5_c(DETOK *detoken, int type)
-{
-    BOOL_DECODER *bc = detoken->current_bc;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-    int i;
-
-    register int count;
-
-    BOOL_DATA *bufptr;
-    register unsigned int range;
-    register unsigned int value;
-    register unsigned int shift;
-    UINT32 split;
-    INT16 *qcoeff_ptr;
-
-    UINT8 *coef_probs;
-//  int type;
-    int stop;
-    INT16 c;
-    INT16 t;
-    INT16 v;
-    vp8_prob *Prob;
-
-
-
-//  type = 3;
-    i = 0;
-    stop = 16;
-    qcoeff_ptr = detoken->qcoeff_start_ptr;
-
-//  if( detoken->mode != B_PRED && detoken->mode != SPLITMV)
-    if (type == 1)
-    {
-        i += 24;
-        stop += 8; //24;
-//      type = 1;
-        qcoeff_ptr += 24 * 16;
-//      eobtotal-=16;
-    }
-
-    count   = bc->count;
-    range   = bc->range;
-    value   = bc->value;
-    bufptr  = &bc->buffer[bc->pos];
-
-
-    coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-
-BLOCK_LOOP:
-    a = detoken->A[ detoken->ptr_onyxblock2context_leftabove[i] ];
-    l = detoken->L[ detoken->ptr_onyxblock2context_leftabove[i] ];
-    c = !type;
-    a += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2ABOVE_OFFSET];
-    l += detoken->ptr_onyxblock2context_leftabove[i + ONYXBLOCK2LEFT_OFFSET];
-
-    //#define ONYX_COMBINEENTROPYCONTEXTS( Dest, A, B) \
-    //Dest = ((A)!=0) + ((B)!=0);
-
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
-
-    Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
-    t = 0;
-
-    do
-    {
-
-        {
-//                  onyx_tree_index * onyx_coef_tree_ptr = onyx_coef_tree_x;
-
-            Prob += detoken->ptr_onyx_coef_bands_x[c];
-
-        GET_TOKEN_START:
-
-            do
-            {
-                split = 1 + (((range - 1) * (Prob[t>>1])) >> 8);
-
-                if (value >> 24 >= split)
-                {
-                    range = range - split;
-                    value = value - (split << 24);
-                    t += 1;
-
-                    //used to eliminate else branch
-                    split = range;
-                }
-
-                range = split;
-
-                t = detoken->vp8_coef_tree_ptr[ t ];
-
-                NORMALIZE
-
-            }
-            while (t  > 0) ;
-        }
-    GET_TOKEN_STOP:
-
-        if (t == -DCT_EOB_TOKEN)
-        {
-            break;
-        }
-
-        v = -t;
-
-        if (v > FOUR_TOKEN)
-        {
-            INT16 bits_count;
-            TOKENEXTRABITS *teb_ptr;
-
-//                      teb_ptr = &onyxd_token_extra_bits2[t];
-//                  teb_ptr = &onyxd_token_extra_bits2[v];
-            teb_ptr = &detoken->teb_base_ptr[v];
-
-
-            v = teb_ptr->min_val;
-            bits_count = teb_ptr->Length;
-
-            do
-            {
-                split = 1 + (((range - 1) * teb_ptr->Probs[bits_count]) >> _SCALEDOWN);
-
-                if ((value >> 24) >= split)
-                {
-                    range = range - split;
-                    value = value - (split << 24);
-                    v += ((UINT16)1 << bits_count);
-
-                    //used to eliminate else branch
-                    split = range;
-                }
-
-                range = split;
-
-                NORMALIZE
-
-                bits_count -- ;
-            }
-            while (bits_count >= 0);
-        }
-
-        Prob = coef_probs;
-
-        if (t)
-        {
-            split = 1 + (((range - 1) * vp8_prob_half) >> 8);
-
-            if ((value >> 24) >= split)
-            {
-                range = range - split;
-                value = value - (split << 24);
-                v = (v ^ -1) + 1;           /* negate w/out conditionals */
-
-                //used to eliminate else branch
-                split = range;
-            }
-
-            range = split;
-
-            NORMALIZE
-            Prob += ENTROPY_NODES;
-
-            if (t < -ONE_TOKEN)
-                Prob += ENTROPY_NODES;
-
-            t = -2;
-        }
-
-        //if t is zero, we will skip the eob table check
-        t += 2;
-        qcoeff_ptr [detoken->scan [c] ] = (INT16) v;
-
-    }
-    while (++c < 16);
-
-    if (t != -DCT_EOB_TOKEN)
-    {
-        --c;
-    }
-
-    t = ((detoken->eob[i] = c) != !type);   // any nonzero data?
-//  eobtotal += detoken->eob[i];
-    *a = *l = t;
-    qcoeff_ptr += 16;
-
-    i++;
-
-    if (i < stop)
-        goto BLOCK_LOOP;
-
-    if (i == 25)
-    {
-        type = 0;
-        i = 0;
-        stop = 16;
-//      coef_probs = (unsigned char *)(oc->fc.coef_probs [type] [ 0 ] [0]);
-        coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        qcoeff_ptr = detoken->qcoeff_start_ptr;
-        goto BLOCK_LOOP;
-    }
-
-    if (i == 16)
-    {
-        type = 2;
-//      coef_probs =(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        coef_probs = detoken->coef_probs[type]; //(unsigned char *)( oc->fc.coef_probs [type] [ 0 ] [0]);
-        stop = 24;
-        goto BLOCK_LOOP;
-    }
-
-    bc->count = count;
-    bc->value = value;
-    bc->range = range;
-    bc->pos  = bufptr - bc->buffer;
-    return 0;
-}
-//#if 0
-int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
-{
-//  const ONYX_COMMON * const oc = & dx->common;
-    int eobtotal = 0;
-    int i, type;
-    /*
-        dx->detoken.norm_ptr = norm;
-        dx->detoken.onyx_coef_tree_ptr = onyx_coef_tree;
-        dx->detoken.ptr_onyxblock2context_leftabove = ONYXBLOCK2CONTEXT_LEFTABOVE;
-        dx->detoken.ptr_onyx_coef_bands_x = onyx_coef_bands_x;
-        dx->detoken.scan = default_zig_zag1d;
-        dx->detoken.teb_base_ptr = onyxd_token_extra_bits2;
-
-        dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
-
-        dx->detoken.A = x->above_context;
-        dx->detoken.L = x->left_context;
-
-        dx->detoken.coef_probs[0] = (unsigned char *)( oc->fc.coef_probs [0] [ 0 ] [0]);
-        dx->detoken.coef_probs[1] = (unsigned char *)( oc->fc.coef_probs [1] [ 0 ] [0]);
-        dx->detoken.coef_probs[2] = (unsigned char *)( oc->fc.coef_probs [2] [ 0 ] [0]);
-        dx->detoken.coef_probs[3] = (unsigned char *)( oc->fc.coef_probs [3] [ 0 ] [0]);
-    */
-
-    dx->detoken.current_bc = x->current_bc;
-    dx->detoken.A = x->above_context;
-    dx->detoken.L = x->left_context;
-
-    type = 3;
-
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-    {
-        type = 1;
-        eobtotal -= 16;
-    }
-
-    vp8_decode_mb_tokens_v5(&dx->detoken, type);
-
-    for (i = 0; i < 25; i++)
-    {
-        x->Block[i].eob = dx->detoken.eob[i];
-        eobtotal += dx->detoken.eob[i];
-    }
-
-    return eobtotal;
-}
-#endif
diff --git a/vp8/decoder/arm/detokenizearm_v6.asm b/vp8/decoder/arm/detokenizearm_v6.asm
deleted file mode 100644
index 92fd83656..000000000
--- a/vp8/decoder/arm/detokenizearm_v6.asm
+++ /dev/null
@@ -1,365 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_decode_mb_tokens_v5|
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-
-    INCLUDE vpx_asm_offsets.asm
-
-l_qcoeff    EQU     0
-l_i         EQU     4
-l_type      EQU     8
-l_stop      EQU     12
-l_c         EQU     16
-l_l_ptr      EQU     20
-l_a_ptr      EQU     24
-l_bc        EQU     28
-l_coef_ptr   EQU     32
-l_stacksize EQU     64
-
-
-;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset      EQU 25
-c_onyxblock2above_offset     EQU 50
-c_entropy_nodes              EQU 11
-c_dct_eob_token              EQU 11
-
-|vp8_decode_mb_tokens_v5| PROC
-    stmdb       sp!, {r4 - r11, lr}
-    sub         sp, sp, #l_stacksize
-    mov         r7, r1
-    mov         r9, r0                      ;DETOK *detoken
-
-    ldr         r1, [r9, #detok_current_bc]
-    ldr         r0, [r9, #detok_qcoeff_start_ptr]
-    mov         r11, #0
-    mov         r3, #0x10
-
-    cmp         r7, #1
-    addeq       r11, r11, #24
-    addeq       r3, r3, #8
-    addeq       r0, r0, #3, 24
-
-    str         r0, [sp, #l_qcoeff]
-    str         r11, [sp, #l_i]
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-    str         r1, [sp, #l_bc]
-
-    add         lr, r9, r7, lsl #2
-
-    ldr         r2, [r1, #bool_decoder_buffer]
-    ldr         r3, [r1, #bool_decoder_pos]
-
-    ldr         r10, [lr, #detok_coef_probs]
-    ldr         r5, [r1, #bool_decoder_count]
-    ldr         r6, [r1, #bool_decoder_range]
-    ldr         r4, [r1, #bool_decoder_value]
-    add         r8, r2, r3
-
-    str         r10, [sp, #l_coef_ptr]
-
-
-    ;align 4
-BLOCK_LOOP
-    ldr         r3, [r9, #detok_ptr_onyxblock2context_leftabove]
-    ldr         r2, [r9, #DETOK_A]
-    ldr         r1, [r9, #DETOK_L]
-    ldrb        r12, [r3, +r11]                                 ; detoken->ptr_onyxblock2context_leftabove[i]
-
-    cmp         r7, #0                                          ; check type
-    moveq       r7, #1
-    movne       r7, #0
-
-    ldr         r0, [r2, +r12, lsl #2]                          ; a
-    add         r1, r1, r12, lsl #4
-    add         r3, r3, r11
-
-    ldrb        r2, [r3, #c_onyxblock2above_offset]
-    ldrb        r3, [r3, #c_onyxblock2left_offset]
-    mov         lr, #c_entropy_nodes
-;;  ;++
-
-    ldr         r2, [r0, +r2, lsl #2]!
-    add         r3, r1, r3, lsl #2
-    str         r3, [sp, #l_l_ptr]
-    ldr         r3, [r3]
-
-    cmp         r2, #0
-    movne       r2, #1
-    cmp         r3, #0
-    addne       r2, r2, #1
-
-    str         r0, [sp, #l_a_ptr]
-    smlabb      r0, r2, lr, r10
-    mov         r1, #0                                          ; t = 0
-    str         r7, [sp, #l_c]
-
-    ;align 4
-COEFF_LOOP
-    ldr         r3, [r9, #detok_ptr_onyx_coef_bands_x]
-    ldr         lr, [r9, #detok_onyx_coef_tree_ptr]
-
-;;the following two lines are used if onyx_coef_bands_x is UINT16
-;;  add         r3, r3, r7, lsl #1
-;;  ldrh        r3, [r3]
-
-;;the following line is used if onyx_coef_bands_x is UINT8
-    ldrb        r3, [r7, +r3]
-
-
-;;  ;++
-;;  pld         [r8]
-    ;++
-    add         r0, r0, r3
-
-    ;align 4
-get_token_loop
-    ldrb        r2, [r0, +r1, asr #1]
-    mov         r3, r6, lsl #8
-    sub         r3, r3, #256                    ;split = 1 +  (((range-1) * probability) >> 8)
-    mov         r10, #1
-
-    smlawb      r2, r3, r2, r10
-    ldrb        r12, [r8]                       ;load cx data byte in stall slot
-    ;++
-
-    subs        r3, r4, r2, lsl #24             ;x = value-(split<<24)
-    addhs       r1, r1, #1                      ;t += 1
-    movhs       r4, r3                          ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    movlo       r6, r2
-
-;;; ldrsbhs     r1, [r1, +lr]
-    ldrsb     r1, [r1, +lr]
-
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN22@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3
-    mov         r4, r4, lsl r3
-
-;; use branch for short pipelines ???
-;;  bgt         |$LN22@decode_mb_to|
-
-    addle         r5, r5, #8
-    rsble         r3, r5, #8
-    addle         r8, r8, #1
-    orrle         r4, r4, r12, lsl r3
-
-;;|$LN22@decode_mb_to|
-
-    cmp         r1, #0
-    bgt         get_token_loop
-
-    cmn         r1, #c_dct_eob_token             ;if(t == -DCT_EOB_TOKEN)
-    beq         END_OF_BLOCK
-
-    rsb         lr, r1, #0                      ;v = -t;
-
-    cmp         lr, #4                          ;if(v > FOUR_TOKEN)
-    ble         SKIP_EXTRABITS
-
-    ldr         r3, [r9, #detok_teb_base_ptr]
-    mov         r11, #1
-    add         r7, r3, lr, lsl #4
-
-    ldrsh       lr, [r7, #tokenextrabits_min_val];v = teb_ptr->min_val
-    ldrsh       r0, [r7, #tokenextrabits_length];bits_count = teb_ptr->Length
-
-extrabits_loop
-    add         r3, r0, r7
-
-    ldrb        r2, [r3, #4]
-    mov         r3, r6, lsl #8
-    sub         r3, r3, #256                    ;split = 1 +  (((range-1) * probability) >> 8)
-    mov         r10, #1
-
-    smlawb      r2, r3, r2, r10
-    ldrb        r12, [r8]
-    ;++
-
-    subs        r10, r4, r2, lsl #24            ;x = value-(split<<24)
-    movhs       r4, r10                         ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    addhs       lr, lr, r11, lsl r0             ;v += ((UINT16)1<<bits_count)
-    movlo       r6, r2                          ;range = split
-
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN10@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3                  ;range
-    mov         r4, r4, lsl r3                  ;value
-
-    addle       r5, r5, #8
-    addle       r8, r8, #1
-    rsble       r3, r5, #8
-    orrle       r4, r4, r12, lsl r3
-
-;;|$LN10@decode_mb_to|
-    subs         r0, r0, #1
-    bpl         extrabits_loop
-
-
-SKIP_EXTRABITS
-    ldr         r11, [sp, #l_qcoeff]
-    ldr         r0, [sp, #l_coef_ptr]
-
-    cmp         r1, #0                          ;check for nonzero token
-    beq         SKIP_EOB_CHECK              ;if t is zero, we will skip the eob table chec
-
-    sub         r3, r6, #1                      ;range - 1
-    ;++
-    mov         r3, r3, lsl #7                  ; *= onyx_prob_half  (128)
-    ;++
-    mov         r3, r3, lsr #8
-    add         r2, r3, #1                      ;split
-
-    subs        r3, r4, r2, lsl #24             ;x = value-(split<<24)
-    movhs       r4, r3                          ;update value
-    subhs       r2, r6, r2                      ;range = range - split
-    mvnhs       r3, lr
-    addhs       lr, r3, #1                      ;v = (v ^ -1) + 1
-    movlo       r6, r2                          ;range = split
-
-;; use branch for short pipelines ???
-;;  cmp         r2, #0x80
-;;  bcs         |$LN6@decode_mb_to|
-
-    clz         r3, r2
-    sub         r3, r3, #24
-    subs        r5, r5, r3
-    mov         r6, r2, lsl r3
-    mov         r4, r4, lsl r3
-    ldrleb      r2, [r8], #1
-    addle       r5, r5, #8
-    rsble       r3, r5, #8
-    orrle       r4, r4, r2, lsl r3
-
-;;|$LN6@decode_mb_to|
-    add         r0, r0, #0xB
-
-    cmn         r1, #1
-
-    addlt       r0, r0, #0xB
-
-    mvn         r1, #1
-
-SKIP_EOB_CHECK
-    ldr         r7, [sp, #l_c]
-    ldr         r3, [r9, #detok_scan]
-    add         r1, r1, #2
-    cmp         r7, #(0x10 - 1)                     ;assume one less for now.... increment below
-
-    ldr         r3, [r3, +r7, lsl #2]
-    add         r7, r7, #1
-    add         r3, r11, r3, lsl #1
-
-    str         r7, [sp, #l_c]
-    strh        lr, [r3]
-
-    blt         COEFF_LOOP
-
-    sub         r7, r7, #1                          ;if(t != -DCT_EOB_TOKEN) --c
-
-END_OF_BLOCK
-    ldr         r3, [sp, #l_type]
-    ldr         r10, [sp, #l_coef_ptr]
-    ldr         r0, [sp, #l_qcoeff]
-    ldr         r11, [sp, #l_i]
-    ldr         r12, [sp, #l_stop]
-
-    cmp         r3, #0
-    moveq       r1, #1
-    movne       r1, #0
-    add         r3, r11, r9
-
-    cmp         r7, r1
-    strb        r7, [r3, #detok_eob]
-
-    ldr         r7, [sp, #l_l_ptr]
-    ldr         r2, [sp, #l_a_ptr]
-    movne       r3, #1
-    moveq       r3, #0
-
-    add         r0, r0, #0x20
-    add         r11, r11, #1
-    str         r3, [r7]
-    str         r3, [r2]
-    str         r0, [sp, #l_qcoeff]
-    str         r11, [sp, #l_i]
-
-    cmp         r11, r12                            ;i >= stop ?
-    ldr         r7, [sp, #l_type]
-    mov         lr, #0xB
-
-    blt         BLOCK_LOOP
-
-    cmp         r11, #0x19
-    bne         ln2_decode_mb_to
-
-    ldr         r12, [r9, #detok_qcoeff_start_ptr]
-    ldr         r10, [r9, #detok_coef_probs]
-    mov         r7, #0
-    mov         r3, #0x10
-    str         r12, [sp, #l_qcoeff]
-    str         r7, [sp, #l_i]
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-
-    str         r10, [sp, #l_coef_ptr]
-
-    b           BLOCK_LOOP
-
-ln2_decode_mb_to
-    cmp         r11, #0x10
-    bne         ln1_decode_mb_to
-
-    ldr         r10, [r9, #0x30]
-
-    mov         r7, #2
-    mov         r3, #0x18
-
-    str         r7, [sp, #l_type]
-    str         r3, [sp, #l_stop]
-
-    str         r10, [sp, #l_coef_ptr]
-    b           BLOCK_LOOP
-
-ln1_decode_mb_to
-    ldr         r2, [sp, #l_bc]
-    mov         r0, #0
-    nop
-
-    ldr         r3, [r2, #bool_decoder_buffer]
-    str         r5, [r2, #bool_decoder_count]
-    str         r4, [r2, #bool_decoder_value]
-    sub         r3, r8, r3
-    str         r3, [r2, #bool_decoder_pos]
-    str         r6, [r2, #bool_decoder_range]
-
-    add         sp, sp, #l_stacksize
-    ldmia       sp!, {r4 - r11, pc}
-
-    ENDP  ; |vp8_decode_mb_tokens_v5|
-
-    END
diff --git a/vp8/decoder/onyxd_if_sjl.c b/vp8/decoder/onyxd_if_sjl.c
deleted file mode 100644
index e68a7a075..000000000
--- a/vp8/decoder/onyxd_if_sjl.c
+++ /dev/null
@@ -1,399 +0,0 @@
-/*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "onyxc_int.h"
-#include "postproc.h"
-#include "onyxd.h"
-#include "onyxd_int.h"
-#include "vpx_mem/vpx_mem.h"
-#include "alloccommon.h"
-#include "vpx_scale/yv12extend.h"
-#include "loopfilter.h"
-#include "swapyv12buffer.h"
-#include "g_common.h"
-#include "threading.h"
-#include "decoderthreading.h"
-#include <stdio.h>
-#include "segmentation_common.h"
-#include "quant_common.h"
-#include "vpx_scale/vpxscale.h"
-#include "systemdependent.h"
-#include "vpx_ports/vpx_timer.h"
-
-
-#ifndef VPX_NO_GLOBALS
-static int init_ct = 0;
-#else
-# include "vpx_global_handling.h"
-# define init_ct ((int)vpxglobalm(onyxd,init_ct))
-#endif
-
-extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
-extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
-extern void init_detokenizer(VP8D_COMP *dx);
-
-// DEBUG code
-void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
-{
-    FILE *yuv_file = fopen((char *)name, "ab");
-    unsigned char *src = s->y_buffer;
-    int h = s->y_height;
-
-    do
-    {
-        fwrite(src, s->y_width, 1,  yuv_file);
-        src += s->y_stride;
-    }
-    while (--h);
-
-    src = s->u_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1,  yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    src = s->v_buffer;
-    h = s->uv_height;
-
-    do
-    {
-        fwrite(src, s->uv_width, 1, yuv_file);
-        src += s->uv_stride;
-    }
-    while (--h);
-
-    fclose(yuv_file);
-}
-
-void vp8dx_initialize()
-{
-    if (!init_ct++)
-    {
-        vp8_initialize_common();
-        vp8_scale_machine_specific_config();
-    }
-}
-
-void vp8dx_shutdown()
-{
-    if (!--init_ct)
-    {
-        vp8_shutdown_common();
-    }
-}
-
-
-VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
-{
-    VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP));
-
-    if (!pbi)
-        return NULL;
-
-    vpx_memset(pbi, 0, sizeof(VP8D_COMP));
-
-    vp8dx_initialize();
-
-    vp8_create_common(&pbi->common);
-    vp8_dmachine_specific_config(pbi);
-
-    pbi->common.current_video_frame = 0;
-    pbi->ready_for_new_data = 1;
-
-    pbi->CPUFreq = 0; //vp8_get_processor_freq();
-    pbi->max_threads = oxcf->max_threads;
-    vp8_decoder_create_threads(pbi);
-
-    //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
-    // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
-    vp8cx_init_de_quantizer(pbi);
-
-    {
-        VP8_COMMON *cm = &pbi->common;
-
-        vp8_init_loop_filter(cm);
-        cm->last_frame_type = KEY_FRAME;
-        cm->last_filter_type = cm->filter_type;
-        cm->last_sharpness_level = cm->sharpness_level;
-    }
-
-    init_detokenizer(pbi);
-
-    return (VP8D_PTR) pbi;
-}
-void vp8dx_remove_decompressor(VP8D_PTR ptr)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
-    if (!pbi)
-        return;
-
-    vp8_decoder_remove_threads(pbi);
-    vp8_remove_common(&pbi->common);
-    vpx_free(pbi);
-    vp8dx_shutdown();
-
-}
-
-void vp8dx_set_setting(VP8D_PTR comp, VP8D_SETTING oxst, int x)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-    (void) x;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-}
-
-int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) comp;
-
-    (void) pbi;
-
-    switch (oxst)
-    {
-    case VP8D_OK:
-        break;
-    }
-
-    return -1;
-}
-
-int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-
-    if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
-    else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
-    else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
-    else
-        return -1;
-
-    return 0;
-}
-int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-
-    if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
-    else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
-    else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
-    else
-        return -1;
-
-    return 0;
-}
-int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, char *source, INT64 time_stamp)
-{
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-    VP8_COMMON *cm = &pbi->common;
-    int retcode = 0;
-
-    struct vpx_usec_timer timer;
-    (void) size;
-
-//  if(pbi->ready_for_new_data == 0)
-//      return -1;
-
-    vpx_usec_timer_start(&timer);
-
-    if (ptr == 0)
-    {
-        return -1;
-    }
-
-    //cm->current_video_frame++;
-    pbi->Source = source;
-
-    retcode = vp8_decode_frame(pbi);
-
-    if (retcode < 0)
-        return retcode;
-
-    // Update the GF useage maps.
-    vp8_update_gf_useage_maps(cm, &pbi->mb);
-
-    if (pbi->b_multithreaded)
-        vp8_stop_lfthread(pbi);
-
-    if (cm->refresh_last_frame)
-    {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
-        cm->frame_to_show = &cm->last_frame;
-    }
-    else
-    {
-        cm->frame_to_show = &cm->new_frame;
-    }
-
-    if (!pbi->b_multithreaded)
-    {
-        struct vpx_usec_timer lpftimer;
-        vpx_usec_timer_start(&lpftimer);
-        // Apply the loop filter if appropriate.
-
-        if (cm->filter_level > 0)
-        {
-            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-            cm->last_frame_type = cm->frame_type;
-            cm->last_filter_type = cm->filter_type;
-            cm->last_sharpness_level = cm->sharpness_level;
-
-        }
-
-        vpx_usec_timer_mark(&lpftimer);
-        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-    }
-
-    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
-
-#if 0
-    // DEBUG code
-    //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
-    if (cm->current_video_frame <= 5)
-        write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
-#endif
-
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->copy_buffer_to_arf)
-    {
-        if (cm->copy_buffer_to_arf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-        }
-        else if (cm->copy_buffer_to_arf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
-    }
-
-    if (cm->copy_buffer_to_gf)
-    {
-        if (cm->copy_buffer_to_gf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
-        }
-        else if (cm->copy_buffer_to_gf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
-    }
-
-    // Should the golden or alternate reference frame be refreshed?
-    if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
-    {
-        if (cm->refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
-        if (cm->refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
-        //vpx_log("Decoder: recovery frame received \n");
-
-        // Update data structures that monitors GF useage
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
-    }
-
-    vp8_clear_system_state();
-
-    vpx_usec_timer_mark(&timer);
-    pbi->decode_microseconds = vpx_usec_timer_elapsed(&timer);
-
-    pbi->time_decoding += pbi->decode_microseconds;
-
-//  vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
-
-    cm->current_video_frame++;
-    pbi->ready_for_new_data = 0;
-    pbi->last_time_stamp = time_stamp;
-
-    {
-        int i;
-        INT64 earliest_time = pbi->dr[0].time_stamp;
-        INT64 latest_time = pbi->dr[0].time_stamp;
-        INT64 time_diff = 0;
-        int bytes = 0;
-
-        pbi->dr[pbi->common.current_video_frame&0xf].size = pbi->bc.pos + pbi->bc2.pos + 4;;
-        pbi->dr[pbi->common.current_video_frame&0xf].time_stamp = time_stamp;
-
-        for (i = 0; i < 16; i++)
-        {
-
-            bytes += pbi->dr[i].size;
-
-            if (pbi->dr[i].time_stamp < earliest_time)
-                earliest_time = pbi->dr[i].time_stamp;
-
-            if (pbi->dr[i].time_stamp > latest_time)
-                latest_time = pbi->dr[i].time_stamp;
-        }
-
-        time_diff = latest_time - earliest_time;
-
-        if (time_diff > 0)
-        {
-            pbi->common.bitrate = 80000.00 * bytes / time_diff  ;
-            pbi->common.framerate = 160000000.00 / time_diff ;
-        }
-
-    }
-    return retcode;
-}
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level,  int noise_level, int flags)
-{
-    int ret = -1;
-    VP8D_COMP *pbi = (VP8D_COMP *) ptr;
-
-    if (pbi->ready_for_new_data == 1)
-        return ret;
-
-    // ie no raw frame to show!!!
-    if (pbi->common.show_frame == 0)
-        return ret;
-
-    pbi->ready_for_new_data = 1;
-    *time_stamp = pbi->last_time_stamp;
-    *time_end_stamp = 0;
-
-    sd->clrtype = pbi->common.clr_type;
-    ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
-    vp8_clear_system_state();
-    return ret;
-}
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index ea75529cd..e0e1103f0 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -196,9 +196,6 @@ static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
     ctx->pbi->fb_storage_ptr[0] = mmap_lkup(ctx, VP6_SEG_IMG0_STRG);
     ctx->pbi->fb_storage_ptr[1] = mmap_lkup(ctx, VP6_SEG_IMG1_STRG);
     ctx->pbi->fb_storage_ptr[2] = mmap_lkup(ctx, VP6_SEG_IMG2_STRG);
-    #if CONFIG_NEW_TOKENS
-    ctx->pbi->token_graph = mmap_lkup(ctx, VP6_SEG_TOKEN_GRAPH);
-    #endif
     #if CONFIG_POSTPROC
     ctx->pbi->postproc.deblock.fragment_variances = mmap_lkup(ctx, VP6_SEG_DEBLOCKER);
     ctx->pbi->fb_storage_ptr[3] = mmap_lkup(ctx, VP6_SEG_PP_IMG_STRG);
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index f8bcd32d2..e741680c0 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -32,14 +32,3 @@ VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantdcidct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantidct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
-
-
-#for new token test
-ifeq ($(ARCH_ARM),yes)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm_sjl.c
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm_v6$(ASM)
-VP8_DX_SRCS-$(CONFIG_NEW_TOKENS)  += decoder/onyxd_if_sjl.c
-
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS)  += decoder/arm/detokenize_arm.c
-VP8_DX_SRCS_REMOVE-$(CONFIG_NEW_TOKENS)  += decoder/onyxd_if.c
-endif

From d576690ba1b5fbe1a3e03207fe04c4d0cb875e73 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 23 Jul 2010 16:47:54 +0100
Subject: [PATCH 084/307] 80 character line length on Arnr LUT

Tweaked table to fit to 80 characters.

Change-Id: Ie6ba80e0b31b33e23d2bf78599abe223369fcefb
---
 vp8/encoder/onyx_if.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index e88d705a3..581cb68c7 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3220,13 +3220,13 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
 #if USE_FILTER_LUT
 static int modifier_lut[7][19] =
 {
-16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         // Strength=0
-16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=1
-16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=2
-16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,      // Strength=3
-16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // Strength=4
-16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0,  // Strength=5
-16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1// Strength=6
+16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=0
+16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // Strength=1
+16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // Strength=2
+16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // Strength=3
+16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,    // Strength=4
+16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0, // Strength=5
+16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2,1// Strength=6
 };
 #endif
 static void vp8cx_temp_blur1_c

From e04e293522a3cf3761eae3690b8efbc2aa69848b Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 28 Jun 2010 17:15:09 -0700
Subject: [PATCH 085/307] Make the quantizer exact.

This replaces the approximate division-by-multiplication in the
 quantizer with an exact one that costs just one add and one
 shift extra.
The asm versions have not been updated in this patch, and thus
 have been disabled, since the new method requires different
 multipliers which are not compatible with the old method.

Change-Id: I53ac887af0f969d906e464c88b1f4be69c6b1206
---
 vp8/encoder/arm/csystemdependent.c     |  2 +-
 vp8/encoder/block.h                    |  1 +
 vp8/encoder/encodeframe.c              | 33 +++++++++++++++++++++-----
 vp8/encoder/ethreading.c               |  1 +
 vp8/encoder/onyx_int.h                 |  3 +++
 vp8/encoder/quantize.c                 | 10 ++++++--
 vp8/encoder/x86/x86_csystemdependent.c |  6 ++---
 7 files changed, 44 insertions(+), 12 deletions(-)

diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 4521bfc31..bfceab16c 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -63,7 +63,7 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
 
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;
+    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
     cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
     cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index b55bc51cb..19d307d26 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -33,6 +33,7 @@ typedef struct
 
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     short(*quant)[4];
+    short(*quant_shift)[4];
     short(*zbin)[4];
     short(*zrun_zbin_boost);
     short(*round)[4];
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 32cef1db1..a05b33268 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -103,6 +103,18 @@ static const int qzbin_factors[129] =
     80,
 };
 
+static void vp8cx_invert_quant(short *quant, short *shift, short d)
+{
+    unsigned t;
+    int l;
+    t = d;
+    for(l = 0; t > 1; l++)
+        t>>=1;
+    t = 1 + (1<<(16+l))/d;
+    *quant = (short)(t - (1<<16));
+    *shift = l;
+}
+
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
     int r, c;
@@ -116,21 +128,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
+                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
         cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y1dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
+                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
         cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
+                           cpi->UVquant_shift[Q][0] + 0, quant_val);
         cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
         cpi->common.UVdequant[Q][0][0] = quant_val;
@@ -144,21 +159,24 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             c = (rc & 3);
 
             quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
+                               cpi->Y1quant_shift[Q][r] + c, quant_val);
             cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y1dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
+                               cpi->Y2quant_shift[Q][r] + c, quant_val);
             cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
+                               cpi->UVquant_shift[Q][r] + c, quant_val);
             cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
             cpi->common.UVdequant[Q][r][c] = quant_val;
@@ -198,6 +216,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 0; i < 16; i++)
     {
         x->block[i].quant = cpi->Y1quant[QIndex];
+        x->block[i].quant_shift = cpi->Y1quant_shift[QIndex];
         x->block[i].zbin = cpi->Y1zbin[QIndex];
         x->block[i].round = cpi->Y1round[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.Y1dequant[QIndex];
@@ -211,6 +230,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     for (i = 16; i < 24; i++)
     {
         x->block[i].quant = cpi->UVquant[QIndex];
+        x->block[i].quant_shift = cpi->UVquant_shift[QIndex];
         x->block[i].zbin = cpi->UVzbin[QIndex];
         x->block[i].round = cpi->UVround[QIndex];
         x->e_mbd.block[i].dequant = cpi->common.UVdequant[QIndex];
@@ -221,6 +241,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     // Y2
     zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
     x->block[24].quant = cpi->Y2quant[QIndex];
+    x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
     x->block[24].zbin = cpi->Y2zbin[QIndex];
     x->block[24].round = cpi->Y2round[QIndex];
     x->e_mbd.block[24].dequant = cpi->common.Y2dequant[QIndex];
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index dd98a09d1..54646f421 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -286,6 +286,7 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
     for (i = 0; i < 25; i++)
     {
         z->block[i].quant           = x->block[i].quant;
+        z->block[i].quant_shift     = x->block[i].quant_shift;
         z->block[i].zbin            = x->block[i].zbin;
         z->block[i].zrun_zbin_boost   = x->block[i].zrun_zbin_boost;
         z->block[i].round           = x->block[i].round;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index fcde2205d..f76d2efcd 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -234,14 +234,17 @@ typedef struct
 {
 
     DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
 
     DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
     DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
 
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 181870c11..877002b08 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -25,6 +25,7 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -45,7 +46,9 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 
         if (x >= zbin)
         {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
             x  = (y ^ sz) - sz;                         // get the sign back
             qcoeff_ptr[rc] = x;                          // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
@@ -69,6 +72,7 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
+    short *quant_shift_ptr = &b->quant_shift[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
@@ -95,7 +99,9 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
         if (x >= zbin)
         {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x += round_ptr[rc];
+            y  = (((x * quant_ptr[rc]) >> 16) + x)
+                 >> quant_shift_ptr[rc];                // quantize (x)
             x  = (y ^ sz) - sz;                         // get the sign back
             qcoeff_ptr[rc]  = x;                         // write to destination
             dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 11ef4197b..be226e040 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -238,7 +238,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.submby                = vp8_subtract_mby_mmx;
         cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_mmx;
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
     }
 
 #endif
@@ -285,8 +285,8 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */
 
-        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
+        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
     }
 
 #endif

From 9404c7db6d627e4b1bed24419a6e1d388af29d93 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 23 Jul 2010 17:01:12 +0100
Subject: [PATCH 086/307] Rate control bug with long key frame interval.

In two pass encodes, the calculation of the number of bits
allocated to a KF group had the potential to overflow for high data
rates if the interval is very long.

We observed the problem in one test clip where there was one
section where there was an 8000 frame gap between key frames.

Change-Id: Ic48eb86271775d7573b4afd166b567b64f25b787
---
 vp8/encoder/firstpass.c | 79 +++++++++++++++++++++++++++++------------
 vp8/encoder/onyx_int.h  | 11 ++++--
 2 files changed, 65 insertions(+), 25 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 0a33feb1c..24886cb1d 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1572,26 +1572,36 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         // Calculate the number of bits to be spent on the gf or arf based on the boost number
         cpi->gf_bits = (int)((double)Boost * (cpi->gf_group_bits / (double)allocation_chunks));
 
-        // If the frame that is to be boosted is simpler than the average for the gf/arf group then use an alternative calculation
+        // If the frame that is to be boosted is simpler than the average for
+        // the gf/arf group then use an alternative calculation
         // based on the error score of the frame itself
         if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval)
         {
             double  alt_gf_grp_bits;
             int     alt_gf_bits;
 
-            alt_gf_grp_bits = ((double)cpi->kf_group_bits  * (mod_frame_err * (double)cpi->baseline_gf_interval) / (double)cpi->kf_group_error_left) ;
-            alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks));
+            alt_gf_grp_bits =
+                (double)cpi->kf_group_bits  *
+                (mod_frame_err * (double)cpi->baseline_gf_interval) /
+                DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left);
+
+            alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits /
+                                                 (double)allocation_chunks));
 
             if (cpi->gf_bits > alt_gf_bits)
             {
                 cpi->gf_bits = alt_gf_bits;
             }
         }
-        // Else if it is harder than other frames in the group make sure it at least receives an allocation in keeping with
-        // its relative error score, otherwise it may be worse off than an "un-boosted" frame
+        // Else if it is harder than other frames in the group make sure it at
+        // least receives an allocation in keeping with its relative error
+        // score, otherwise it may be worse off than an "un-boosted" frame
         else
         {
-            int alt_gf_bits = (int)((double)cpi->kf_group_bits * (mod_frame_err / (double)cpi->kf_group_error_left));
+            int alt_gf_bits =
+                (int)((double)cpi->kf_group_bits *
+                      mod_frame_err /
+                      DOUBLE_DIVIDE_CHECK((double)cpi->kf_group_error_left));
 
             if (alt_gf_bits > cpi->gf_bits)
             {
@@ -2062,7 +2072,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     // Take a copy of the initial frame details
     vpx_memcpy(&first_frame, this_frame, sizeof(*this_frame));
 
-    cpi->kf_group_bits = 0;       // Estimate of total bits avaialable to kf group
+    cpi->kf_group_bits = 0;        // Total bits avaialable to kf group
     cpi->kf_group_error_left = 0;  // Group modified error score.
 
     kf_mod_err = calculate_modified_err(cpi, this_frame);
@@ -2129,39 +2139,64 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     // Calculate the number of bits that should be assigned to the kf group.
     if ((cpi->bits_left > 0) && ((int)cpi->modified_total_error_left > 0))
     {
-        int max_bits = frame_max_bits(cpi);    // Max for a single normal frame (not key frame)
+        // Max for a single normal frame (not key frame)
+        int max_bits = frame_max_bits(cpi);
 
-        // Default allocation based on bits left and relative complexity of the section
-        cpi->kf_group_bits = (int)(cpi->bits_left * (kf_group_err / cpi->modified_total_error_left));
+        // Maximum bits for the kf group
+        long long max_grp_bits;
+
+        // Default allocation based on bits left and relative
+        // complexity of the section
+        cpi->kf_group_bits = (long long)( cpi->bits_left *
+                                          ( kf_group_err /
+                                            cpi->modified_total_error_left ));
 
         // Clip based on maximum per frame rate defined by the user.
-        if (cpi->kf_group_bits > max_bits * cpi->frames_to_key)
-            cpi->kf_group_bits = max_bits * cpi->frames_to_key;
+        max_grp_bits = (long long)max_bits * (long long)cpi->frames_to_key;
+        if (cpi->kf_group_bits > max_grp_bits)
+            cpi->kf_group_bits = max_grp_bits;
 
         // Additional special case for CBR if buffer is getting full.
         if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
         {
-            // If the buffer is near or above the optimal and this kf group is not being allocated much
-            // then increase the allocation a bit.
-            if (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level)
+            int opt_buffer_lvl = cpi->oxcf.optimal_buffer_level;
+            int buffer_lvl = cpi->buffer_level;
+
+            // If the buffer is near or above the optimal and this kf group is
+            // not being allocated much then increase the allocation a bit.
+            if (buffer_lvl >= opt_buffer_lvl)
             {
-                int high_water_mark = (cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1;
-                int min_group_bits;
+                int high_water_mark = (opt_buffer_lvl +
+                                       cpi->oxcf.maximum_buffer_size) >> 1;
+
+                long long av_group_bits;
+
+                // Av bits per frame * number of frames
+                av_group_bits = (long long)cpi->av_per_frame_bandwidth *
+                                (long long)cpi->frames_to_key;
 
                 // We are at or above the maximum.
                 if (cpi->buffer_level >= high_water_mark)
                 {
-                    min_group_bits = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) + (cpi->buffer_level - high_water_mark);
+                    long long min_group_bits;
+
+                    min_group_bits = av_group_bits +
+                                     (long long)(buffer_lvl -
+                                                 high_water_mark);
 
                     if (cpi->kf_group_bits < min_group_bits)
                         cpi->kf_group_bits = min_group_bits;
                 }
                 // We are above optimal but below the maximum
-                else if (cpi->kf_group_bits < (cpi->av_per_frame_bandwidth * cpi->frames_to_key))
+                else if (cpi->kf_group_bits < av_group_bits)
                 {
-                    int bits_below_av = (cpi->av_per_frame_bandwidth * cpi->frames_to_key) - cpi->kf_group_bits;
-                    cpi->kf_group_bits += (int)((double)bits_below_av * (double)(cpi->buffer_level - cpi->oxcf.optimal_buffer_level) /
-                                                (double)(high_water_mark - cpi->oxcf.optimal_buffer_level));
+                    long long bits_below_av = av_group_bits -
+                                              cpi->kf_group_bits;
+
+                    cpi->kf_group_bits +=
+                       (long long)((double)bits_below_av *
+                                   (double)(buffer_lvl - opt_buffer_lvl) /
+                                   (double)(high_water_mark - opt_buffer_lvl));
                 }
             }
         }
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index fcde2205d..a09b23857 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -356,9 +356,14 @@ typedef struct
     int gf_bits;                     // Bits for the golden frame or ARF - 2 pass only
     int mid_gf_extra_bits;             // A few extra bits for the frame half way between two gfs.
 
-    int kf_group_bits;                // Projected total bits available for a key frame group of frames
-    int kf_group_error_left;           // Error score of frames still to be coded in kf group
-    int kf_bits;                     // Bits for the key frame in a key frame group - 2 pass only
+    // Projected total bits available for a key frame group of frames
+    long long kf_group_bits;
+
+    // Error score of frames still to be coded in kf group
+    long long kf_group_error_left;
+
+    // Bits for the key frame in a key frame group - 2 pass only
+    int kf_bits;
 
     int non_gf_bitrate_adjustment;     // Used in the few frames following a GF to recover the extra bits spent in that GF
     int initial_gf_use;               // percentage use of gf 2 frames after gf

From 0ce39012823b522c611db87f0810c540124e6e9d Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 22 Jul 2010 08:07:32 -0400
Subject: [PATCH 087/307] Swap alt/gold/new/last frame buffer ptrs instead of
 copying.

At the end of the decode, frame buffers were being copied.
The frames are not updated after the copy, they are just
for reference on later frames.  This change allows multiple
references to the same frame buffer instead of copying it.

Changes needed to be made to the encoder to handle this.  The
encoder is still doing frame buffer copies in similar places
where pointer reference could be done.

Change-Id: I7c38be4d23979cc49b5f17241ca3a78703803e66
---
 vp8/common/alloccommon.c  |  58 ++++++-------
 vp8/common/onyxc_int.h    |  10 ++-
 vp8/decoder/decodframe.c  |  44 +++++-----
 vp8/decoder/onyxd_if.c    | 166 +++++++++++++++++++++++---------------
 vp8/decoder/threading.c   |  47 +++++------
 vp8/encoder/encodeframe.c |  47 +++++------
 vp8/encoder/ethreading.c  |  18 +++--
 vp8/encoder/firstpass.c   |  59 +++++++-------
 vp8/encoder/onyx_if.c     | 132 ++++++++++++++++--------------
 vp8/encoder/pickinter.c   |  24 +++---
 vp8/encoder/rdopt.c       |  23 +++---
 11 files changed, 333 insertions(+), 295 deletions(-)

diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index c3368b5cf..d0a138d06 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -31,13 +31,15 @@ void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
         vpx_memset(&mi[i*cols-1], 0, sizeof(MODE_INFO));
     }
 }
+
 void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
 {
+    int i;
+
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+        vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]);
+
     vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->new_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->last_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->golden_frame);
-    vp8_yv12_de_alloc_frame_buffer(&oci->alt_ref_frame);
     vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
 
     vpx_free(oci->above_context[Y1CONTEXT]);
@@ -61,6 +63,8 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
 
 int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
 {
+    int i;
+
     vp8_de_alloc_frame_buffers(oci);
 
     // our internal buffers are always multiples of 16
@@ -71,37 +75,33 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
         height += 16 - (height & 0xf);
 
 
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+    {
+      oci->fb_idx_ref_cnt[0] = 0;
+
+      if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i],  width, height, VP8BORDERINPIXELS) < 0)
+        {
+            vp8_de_alloc_frame_buffers(oci);
+            return ALLOC_FAILURE;
+        }
+    }
+
+    oci->new_fb_idx = 0;
+    oci->lst_fb_idx = 1;
+    oci->gld_fb_idx = 2;
+    oci->alt_fb_idx = 3;
+
+    oci->fb_idx_ref_cnt[0] = 1;
+    oci->fb_idx_ref_cnt[1] = 1;
+    oci->fb_idx_ref_cnt[2] = 1;
+    oci->fb_idx_ref_cnt[3] = 1;
+
     if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame,   width, 16, VP8BORDERINPIXELS) < 0)
     {
         vp8_de_alloc_frame_buffers(oci);
         return ALLOC_FAILURE;
     }
 
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->new_frame,   width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->last_frame,  width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->golden_frame, width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    if (vp8_yv12_alloc_frame_buffer(&oci->alt_ref_frame, width, height, VP8BORDERINPIXELS) < 0)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
     if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0)
     {
         vp8_de_alloc_frame_buffers(oci);
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index d2fbc8686..503ad5dd2 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -33,6 +33,7 @@ void vp8_initialize_common(void);
 #define MAXQ 127
 #define QINDEX_RANGE (MAXQ + 1)
 
+#define NUM_YV12_BUFFERS 4
 
 typedef struct frame_contexts
 {
@@ -94,11 +95,12 @@ typedef struct VP8Common
     YUV_TYPE clr_type;
     CLAMP_TYPE  clamp_type;
 
-    YV12_BUFFER_CONFIG last_frame;
-    YV12_BUFFER_CONFIG golden_frame;
-    YV12_BUFFER_CONFIG alt_ref_frame;
-    YV12_BUFFER_CONFIG new_frame;
     YV12_BUFFER_CONFIG *frame_to_show;
+
+    YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS];
+    int fb_idx_ref_cnt[NUM_YV12_BUFFERS];
+    int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx;
+
     YV12_BUFFER_CONFIG post_proc_buffer;
     YV12_BUFFER_CONFIG temp_scale_frame;
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 8d9db10fd..a5850db6d 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -381,8 +381,10 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
-    int recon_y_stride = pc->last_frame.y_stride;
-    int recon_uv_stride = pc->last_frame.uv_stride;
+    int ref_fb_idx = pc->lst_fb_idx;
+    int dst_fb_idx = pc->new_fb_idx;
+    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
     vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
     recon_yoffset = mb_row * recon_y_stride * 16;
@@ -419,33 +421,23 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
-        xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
-        xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 
         xd->left_available = (mb_col != 0);
 
         // Select the appropriate reference frame for this MB
         if (xd->mbmi.ref_frame == LAST_FRAME)
-        {
-            xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = pc->lst_fb_idx;
         else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-        {
-            // Golden frame reconstruction buffer
-            xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = pc->gld_fb_idx;
         else
-        {
-            // Alternate reference frame reconstruction buffer
-            xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = pc->alt_fb_idx;
+
+        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
         vp8_build_uvmvs(xd, pc->full_pixel);
 
@@ -475,7 +467,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
     // adjust to the next row of mbs
     vp8_extend_mb_row(
-        &pc->new_frame,
+        &pc->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
     );
 
@@ -890,11 +882,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
                     }
     }
 
-    vpx_memcpy(&xd->pre, &pc->last_frame, sizeof(YV12_BUFFER_CONFIG));
-    vpx_memcpy(&xd->dst, &pc->new_frame, sizeof(YV12_BUFFER_CONFIG));
+    vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
+    vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
 
     // set up frame new frame for intra coded blocks
-    vp8_setup_intra_recon(&pc->new_frame);
+    vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
 
     vp8_setup_block_dptrs(xd);
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 60ca74a7f..28f99086f 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -180,38 +180,38 @@ int vp8dx_get_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
     return 0;
 }
 int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
     return 0;
 }
 
@@ -221,12 +221,95 @@ extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
 static INT64 dx_store_reg[8];
 #endif
+
+static int get_free_fb (VP8_COMMON *cm)
+{
+    int i;
+    for (i = 0; i < NUM_YV12_BUFFERS; i++)
+        if (cm->fb_idx_ref_cnt[i] == 0)
+            break;
+
+    cm->fb_idx_ref_cnt[i] = 1;
+    return i;
+}
+
+static void ref_cnt_fb (int *buf, int *idx, int new_idx)
+{
+    if (buf[*idx] > 0)
+        buf[*idx]--;
+
+    *idx = new_idx;
+
+    buf[new_idx]++;
+}
+
+// If any buffer copy / swapping is signalled it should be done here.
+static int swap_frame_buffers (VP8_COMMON *cm)
+{
+    int fb_to_update_with, err = 0;
+
+    if (cm->refresh_last_frame)
+        fb_to_update_with = cm->lst_fb_idx;
+    else
+        fb_to_update_with = cm->new_fb_idx;
+
+    // The alternate reference frame or golden frame can be updated
+    //  using the new, last, or golden/alt ref frame.  If it
+    //  is updated using the newly decoded frame it is a refresh.
+    //  An update using the last or golden/alt ref frame is a copy.
+    if (cm->copy_buffer_to_arf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_arf == 1)
+            new_fb = fb_to_update_with;
+        else if (cm->copy_buffer_to_arf == 2)
+            new_fb = cm->gld_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb);
+    }
+
+    if (cm->copy_buffer_to_gf)
+    {
+        int new_fb = 0;
+
+        if (cm->copy_buffer_to_gf == 1)
+            new_fb = fb_to_update_with;
+        else if (cm->copy_buffer_to_gf == 2)
+            new_fb = cm->alt_fb_idx;
+        else
+            err = -1;
+
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb);
+    }
+
+    if (cm->refresh_golden_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_alt_ref_frame)
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx);
+
+    if (cm->refresh_last_frame)
+    {
+        ref_cnt_fb (cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx);
+
+        cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
+    }
+    else
+        cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
+
+    cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
+
+    return err;
+}
+
 int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
 {
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
-
     struct vpx_usec_timer timer;
 
 //  if(pbi->ready_for_new_data == 0)
@@ -257,6 +340,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     pbi->Source = source;
     pbi->source_sz = size;
 
+    cm->new_fb_idx = get_free_fb (cm);
+
     retcode = vp8_decode_frame(pbi);
 
     if (retcode < 0)
@@ -275,15 +360,11 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
         vp8_stop_lfthread(pbi);
 
-    if (cm->refresh_last_frame)
+    if (swap_frame_buffers (cm))
     {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-
-        cm->frame_to_show = &cm->last_frame;
-    }
-    else
-    {
-        cm->frame_to_show = &cm->new_frame;
+        pbi->common.error.error_code = VPX_CODEC_ERROR;
+        pbi->common.error.setjmp = 0;
+        return -1;
     }
 
     if (!pbi->b_multithreaded_lf)
@@ -313,49 +394,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
 #endif
 
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->copy_buffer_to_arf)
-    {
-        if (cm->copy_buffer_to_arf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-        }
-        else if (cm->copy_buffer_to_arf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
-    }
-
-    if (cm->copy_buffer_to_gf)
-    {
-        if (cm->copy_buffer_to_gf == 1)
-        {
-            if (cm->refresh_last_frame)
-                vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-            else
-                vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
-        }
-        else if (cm->copy_buffer_to_gf == 2)
-            vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
-    }
-
-    // Should the golden or alternate reference frame be refreshed?
-    if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame)
-    {
-        if (cm->refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-
-        if (cm->refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-
-        //vpx_log("Decoder: recovery frame received \n");
-
-        // Update data structures that monitors GF useage
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
-    }
-
     vp8_clear_system_state();
 
     vpx_usec_timer_mark(&timer);
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 18c8da077..752081ea9 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -59,11 +59,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->frames_since_golden      = pc->frames_since_golden;
         mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
 
-        mbd->pre = pc->last_frame;
-        mbd->dst = pc->new_frame;
-
-
-
+        mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+        mbd->dst = pc->yv12_fb[pc->new_fb_idx];
 
         vp8_setup_block_dptrs(mbd);
         vp8_build_block_doffsets(mbd);
@@ -119,8 +116,10 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                 int i;
                 int recon_yoffset, recon_uvoffset;
                 int mb_col;
-                int recon_y_stride = pc->last_frame.y_stride;
-                int recon_uv_stride = pc->last_frame.uv_stride;
+                int ref_fb_idx = pc->lst_fb_idx;
+                int dst_fb_idx = pc->new_fb_idx;
+                int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+                int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
                 volatile int *last_row_current_mb_col;
 
@@ -172,33 +171,23 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                     xd->mb_to_left_edge = -((mb_col * 16) << 3);
                     xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
-                    xd->dst.y_buffer = pc->new_frame.y_buffer + recon_yoffset;
-                    xd->dst.u_buffer = pc->new_frame.u_buffer + recon_uvoffset;
-                    xd->dst.v_buffer = pc->new_frame.v_buffer + recon_uvoffset;
+                    xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                    xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                    xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
 
                     xd->left_available = (mb_col != 0);
 
                     // Select the appropriate reference frame for this MB
                     if (xd->mbmi.ref_frame == LAST_FRAME)
-                    {
-                        xd->pre.y_buffer = pc->last_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->last_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->last_frame.v_buffer + recon_uvoffset;
-                    }
+                        ref_fb_idx = pc->lst_fb_idx;
                     else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-                    {
-                        // Golden frame reconstruction buffer
-                        xd->pre.y_buffer = pc->golden_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->golden_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->golden_frame.v_buffer + recon_uvoffset;
-                    }
+                        ref_fb_idx = pc->gld_fb_idx;
                     else
-                    {
-                        // Alternate reference frame reconstruction buffer
-                        xd->pre.y_buffer = pc->alt_ref_frame.y_buffer + recon_yoffset;
-                        xd->pre.u_buffer = pc->alt_ref_frame.u_buffer + recon_uvoffset;
-                        xd->pre.v_buffer = pc->alt_ref_frame.v_buffer + recon_uvoffset;
-                    }
+                        ref_fb_idx = pc->alt_fb_idx;
+
+                    xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+                    xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+                    xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
                     vp8_build_uvmvs(xd, pc->full_pixel);
 
@@ -222,7 +211,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                 // adjust to the next row of mbs
                 vp8_extend_mb_row(
-                    &pc->new_frame,
+                    &pc->yv12_fb[dst_fb_idx],
                     xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
                 );
 
@@ -279,7 +268,7 @@ THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
                 MACROBLOCKD *mbd = &pbi->lpfmb;
                 int default_filt_lvl = pbi->common.filter_level;
 
-                YV12_BUFFER_CONFIG *post = &cm->new_frame;
+                YV12_BUFFER_CONFIG *post = &cm->yv12_fb[cm->new_fb_idx];
                 loop_filter_info *lfi = cm->lf_info;
                 int frame_type = cm->frame_type;
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index a05b33268..cb9a8dd36 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -277,8 +277,10 @@ void encode_mb_row(VP8_COMP *cpi,
     int i;
     int recon_yoffset, recon_uvoffset;
     int mb_col;
-    int recon_y_stride = cm->last_frame.y_stride;
-    int recon_uv_stride = cm->last_frame.uv_stride;
+    int ref_fb_idx = cm->lst_fb_idx;
+    int dst_fb_idx = cm->new_fb_idx;
+    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
     int seg_map_index = (mb_row * cpi->common.mb_cols);
 
 
@@ -311,9 +313,9 @@ void encode_mb_row(VP8_COMP *cpi,
         x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
         x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
 
-        xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-        xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-        xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
         xd->left_available = (mb_col != 0);
 
         // Is segmentation enabled
@@ -419,7 +421,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
     //extend the recon for intra prediction
     vp8_extend_mb_row(
-        &cm->new_frame,
+        &cm->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16,
         xd->dst.u_buffer + 8,
         xd->dst.v_buffer + 8);
@@ -531,12 +533,12 @@ void vp8_encode_frame(VP8_COMP *cpi)
     // Copy data over into macro block data sturctures.
 
     x->src = * cpi->Source;
-    xd->pre = cm->last_frame;
-    xd->dst = cm->new_frame;
+    xd->pre = cm->yv12_fb[cm->lst_fb_idx];
+    xd->dst = cm->yv12_fb[cm->new_fb_idx];
 
     // set up frame new frame for intra coded blocks
 
-    vp8_setup_intra_recon(&cm->new_frame);
+    vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]);
 
     vp8_build_block_offsets(x);
 
@@ -1157,34 +1159,23 @@ int vp8cx_encode_inter_macroblock
         MV best_ref_mv;
         MV nearest, nearby;
         int mdcounts[4];
+        int ref_fb_idx;
 
         vp8_find_near_mvs(xd, xd->mode_info_context,
                           &nearest, &nearby, &best_ref_mv, mdcounts, xd->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
         vp8_build_uvmvs(xd, cpi->common.full_pixel);
 
-        // store motion vectors in our motion vector list
         if (xd->mbmi.ref_frame == LAST_FRAME)
-        {
-            // Set up pointers for this macro block into the previous frame recon buffer
-            xd->pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = cpi->common.lst_fb_idx;
         else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-        {
-            // Set up pointers for this macro block into the golden frame recon buffer
-            xd->pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = cpi->common.gld_fb_idx;
         else
-        {
-            // Set up pointers for this macro block into the alternate reference frame recon buffer
-            xd->pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-            xd->pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-            xd->pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
-        }
+            ref_fb_idx = cpi->common.alt_fb_idx;
+
+        xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+        xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+        xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
         if (xd->mbmi.mode == SPLITMV)
         {
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 54646f421..b8bd414cf 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -56,8 +56,10 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                     int i;
                     int recon_yoffset, recon_uvoffset;
                     int mb_col;
-                    int recon_y_stride = cm->last_frame.y_stride;
-                    int recon_uv_stride = cm->last_frame.uv_stride;
+                    int ref_fb_idx = cm->lst_fb_idx;
+                    int dst_fb_idx = cm->new_fb_idx;
+                    int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride;
+                    int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride;
                     volatile int *last_row_current_mb_col;
 
                     if (ithread > 0)
@@ -107,9 +109,9 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                         x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
                         x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
 
-                        xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-                        xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-                        xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+                        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
                         xd->left_available = (mb_col != 0);
 
                         // Is segmentation enabled
@@ -195,7 +197,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                     //extend the recon for intra prediction
                     vp8_extend_mb_row(
-                        &cm->new_frame,
+                        &cm->yv12_fb[dst_fb_idx],
                         xd->dst.y_buffer + 16,
                         xd->dst.u_buffer + 8,
                         xd->dst.v_buffer + 8);
@@ -386,8 +388,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         mbd->frames_till_alt_ref_frame = cm->frames_till_alt_ref_frame;
 
         mb->src = * cpi->Source;
-        mbd->pre = cm->last_frame;
-        mbd->dst = cm->new_frame;
+        mbd->pre = cm->yv12_fb[cm->lst_fb_idx];
+        mbd->dst = cm->yv12_fb[cm->new_fb_idx];
 
         mb->src.y_buffer += 16 * x->src.y_stride * (i + 1);
         mb->src.u_buffer +=  8 * x->src.uv_stride * (i + 1);
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 24886cb1d..b838378bb 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -536,8 +536,11 @@ void vp8_first_pass(VP8_COMP *cpi)
 
     int col_blocks = 4 * cm->mb_cols;
     int recon_yoffset, recon_uvoffset;
-    int recon_y_stride = cm->last_frame.y_stride;
-    int recon_uv_stride = cm->last_frame.uv_stride;
+    YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+    YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+    YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+    int recon_y_stride = lst_yv12->y_stride;
+    int recon_uv_stride = lst_yv12->uv_stride;
     int intra_error = 0;
     int coded_error = 0;
 
@@ -559,8 +562,8 @@ void vp8_first_pass(VP8_COMP *cpi)
     vp8_clear_system_state();  //__asm emms;
 
     x->src = * cpi->Source;
-    xd->pre = cm->last_frame;
-    xd->dst = cm->new_frame;
+    xd->pre = *lst_yv12;
+    xd->dst = *new_yv12;
 
     vp8_build_block_offsets(x);
 
@@ -569,7 +572,7 @@ void vp8_first_pass(VP8_COMP *cpi)
     vp8_setup_block_ptrs(x);
 
     // set up frame new frame for intra coded blocks
-    vp8_setup_intra_recon(&cm->new_frame);
+    vp8_setup_intra_recon(new_yv12);
     vp8cx_frame_init_quantizer(cpi);
 
     // Initialise the MV cost table to the defaults
@@ -599,9 +602,9 @@ void vp8_first_pass(VP8_COMP *cpi)
             int gf_motion_error = INT_MAX;
             int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 
-            xd->dst.y_buffer = cm->new_frame.y_buffer + recon_yoffset;
-            xd->dst.u_buffer = cm->new_frame.u_buffer + recon_uvoffset;
-            xd->dst.v_buffer = cm->new_frame.v_buffer + recon_uvoffset;
+            xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset;
+            xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset;
+            xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset;
             xd->left_available = (mb_col != 0);
 
             // do intra 16x16 prediction
@@ -635,14 +638,14 @@ void vp8_first_pass(VP8_COMP *cpi)
                 int motion_error = INT_MAX;
 
                 // Simple 0,0 motion with no mv overhead
-                vp8_zz_motion_search( cpi, x, &cm->last_frame, &motion_error, recon_yoffset );
+                vp8_zz_motion_search( cpi, x, lst_yv12, &motion_error, recon_yoffset );
                 d->bmi.mv.as_mv.row = 0;
                 d->bmi.mv.as_mv.col = 0;
 
                 // Test last reference frame using the previous best mv as the
                 // starting point (best reference) for the search
                 vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
-                                        &d->bmi.mv.as_mv, &cm->last_frame,
+                                        &d->bmi.mv.as_mv, lst_yv12,
                                         &motion_error, recon_yoffset);
 
                 // If the current best reference mv is not centred on 0,0 then do a 0,0 based search as well
@@ -650,7 +653,7 @@ void vp8_first_pass(VP8_COMP *cpi)
                 {
                    tmp_err = INT_MAX;
                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv,
-                                     &cm->last_frame, &tmp_err, recon_yoffset);
+                                     lst_yv12, &tmp_err, recon_yoffset);
 
                    if ( tmp_err < motion_error )
                    {
@@ -664,7 +667,7 @@ void vp8_first_pass(VP8_COMP *cpi)
                 // Experimental search in a second reference frame ((0,0) based only)
                 if (cm->current_video_frame > 1)
                 {
-                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, &cm->golden_frame, &gf_motion_error, recon_yoffset);
+                    vp8_first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset);
 
                     if ((gf_motion_error < motion_error) && (gf_motion_error < this_error))
                     {
@@ -682,9 +685,9 @@ void vp8_first_pass(VP8_COMP *cpi)
 
 
                     // Reset to last frame as reference buffer
-                    xd->pre.y_buffer = cm->last_frame.y_buffer + recon_yoffset;
-                    xd->pre.u_buffer = cm->last_frame.u_buffer + recon_uvoffset;
-                    xd->pre.v_buffer = cm->last_frame.v_buffer + recon_uvoffset;
+                    xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+                    xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+                    xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
                 }
 
                 if (motion_error <= this_error)
@@ -776,7 +779,7 @@ void vp8_first_pass(VP8_COMP *cpi)
         x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols;
 
         //extend the recon for intra prediction
-        vp8_extend_mb_row(&cm->new_frame, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
+        vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
         vp8_clear_system_state();  //__asm emms;
     }
 
@@ -842,17 +845,17 @@ void vp8_first_pass(VP8_COMP *cpi)
         (cpi->this_frame_stats.pcnt_inter > 0.20) &&
         ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0))
     {
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
 
     // swap frame pointers so last frame refers to the frame we just compressed
-    vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-    vp8_yv12_extend_frame_borders(&cm->last_frame);
+    vp8_swap_yv12_buffer(lst_yv12, new_yv12);
+    vp8_yv12_extend_frame_borders(lst_yv12);
 
     // Special case for the first frame. Copy into the GF buffer as a second reference.
     if (cm->current_video_frame == 0)
     {
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
 
 
@@ -868,7 +871,7 @@ void vp8_first_pass(VP8_COMP *cpi)
         else
             recon_file = fopen(filename, "ab");
 
-        fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+        fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
         fclose(recon_file);
     }
 
@@ -1196,7 +1199,9 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     FIRSTPASS_STATS next_frame;
     FIRSTPASS_STATS *start_pos;
     int i;
-    int image_size = cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height;
+    int y_width  = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_width;
+    int y_height = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_height;
+    int image_size = y_width  * y_height;
     double boost_score = 0.0;
     double old_boost_score = 0.0;
     double gf_group_err = 0.0;
@@ -2322,7 +2327,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         int allocation_chunks;
         int Counter = cpi->frames_to_key;
         int alt_kf_bits;
-
+        YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
         // Min boost based on kf interval
 #if 0
 
@@ -2342,10 +2347,10 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
 
         // bigger frame sizes need larger kf boosts, smaller frames smaller boosts...
-        if ((cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) > (320 * 240))
-            kf_boost += 2 * (cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) / (320 * 240);
-        else if ((cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height) < (320 * 240))
-            kf_boost -= 4 * (320 * 240) / (cpi->common.last_frame.y_width  * cpi->common.last_frame.y_height);
+        if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240))
+            kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240);
+        else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240))
+            kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height);
 
         kf_boost = (int)((double)kf_boost * 100.0) >> 4;                          // Scale 16 to 100
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 581cb68c7..29d9f7e99 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1123,11 +1123,11 @@ void vp8_set_speed_features(VP8_COMP *cpi)
 
     if (cpi->sf.search_method == NSTEP)
     {
-        vp8_init3smotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+        vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
     }
     else if (cpi->sf.search_method == DIAMOND)
     {
-        vp8_init_dsmotion_compensation(&cpi->mb, cm->last_frame.y_stride);
+        vp8_init_dsmotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride);
     }
 
     if (cpi->sf.improved_dct)
@@ -1564,9 +1564,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
         cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
     }
 
-    if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
-        ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
-        cm->last_frame.y_width == 0)
+    if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+        ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+        cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
     {
         alloc_raw_frame_buffers(cpi);
         vp8_alloc_compressor_data(cpi);
@@ -1843,9 +1843,9 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
         cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs;
     }
 
-    if (((cm->Width + 15) & 0xfffffff0) != cm->last_frame.y_width ||
-        ((cm->Height + 15) & 0xfffffff0) != cm->last_frame.y_height ||
-        cm->last_frame.y_width == 0)
+    if (((cm->Width + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+        ((cm->Height + 15) & 0xfffffff0) != cm->yv12_fb[cm->lst_fb_idx].y_height ||
+        cm->yv12_fb[cm->lst_fb_idx].y_width == 0)
     {
         alloc_raw_frame_buffers(cpi);
         vp8_alloc_compressor_data(cpi);
@@ -2241,7 +2241,8 @@ void vp8_remove_compressor(VP8_PTR *ptr)
 
             if (cpi->b_calculate_psnr)
             {
-                double samples = 3.0 / 2 * cpi->count * cpi->common.last_frame.y_width * cpi->common.last_frame.y_height;
+                YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+                double samples = 3.0 / 2 * cpi->count * lst_yv12->y_width * lst_yv12->y_height;
                 double total_psnr = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error);
                 double total_psnr2 = vp8_mse2psnr(samples, 255.0, cpi->total_sq_error2);
                 double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0);
@@ -2580,19 +2581,19 @@ int vp8_get_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF
 {
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
+    int ref_fb_idx;
 
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->last_frame, sd);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->golden_frame, sd);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, sd);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(&cm->yv12_fb[ref_fb_idx], sd);
+
     return 0;
 }
 int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd)
@@ -2600,18 +2601,19 @@ int vp8_set_reference(VP8_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONF
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
     VP8_COMMON *cm = &cpi->common;
 
+    int ref_fb_idx;
+
     if (ref_frame_flag == VP8_LAST_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->last_frame);
-
+        ref_fb_idx = cm->lst_fb_idx;
     else if (ref_frame_flag == VP8_GOLD_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->golden_frame);
-
+        ref_fb_idx = cm->gld_fb_idx;
     else if (ref_frame_flag == VP8_ALT_FLAG)
-        vp8_yv12_copy_frame_ptr(sd, &cm->alt_ref_frame);
-
+        ref_fb_idx = cm->alt_fb_idx;
     else
         return -1;
 
+    vp8_yv12_copy_frame_ptr(sd, &cm->yv12_fb[ref_fb_idx]);
+
     return 0;
 }
 int vp8_update_entropy(VP8_PTR comp, int update)
@@ -2686,8 +2688,8 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 #endif
     }
     // we may need to copy to a buffer so we can extend the image...
-    else if (cm->Width != cm->last_frame.y_width ||
-             cm->Height != cm->last_frame.y_height)
+    else if (cm->Width != cm->yv12_fb[cm->lst_fb_idx].y_width ||
+             cm->Height != cm->yv12_fb[cm->lst_fb_idx].y_height)
     {
         //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 #if HAVE_ARMV7
@@ -2840,7 +2842,7 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
     VP8_COMMON *cm = &cpi->common;
 
     // Update the golden frame buffer
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
 
     // Select an interval before next GF or altref
     if (!cpi->auto_gold)
@@ -2882,7 +2884,7 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
     if (cm->refresh_golden_frame)
     {
         // Update the golden frame buffer
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
 
         // Select an interval before next GF
         if (!cpi->auto_gold)
@@ -4317,11 +4319,11 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
 
     if (cm->refresh_last_frame)
     {
-        vp8_swap_yv12_buffer(&cm->last_frame, &cm->new_frame);
-        cm->frame_to_show = &cm->last_frame;
+        vp8_swap_yv12_buffer(&cm->yv12_fb[cm->lst_fb_idx], &cm->yv12_fb[cm->new_fb_idx]);
+        cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx];
     }
     else
-        cm->frame_to_show = &cm->new_frame;
+        cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx];
 
 
 
@@ -4371,43 +4373,48 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
         }
     }
 
-
-    // At this point the new frame has been encoded coded.
-    // If any buffer copy / swaping is signalled it should be done here.
-    if (cm->frame_type == KEY_FRAME)
     {
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
-        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
-    }
-    else    // For non key frames
-    {
-        // Code to copy between reference buffers
-        if (cm->copy_buffer_to_arf)
+        YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx];
+        YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx];
+        YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx];
+        YV12_BUFFER_CONFIG *alt_yv12 = &cm->yv12_fb[cm->alt_fb_idx];
+        // At this point the new frame has been encoded coded.
+        // If any buffer copy / swaping is signalled it should be done here.
+        if (cm->frame_type == KEY_FRAME)
         {
-            if (cm->copy_buffer_to_arf == 1)
-            {
-                if (cm->refresh_last_frame)
-                    // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
-                    vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->alt_ref_frame);
-                else
-                    vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->alt_ref_frame);
-            }
-            else if (cm->copy_buffer_to_arf == 2)
-                vp8_yv12_copy_frame_ptr(&cm->golden_frame, &cm->alt_ref_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, gld_yv12);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, alt_yv12);
         }
-
-        if (cm->copy_buffer_to_gf)
+        else    // For non key frames
         {
-            if (cm->copy_buffer_to_gf == 1)
+            // Code to copy between reference buffers
+            if (cm->copy_buffer_to_arf)
             {
-                if (cm->refresh_last_frame)
-                    // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
-                    vp8_yv12_copy_frame_ptr(&cm->new_frame, &cm->golden_frame);
-                else
-                    vp8_yv12_copy_frame_ptr(&cm->last_frame, &cm->golden_frame);
+                if (cm->copy_buffer_to_arf == 1)
+                {
+                    if (cm->refresh_last_frame)
+                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+                        vp8_yv12_copy_frame_ptr(new_yv12, alt_yv12);
+                    else
+                        vp8_yv12_copy_frame_ptr(lst_yv12, alt_yv12);
+                }
+                else if (cm->copy_buffer_to_arf == 2)
+                    vp8_yv12_copy_frame_ptr(gld_yv12, alt_yv12);
+            }
+
+            if (cm->copy_buffer_to_gf)
+            {
+                if (cm->copy_buffer_to_gf == 1)
+                {
+                    if (cm->refresh_last_frame)
+                        // We copy new_frame here because last and new buffers will already have been swapped if cm->refresh_last_frame is set.
+                        vp8_yv12_copy_frame_ptr(new_yv12, gld_yv12);
+                    else
+                        vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
+                }
+                else if (cm->copy_buffer_to_gf == 2)
+                    vp8_yv12_copy_frame_ptr(alt_yv12, gld_yv12);
             }
-            else if (cm->copy_buffer_to_gf == 2)
-                vp8_yv12_copy_frame_ptr(&cm->alt_ref_frame, &cm->golden_frame);
         }
     }
 
@@ -4623,10 +4630,10 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
     {
         // Is this an alternate reference update
         if (cpi->common.refresh_alt_ref_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->alt_ref_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->alt_fb_idx]);
 
         if (cpi->common.refresh_golden_frame)
-            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->golden_frame);
+            vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cm->yv12_fb[cm->gld_fb_idx]);
     }
     else
     {
@@ -4678,7 +4685,8 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
         FILE *recon_file;
         sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame);
         recon_file = fopen(filename, "wb");
-        fwrite(cm->last_frame.buffer_alloc, cm->last_frame.frame_size, 1, recon_file);
+        fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc,
+               cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
         fclose(recon_file);
     }
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index bb6348d5f..1947e8167 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -460,36 +460,42 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
     // set up all the refframe dependent pointers.
     if (cpi->ref_frame_flags & VP8_LAST_FLAG)
     {
+        YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[LAST_FRAME], &near_mv[LAST_FRAME],
                           &best_ref_mv[LAST_FRAME], MDCounts[LAST_FRAME], LAST_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[LAST_FRAME] = cpi->common.last_frame.y_buffer + recon_yoffset;
-        u_buffer[LAST_FRAME] = cpi->common.last_frame.u_buffer + recon_uvoffset;
-        v_buffer[LAST_FRAME] = cpi->common.last_frame.v_buffer + recon_uvoffset;
+        y_buffer[LAST_FRAME] = lst_yv12->y_buffer + recon_yoffset;
+        u_buffer[LAST_FRAME] = lst_yv12->u_buffer + recon_uvoffset;
+        v_buffer[LAST_FRAME] = lst_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[LAST_FRAME] = 1;
 
     if (cpi->ref_frame_flags & VP8_GOLD_FLAG)
     {
+        YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[GOLDEN_FRAME], &near_mv[GOLDEN_FRAME],
                           &best_ref_mv[GOLDEN_FRAME], MDCounts[GOLDEN_FRAME], GOLDEN_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.y_buffer + recon_yoffset;
-        u_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-        v_buffer[GOLDEN_FRAME] = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+        y_buffer[GOLDEN_FRAME] = gld_yv12->y_buffer + recon_yoffset;
+        u_buffer[GOLDEN_FRAME] = gld_yv12->u_buffer + recon_uvoffset;
+        v_buffer[GOLDEN_FRAME] = gld_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[GOLDEN_FRAME] = 1;
 
     if (cpi->ref_frame_flags & VP8_ALT_FLAG && cpi->source_alt_ref_active)
     {
+        YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
         vp8_find_near_mvs(&x->e_mbd, x->e_mbd.mode_info_context, &nearest_mv[ALTREF_FRAME], &near_mv[ALTREF_FRAME],
                           &best_ref_mv[ALTREF_FRAME], MDCounts[ALTREF_FRAME], ALTREF_FRAME, cpi->common.ref_frame_sign_bias);
 
-        y_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-        u_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-        v_buffer[ALTREF_FRAME] = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+        y_buffer[ALTREF_FRAME] = alt_yv12->y_buffer + recon_yoffset;
+        u_buffer[ALTREF_FRAME] = alt_yv12->u_buffer + recon_uvoffset;
+        v_buffer[ALTREF_FRAME] = alt_yv12->v_buffer + recon_uvoffset;
     }
     else
         skip_mode[ALTREF_FRAME] = 1;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 65dbd8d8e..06a20c0b7 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1578,18 +1578,21 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
         if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
         {
+            YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
+
             if (!(cpi->ref_frame_flags & VP8_LAST_FLAG))
                 continue;
 
             lf_or_gf = 0;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.last_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.last_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.last_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = lst_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
         }
         else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
         {
+            YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
 
             // not supposed to reference gold frame
             if (!(cpi->ref_frame_flags & VP8_GOLD_FLAG))
@@ -1598,12 +1601,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             lf_or_gf = 1;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.golden_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.golden_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.golden_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = gld_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
         }
         else if (x->e_mbd.mbmi.ref_frame == ALTREF_FRAME)
         {
+            YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
+
             // not supposed to reference alt ref frame
             if (!(cpi->ref_frame_flags & VP8_ALT_FLAG))
                 continue;
@@ -1614,9 +1619,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             lf_or_gf = 1;  // Local last frame vs Golden frame flag
 
             // Set up pointers for this macro block into the previous frame recon buffer
-            x->e_mbd.pre.y_buffer = cpi->common.alt_ref_frame.y_buffer + recon_yoffset;
-            x->e_mbd.pre.u_buffer = cpi->common.alt_ref_frame.u_buffer + recon_uvoffset;
-            x->e_mbd.pre.v_buffer = cpi->common.alt_ref_frame.v_buffer + recon_uvoffset;
+            x->e_mbd.pre.y_buffer = alt_yv12->y_buffer + recon_yoffset;
+            x->e_mbd.pre.u_buffer = alt_yv12->u_buffer + recon_uvoffset;
+            x->e_mbd.pre.v_buffer = alt_yv12->v_buffer + recon_uvoffset;
         }
 
         vp8_find_near_mvs(&x->e_mbd,

From b2fa74ac18c4a333f7913a346588b87087989202 Mon Sep 17 00:00:00 2001
From: Jeff Muizelaar <jmuizelaar@mozilla.com>
Date: Fri, 28 May 2010 14:28:12 -0400
Subject: [PATCH 088/307] Combine idct and reconstruction steps

This moves the prediction step before the idct and combines the idct and
reconstruction steps into a single step. Combining them seems to give an
overall decoder performance improvement of about 1%.

Change-Id: I90d8b167ec70d79c7ba2ee484106a78b3d16e318
---
 vp8/common/arm/idct_arm.h              |   8 -
 vp8/common/arm/systemdependent.c       |   2 -
 vp8/common/generic/systemdependent.c   |   2 +-
 vp8/common/idct.h                      |  20 ++-
 vp8/common/idctllm.c                   |  33 ++--
 vp8/common/x86/idct_x86.h              |   4 -
 vp8/common/x86/x86_systemdependent.c   |   1 -
 vp8/decoder/arm/dequantize_arm.h       |  18 --
 vp8/decoder/arm/dsystemdependent.c     |   4 -
 vp8/decoder/decodframe.c               | 226 ++++++++++++-------------
 vp8/decoder/dequantize.c               |  58 ++++++-
 vp8/decoder/dequantize.h               |  38 +++--
 vp8/decoder/generic/dsystemdependent.c |   4 +-
 vp8/decoder/x86/dequantize_x86.h       |   8 -
 vp8/decoder/x86/x86_dsystemdependent.c |   2 -
 15 files changed, 218 insertions(+), 210 deletions(-)

diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index 87d888de2..97af32e69 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -15,7 +15,6 @@
 #if HAVE_ARMV6
 extern prototype_idct(vp8_short_idct4x4llm_1_v6);
 extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
-extern prototype_idct_scalar(vp8_dc_only_idct_armv6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_armv6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 
@@ -25,9 +24,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_armv6
-
 #undef  vp8_idct_iwalsh1
 #define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_armv6
 
@@ -38,7 +34,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
 extern prototype_idct(vp8_short_idct4x4llm_neon);
-extern prototype_idct_scalar(vp8_dc_only_idct_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 
@@ -48,9 +43,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_neon
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_neon
-
 #undef  vp8_idct_iwalsh1
 #define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
 
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index bd6d146c0..b58cd789f 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -43,7 +43,6 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_neon;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
 
@@ -75,7 +74,6 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_armv6;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 5f7f943f5..2fbeaee4c 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -32,7 +32,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 
     rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
     rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
+    rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_c;
     rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
     rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
 
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index adf7771ed..7b04799c8 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -18,8 +18,10 @@
 #define prototype_idct(sym) \
     void sym(short *input, short *output, int pitch)
 
-#define prototype_idct_scalar(sym) \
-    void sym(short input, short *output, int pitch)
+#define prototype_idct_scalar_add(sym) \
+    void sym(short input, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride)
 
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/idct_x86.h"
@@ -39,10 +41,10 @@ extern prototype_idct(vp8_idct_idct1);
 #endif
 extern prototype_idct(vp8_idct_idct16);
 
-#ifndef vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_c
+#ifndef vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_c
 #endif
-extern prototype_idct_scalar(vp8_idct_idct1_scalar);
+extern prototype_idct_scalar_add(vp8_idct_idct1_scalar_add);
 
 
 #ifndef vp8_idct_iwalsh1
@@ -56,14 +58,14 @@ extern prototype_second_order(vp8_idct_iwalsh1);
 extern prototype_second_order(vp8_idct_iwalsh16);
 
 typedef prototype_idct((*vp8_idct_fn_t));
-typedef prototype_idct_scalar((*vp8_idct_scalar_fn_t));
+typedef prototype_idct_scalar_add((*vp8_idct_scalar_add_fn_t));
 typedef prototype_second_order((*vp8_second_order_fn_t));
 
 typedef struct
 {
-    vp8_idct_fn_t         idct1;
-    vp8_idct_fn_t         idct16;
-    vp8_idct_scalar_fn_t  idct1_scalar;
+    vp8_idct_fn_t            idct1;
+    vp8_idct_fn_t            idct16;
+    vp8_idct_scalar_add_fn_t idct1_scalar_add;
 
     vp8_second_order_fn_t iwalsh1;
     vp8_second_order_fn_t iwalsh16;
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index 2866fa4bb..c9686626c 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -104,23 +104,30 @@ void vp8_short_idct4x4llm_1_c(short *input, short *output, int pitch)
     }
 }
 
-
-void vp8_dc_only_idct_c(short input_dc, short *output, int pitch)
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
 {
-    int i;
-    int a1;
-    short *op = output;
-    int shortpitch = pitch >> 1;
-    a1 = ((input_dc + 4) >> 3);
+    int a1 = ((input_dc + 4) >> 3);
+    int r, c;
 
-    for (i = 0; i < 4; i++)
+    for (r = 0; r < 4; r++)
     {
-        op[0] = a1;
-        op[1] = a1;
-        op[2] = a1;
-        op[3] = a1;
-        op += shortpitch;
+        for (c = 0; c < 4; c++)
+        {
+            int a = a1 + pred_ptr[c] ;
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dst_ptr[c] = (unsigned char) a ;
+        }
+
+        dst_ptr += stride;
+        pred_ptr += pitch;
     }
+
 }
 
 void vp8_short_inv_walsh4x4_c(short *input, short *output)
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index 0ad8f55cf..d7b4fc95c 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -22,7 +22,6 @@
 #if HAVE_MMX
 extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
 extern prototype_idct(vp8_short_idct4x4llm_mmx);
-extern prototype_idct_scalar(vp8_dc_only_idct_mmx);
 
 extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
@@ -34,9 +33,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
 
-#undef  vp8_idct_idct1_scalar
-#define vp8_idct_idct1_scalar vp8_dc_only_idct_mmx
-
 #undef vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
 
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 677eafa84..d7070f8e3 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -42,7 +42,6 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
     {
         rtcd->idct.idct1        = vp8_short_idct4x4llm_1_mmx;
         rtcd->idct.idct16       = vp8_short_idct4x4llm_mmx;
-        rtcd->idct.idct1_scalar = vp8_dc_only_idct_mmx;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
         rtcd->idct.iwalsh1     = vp8_short_inv_walsh4x4_1_mmx;
 
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 31fc5d05c..78af0195d 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -14,32 +14,14 @@
 
 #if HAVE_ARMV6
 extern prototype_dequant_block(vp8_dequantize_b_v6);
-extern prototype_dequant_idct(vp8_dequant_idct_v6);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_v6);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_v6
-
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_v6
-#endif
-
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
-extern prototype_dequant_idct(vp8_dequant_idct_neon);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_neon);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_neon
-
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_neon
-#endif
-
 #endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 1504216fd..2ea03c415 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -23,8 +23,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->mb.rtcd         = &pbi->common.rtcd;
 #if HAVE_ARMV7
     pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dequant.idct    = vp8_dequant_idct_neon;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_neon;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
@@ -32,8 +30,6 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 
 #elif HAVE_ARMV6
     pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dequant.idct    = vp8_dequant_idct_v6;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_v6;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index a5850db6d..5668cef4d 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -126,7 +126,6 @@ static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
     }
 }
 
-
 static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 {
     /* If the MV points so far into the UMV border that no visible pixels
@@ -182,110 +181,6 @@ static void clamp_mvs(MACROBLOCKD *xd)
 
 }
 
-static void reconstruct_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
-    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
-    {
-        vp8_build_intra_predictors_mbuv(xd);
-
-        if (xd->mbmi.mode != B_PRED)
-        {
-            vp8_build_intra_predictors_mby_ptr(xd);
-            vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
-        }
-        else
-        {
-            vp8_recon_intra4x4mb(RTCD_VTABLE(recon), xd);
-        }
-    }
-    else
-    {
-        vp8_build_inter_predictors_mb(xd);
-        vp8_recon16x16mb(RTCD_VTABLE(recon), xd);
-    }
-}
-
-
-static void de_quantand_idct(VP8D_COMP *pbi, MACROBLOCKD *xd)
-{
-    int i;
-    BLOCKD *b = &xd->block[24];
-
-
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
-    {
-        DEQUANT_INVOKE(&pbi->dequant, block)(b);
-
-        // do 2nd order transform on the dc block
-        if (b->eob > 1)
-        {
-            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
-            ((int *)b->qcoeff)[0] = 0;
-            ((int *)b->qcoeff)[1] = 0;
-            ((int *)b->qcoeff)[2] = 0;
-            ((int *)b->qcoeff)[3] = 0;
-            ((int *)b->qcoeff)[4] = 0;
-            ((int *)b->qcoeff)[5] = 0;
-            ((int *)b->qcoeff)[6] = 0;
-            ((int *)b->qcoeff)[7] = 0;
-        }
-        else
-        {
-            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
-            ((int *)b->qcoeff)[0] = 0;
-        }
-
-
-        for (i = 0; i < 16; i++)
-        {
-
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct_dc)(b->qcoeff, &b->dequant[0][0], b->diff, 32, xd->block[24].diff[i]);
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(xd->block[24].diff[i], b->diff, 32);
-            }
-        }
-
-        for (i = 16; i < 24; i++)
-        {
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, 16);
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, 16);
-                ((int *)b->qcoeff)[0] = 0;
-            }
-        }
-    }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct)(b->qcoeff, &b->dequant[0][0], b->diff, (32 - (i & 16)));
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar)(b->qcoeff[0] * b->dequant[0][0], b->diff, (32 - (i & 16)));
-                ((int *)b->qcoeff)[0] = 0;
-            }
-        }
-    }
-}
-
 void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     int eobtotal = 0;
@@ -321,27 +216,122 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     {
         xd->mode_info_context->mbmi.dc_diff = 0;
         skip_recon_mb(pbi, xd);
+        return;
+    }
+
+    if (xd->segmentation_enabled)
+        mb_init_dequantizer(pbi, xd);
+
+    // do prediction
+    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
+    {
+        vp8_build_intra_predictors_mbuv(xd);
+
+        if (xd->mbmi.mode != B_PRED)
+        {
+            vp8_build_intra_predictors_mby_ptr(xd);
+        } else {
+            vp8_intra_prediction_down_copy(xd);
+        }
     }
     else
     {
-        if (xd->segmentation_enabled)
-            mb_init_dequantizer(pbi, xd);
-
-        de_quantand_idct(pbi, xd);
-        reconstruct_mb(pbi, xd);
+        vp8_build_inter_predictors_mb(xd);
     }
 
-
-    /* Restore the original MV so as not to affect the entropy context. */
-    if (do_clamp)
+    // dequantization and idct
+    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
     {
-        if (xd->mbmi.mode == SPLITMV)
-            for (i=0; i<24; i++)
-                xd->block[i].bmi.mv.as_mv = orig_mvs[i];
+        BLOCKD *b = &xd->block[24];
+        DEQUANT_INVOKE(&pbi->dequant, block)(b);
+
+        // do 2nd order transform on the dc block
+        if (b->eob > 1)
+        {
+            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+            ((int *)b->qcoeff)[0] = 0;
+            ((int *)b->qcoeff)[1] = 0;
+            ((int *)b->qcoeff)[2] = 0;
+            ((int *)b->qcoeff)[3] = 0;
+            ((int *)b->qcoeff)[4] = 0;
+            ((int *)b->qcoeff)[5] = 0;
+            ((int *)b->qcoeff)[6] = 0;
+            ((int *)b->qcoeff)[7] = 0;
+        }
         else
         {
-            xd->mbmi.mv.as_mv = orig_mvs[0];
-            xd->block[16].bmi.mv.as_mv = orig_mvs[1];
+            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+            ((int *)b->qcoeff)[0] = 0;
+        }
+
+
+        for (i = 0; i < 16; i++)
+        {
+
+            b = &xd->block[i];
+
+            if (b->eob > 1)
+            {
+                DEQUANT_INVOKE(&pbi->dequant, idct_dc_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride,
+                        xd->block[24].diff[i]);
+            }
+            else
+            {
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(xd->block[24].diff[i], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+            }
+        }
+    }
+    else if ((xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME) && xd->mbmi.mode == B_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+
+            BLOCKD *b = &xd->block[i];
+            vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+
+            if (b->eob > 1)
+            {
+                DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+            }
+            else
+            {
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+                ((int *)b->qcoeff)[0] = 0;
+            }
+        }
+
+    }
+    else
+    {
+        for (i = 0; i < 16; i++)
+        {
+            BLOCKD *b = &xd->block[i];
+
+            if (b->eob > 1)
+            {
+                DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+            }
+            else
+            {
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+                ((int *)b->qcoeff)[0] = 0;
+            }
+        }
+    }
+
+    for (i = 16; i < 24; i++)
+    {
+
+        BLOCKD *b = &xd->block[i];
+
+        if (b->eob > 1)
+        {
+            DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride);
+        }
+        else
+        {
+            IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride);
+            ((int *)b->qcoeff)[0] = 0;
         }
     }
 }
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index b023d28eb..4c924ffb9 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -32,8 +32,12 @@ void vp8_dequantize_b_c(BLOCKD *d)
     }
 }
 
-void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
 {
+    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
+    short output[16*4];
+    short *diff_ptr = output;
+    int r, c;
     int i;
 
     for (i = 0; i < 16; i++)
@@ -41,13 +45,38 @@ void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch)
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch);
+    vp8_short_idct4x4llm_c(input, output, pitch*2);
+
     vpx_memset(input, 0, 32);
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+
+        dest += stride;
+        diff_ptr += pitch;
+        pred += pitch;
+    }
 }
 
-void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc)
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
 {
     int i;
+    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
+    short output[16*4];
+    short *diff_ptr = output;
+    int r, c;
 
     input[0] = (short)Dc;
 
@@ -56,6 +85,27 @@ void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, in
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch);
+    vp8_short_idct4x4llm_c(input, output, pitch*2);
+
     vpx_memset(input, 0, 32);
+
+    for (r = 0; r < 4; r++)
+    {
+        for (c = 0; c < 4; c++)
+        {
+            int a = diff_ptr[c] + pred[c];
+
+            if (a < 0)
+                a = 0;
+
+            if (a > 255)
+                a = 255;
+
+            dest[c] = (unsigned char) a;
+        }
+
+        dest += stride;
+        diff_ptr += pitch;
+        pred += pitch;
+    }
 }
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index a02ea7e81..50293c2f7 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -16,11 +16,16 @@
 #define prototype_dequant_block(sym) \
     void sym(BLOCKD *x)
 
-#define prototype_dequant_idct(sym) \
-    void sym(short *input, short *dq, short *output, int pitch)
+#define prototype_dequant_idct_add(sym) \
+    void sym(short *input, short *dq, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride)
 
-#define prototype_dequant_idct_dc(sym) \
-    void sym(short *input, short *dq, short *output, int pitch, int dc)
+#define prototype_dequant_idct_dc_add(sym) \
+    void sym(short *input, short *dq, \
+             unsigned char *pred, unsigned char *output, \
+             int pitch, int stride, \
+             int dc)
 
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/dequantize_x86.h"
@@ -35,25 +40,26 @@
 #endif
 extern prototype_dequant_block(vp8_dequant_block);
 
-#ifndef vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_c
+#ifndef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_c
 #endif
-extern prototype_dequant_idct(vp8_dequant_idct);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 
-#ifndef vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_c
+#ifndef vp8_dequant_idct_dc_add
+#define vp8_dequant_idct_dc_add vp8_dequant_dc_idct_add_c
 #endif
-extern prototype_dequant_idct_dc(vp8_dequant_idct_dc);
-
+extern prototype_dequant_idct_dc_add(vp8_dequant_idct_dc_add);
 
 typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
-typedef prototype_dequant_idct((*vp8_dequant_idct_fn_t));
-typedef prototype_dequant_idct_dc((*vp8_dequant_idct_dc_fn_t));
+
+typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
+typedef prototype_dequant_idct_dc_add((*vp8_dequant_idct_dc_add_fn_t));
+
 typedef struct
 {
-    vp8_dequant_block_fn_t    block;
-    vp8_dequant_idct_fn_t     idct;
-    vp8_dequant_idct_dc_fn_t  idct_dc;
+    vp8_dequant_block_fn_t        block;
+    vp8_dequant_idct_add_fn_t     idct_add;
+    vp8_dequant_idct_dc_add_fn_t  idct_dc_add;
 } vp8_dequant_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 272d422ad..c72597f4a 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -21,8 +21,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 #if CONFIG_RUNTIME_CPU_DETECT
     pbi->mb.rtcd         = &pbi->common.rtcd;
     pbi->dequant.block   = vp8_dequantize_b_c;
-    pbi->dequant.idct    = vp8_dequant_idct_c;
-    pbi->dequant.idct_dc = vp8_dequant_dc_idct_c;
+    pbi->dequant.idct_add    = vp8_dequant_idct_add_c;
+    pbi->dequant.idct_dc_add    = vp8_dequant_dc_idct_add_c;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
 #if 0 //For use with RTCD, when implemented
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 743a8f5dd..563d58460 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -21,20 +21,12 @@
  */
 #if HAVE_MMX
 extern prototype_dequant_block(vp8_dequantize_b_mmx);
-extern prototype_dequant_idct(vp8_dequant_idct_mmx);
-extern prototype_dequant_idct_dc(vp8_dequant_dc_idct_mmx);
 
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_mmx
 
-#undef  vp8_dequant_idct
-#define vp8_dequant_idct vp8_dequant_idct_mmx
-
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_dc vp8_dequant_dc_idct_mmx
-
 #endif
 #endif
 
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 957fb1d67..bcd2acc75 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -43,8 +43,6 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
     if (flags & HAS_MMX)
     {
         pbi->dequant.block   = vp8_dequantize_b_mmx;
-        pbi->dequant.idct    = vp8_dequant_idct_mmx;
-        pbi->dequant.idct_dc = vp8_dequant_dc_idct_mmx;
     }
 
 #endif

From 98fcccfe9751894ace9693a39ba0609fe5ea904d Mon Sep 17 00:00:00 2001
From: Jeff Muizelaar <jmuizelaar@mozilla.com>
Date: Thu, 3 Jun 2010 10:16:07 -0400
Subject: [PATCH 089/307] Change the x86 idct functions to do reconstruction at
 the same time

Change-Id: I896fe6f9664e6849c7cee2cc6bb4e045eb42540f
---
 vp8/common/x86/idct_x86.h              |  4 ++
 vp8/common/x86/idctllm_mmx.asm         | 58 +++++++++++-----
 vp8/common/x86/x86_systemdependent.c   |  1 +
 vp8/decoder/x86/dequantize_mmx.asm     | 93 +++++++++++++++++++-------
 vp8/decoder/x86/dequantize_x86.h       |  8 +++
 vp8/decoder/x86/x86_dsystemdependent.c |  2 +
 6 files changed, 127 insertions(+), 39 deletions(-)

diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index d7b4fc95c..f1e433d5c 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -22,6 +22,7 @@
 #if HAVE_MMX
 extern prototype_idct(vp8_short_idct4x4llm_1_mmx);
 extern prototype_idct(vp8_short_idct4x4llm_mmx);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_mmx);
 
 extern prototype_second_order(vp8_short_inv_walsh4x4_mmx);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
@@ -33,6 +34,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_1_mmx);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_mmx
 
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_mmx
+
 #undef vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_mmx
 
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 1b18bd38d..b0d4a0c0d 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -220,35 +220,61 @@ sym(vp8_short_idct4x4llm_1_mmx):
     pop         rbp
     ret
 
-;void dc_only_idct_mmx(short input_dc, short *output, int pitch)
-global sym(vp8_dc_only_idct_mmx)
-sym(vp8_dc_only_idct_mmx):
+;void vp8_dc_only_idct_add_mmx(short input_dc, unsigned char *pred_ptr, unsigned char *dst_ptr, int pitch, int stride)
+global sym(vp8_dc_only_idct_add_mmx)
+sym(vp8_dc_only_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
+    SHADOW_ARGS_TO_STACK 5
     GET_GOT     rbx
+    push        rsi
+    push        rdi
     ; end prolog
 
-        movd        mm0,            arg(0) ;input_dc
+        mov         rsi,            arg(1) ;s -- prediction
+        mov         rdi,            arg(2) ;d -- destination
+        movsxd      rax,            dword ptr arg(4) ;stride
+        movsxd      rdx,            dword ptr arg(3) ;pitch
+        pxor        mm0,            mm0
 
-        paddw       mm0,            [fours GLOBAL]
-        mov         rdx,            arg(1) ;output
+        movd        mm5,            arg(0) ;input_dc
 
-        psraw       mm0,            3
-        movsxd      rax,            dword ptr arg(2) ;pitch
+        paddw       mm5,            [fours GLOBAL]
 
-        punpcklwd   mm0,            mm0
-        punpckldq   mm0,            mm0
+        psraw       mm5,            3
 
-        movq        [rdx],          mm0
-        movq        [rdx+rax],      mm0
+        punpcklwd   mm5,            mm5
+        punpckldq   mm5,            mm5
 
-        movq        [rdx+rax*2],    mm0
-        add         rdx,            rax
+        movd        mm1,            [rsi]
+        punpcklbw   mm1,            mm0
+        paddsw      mm1,            mm5
+        packuswb    mm1,            mm0              ; pack and unpack to saturate
+        movd        [rdi],          mm1
 
-        movq        [rdx+rax*2],    mm0
+        movd        mm2,            [rsi+rdx]
+        punpcklbw   mm2,            mm0
+        paddsw      mm2,            mm5
+        packuswb    mm2,            mm0              ; pack and unpack to saturate
+        movd        [rdi+rax],      mm2
+
+        movd        mm3,            [rsi+2*rdx]
+        punpcklbw   mm3,            mm0
+        paddsw      mm3,            mm5
+        packuswb    mm3,            mm0              ; pack and unpack to saturate
+        movd        [rdi+2*rax],    mm3
+
+        add         rdi,            rax
+        add         rsi,            rdx
+        movd        mm4,            [rsi+2*rdx]
+        punpcklbw   mm4,            mm0
+        paddsw      mm4,            mm5
+        packuswb    mm4,            mm0              ; pack and unpack to saturate
+        movd        [rdi+2*rax],    mm4
 
     ; begin epilog
+    pop rdi
+    pop rsi
     RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index d7070f8e3..66738f855 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -42,6 +42,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
     {
         rtcd->idct.idct1        = vp8_short_idct4x4llm_1_mmx;
         rtcd->idct.idct16       = vp8_short_idct4x4llm_mmx;
+        rtcd->idct.idct1_scalar_add = vp8_dc_only_idct_add_mmx;
         rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_mmx;
         rtcd->idct.iwalsh1     = vp8_short_inv_walsh4x4_1_mmx;
 
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 2dcf548f6..7ad9289cc 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -50,12 +50,12 @@ sym(vp8_dequantize_b_impl_mmx):
     ret
 
 
-;void dequant_idct_mmx(short *input, short *dq, short *output, int pitch)
-global sym(vp8_dequant_idct_mmx)
-sym(vp8_dequant_idct_mmx):
+;void dequant_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+global sym(vp8_dequant_idct_add_mmx)
+sym(vp8_dequant_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 4
+    SHADOW_ARGS_TO_STACK 6
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -77,7 +77,8 @@ sym(vp8_dequant_idct_mmx):
         movq        mm3,    [rax+24]
         pmullw      mm3,    [rdx+24]
 
-        mov         rdx,    arg(2) ;output
+        mov         rdx,    arg(3) ;dest
+        mov         rsi,    arg(2) ;pred
         pxor        mm7,    mm7
 
 
@@ -88,7 +89,8 @@ sym(vp8_dequant_idct_mmx):
         movq        [rax+24],mm7
 
 
-        movsxd      rax,            dword ptr arg(3) ;pitch
+        movsxd      rax,            dword ptr arg(4) ;pitch
+        movsxd      rdi,            dword ptr arg(5) ;stride
 
         psubw       mm0,            mm2             ; b1= 0-2
         paddw       mm2,            mm2             ;
@@ -207,13 +209,34 @@ sym(vp8_dequant_idct_mmx):
         punpckldq   mm2,            mm4             ; 32 22 12 02
         punpckhdq   mm5,            mm4             ; 33 23 13 03
 
-        movq        [rdx],          mm0
+        pxor        mm7,            mm7
 
-        movq        [rdx+rax],      mm1
-        movq        [rdx+rax*2],    mm2
+        movd        mm4,            [rsi]
+        punpcklbw   mm4,            mm7
+        paddsw      mm0,            mm4
+        packuswb    mm0,            mm7
+        movd        [rdx],          mm0
 
-        add         rdx,            rax
-        movq        [rdx+rax*2],    mm5
+        movd        mm4,            [rsi+rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm1,            mm4
+        packuswb    mm1,            mm7
+        movd        [rdx+rdi],      mm1
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm2,            mm4
+        packuswb    mm2,            mm7
+        movd        [rdx+rdi*2],    mm2
+
+        add         rdx,            rdi
+        add         rsi,            rax
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm5,            mm4
+        packuswb    mm5,            mm7
+        movd        [rdx+rdi*2],    mm5
 
     ; begin epilog
     pop rdi
@@ -224,12 +247,12 @@ sym(vp8_dequant_idct_mmx):
     ret
 
 
-;void dequant_dc_idct_mmx(short *input, short *dq, short *output, int pitch, int Dc)
-global sym(vp8_dequant_dc_idct_mmx)
-sym(vp8_dequant_dc_idct_mmx):
+;void dequant_dc_idct_add_mmx(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+global sym(vp8_dequant_dc_idct_add_mmx)
+sym(vp8_dequant_dc_idct_add_mmx):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
+    SHADOW_ARGS_TO_STACK 7
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -238,7 +261,7 @@ sym(vp8_dequant_dc_idct_mmx):
         mov         rax,    arg(0) ;input
         mov         rdx,    arg(1) ;dq
 
-        movsxd      rcx,    dword ptr arg(4) ;Dc
+        movsxd      rcx,    dword ptr arg(6) ;Dc
 
         movq        mm0,    [rax   ]
         pmullw      mm0,    [rdx]
@@ -252,7 +275,8 @@ sym(vp8_dequant_dc_idct_mmx):
         movq        mm3,    [rax+24]
         pmullw      mm3,    [rdx+24]
 
-        mov         rdx,    arg(2) ;output
+        mov         rdx,    arg(3) ;dest
+        mov         rsi,    arg(2) ;pred
         pxor        mm7,    mm7
 
 
@@ -262,8 +286,10 @@ sym(vp8_dequant_dc_idct_mmx):
         movq        [rax+16],mm7
         movq        [rax+24],mm7
 
+
         pinsrw      mm0,    rcx,  0
-        movsxd      rax,            dword ptr arg(3) ;pitch
+        movsxd      rax,            dword ptr arg(4) ;pitch
+        movsxd      rdi,            dword ptr arg(5) ;stride
 
         psubw       mm0,            mm2             ; b1= 0-2
         paddw       mm2,            mm2             ;
@@ -382,13 +408,34 @@ sym(vp8_dequant_dc_idct_mmx):
         punpckldq   mm2,            mm4             ; 32 22 12 02
         punpckhdq   mm5,            mm4             ; 33 23 13 03
 
-        movq        [rdx],          mm0
+        pxor        mm7,            mm7
 
-        movq        [rdx+rax],      mm1
-        movq        [rdx+rax*2],    mm2
+        movd        mm4,            [rsi]
+        punpcklbw   mm4,            mm7
+        paddsw      mm0,            mm4
+        packuswb    mm0,            mm7
+        movd        [rdx],          mm0
 
-        add         rdx,            rax
-        movq        [rdx+rax*2],    mm5
+        movd        mm4,            [rsi+rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm1,            mm4
+        packuswb    mm1,            mm7
+        movd        [rdx+rdi],      mm1
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm2,            mm4
+        packuswb    mm2,            mm7
+        movd        [rdx+rdi*2],    mm2
+
+        add         rdx,            rdi
+        add         rsi,            rax
+
+        movd        mm4,            [rsi+2*rax]
+        punpcklbw   mm4,            mm7
+        paddsw      mm5,            mm4
+        packuswb    mm5,            mm7
+        movd        [rdx+rdi*2],    mm5
 
     ; begin epilog
     pop rdi
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 563d58460..32e777994 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -21,12 +21,20 @@
  */
 #if HAVE_MMX
 extern prototype_dequant_block(vp8_dequantize_b_mmx);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
+extern prototype_dequant_idct_dc_add(vp8_dequant_dc_idct_add_mmx);
 
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_mmx
 
+#undef  vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
+
+#undef  vp8_dequant_idct_dc
+#define vp8_dequant_idct_add_dc vp8_dequant_dc_idct_add_mmx
+
 #endif
 #endif
 
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index bcd2acc75..d7bed0873 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -43,6 +43,8 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
     if (flags & HAS_MMX)
     {
         pbi->dequant.block   = vp8_dequantize_b_mmx;
+        pbi->dequant.idct_add    = vp8_dequant_idct_add_mmx;
+        pbi->dequant.idct_dc_add = vp8_dequant_dc_idct_add_mmx;
     }
 
 #endif

From eafcf918a0a20b0a6763a826de8cf821961e75a0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredrik=20S=C3=B6derquist?= <fs@opera.com>
Date: Mon, 7 Jun 2010 18:20:47 +0200
Subject: [PATCH 090/307] Only touch ctx->priv if vp8_mmap_alloc succeeded.

---
 vp8/vp8_dx_iface.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index e0e1103f0..36a0b39ef 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -223,11 +223,12 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx)
         res = vp8_mmap_alloc(&mmap);
 
         if (!res)
+        {
             vp8_init_ctx(ctx, &mmap);
 
-        ctx->priv->alg_priv->defer_alloc = 1;
-        /*post processing level initialized to do nothing */
-
+            ctx->priv->alg_priv->defer_alloc = 1;
+            /*post processing level initialized to do nothing */
+        }
     }
 
     return res;

From 2add72d9bcec97855691b90e62b8f3f1634e3544 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fredrik=20S=C3=B6derquist?= <fs@opera.com>
Date: Mon, 7 Jun 2010 18:24:41 +0200
Subject: [PATCH 091/307] Don't dereference ctx->priv if it hasn't been setup
 correctly.

---
 vp8/vp8_dx_iface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 36a0b39ef..7e4fc0be6 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -528,7 +528,7 @@ static vpx_codec_err_t vp8_xma_set_mmap(vpx_codec_ctx_t         *ctx,
 
     done = 1;
 
-    if (ctx->priv->alg_priv)
+    if (!res && ctx->priv->alg_priv)
     {
         for (i = 0; i < NELEMENTS(vp8_mem_req_segs); i++)
         {

From 1d8277f8e8e0866cd21b95674717c2e467f4ffd9 Mon Sep 17 00:00:00 2001
From: Justin Lebar <justin.lebar@gmail.com>
Date: Fri, 23 Jul 2010 16:42:25 -0700
Subject: [PATCH 092/307] Issue 150: Fixing linker warning in extend.c.

---
 vp8/common/extend.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index a16ff2bad..0608a13b4 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -39,7 +39,10 @@ static void extend_plane_borders
 
     for (i = 0; i < h - 0 + 1; i++)
     {
-        vpx_memset(dest_ptr1, src_ptr1[0], el);
+        // Some linkers will complain if we call vpx_memset with el set to a
+        // constant 0.
+        if (el)
+            vpx_memset(dest_ptr1, src_ptr1[0], el);
         vpx_memset(dest_ptr2, src_ptr2[0], er);
         src_ptr1  += sp;
         src_ptr2  += sp;

From 56f5a9a060d4c89a71616a90207327e6c544f543 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Fri, 23 Jul 2010 13:42:30 -0400
Subject: [PATCH 093/307] update arm idct functions

Jeff Muizelaar posted some changes to the idct/reconstruction c code.
This is the equivalent update for the arm assembly.

This shows a good boost on v6, and a minor boost on neon.
Here are some numbers for highway in qcif, 2641 frames:
HEAD neon: ~161 fps
new neon:  ~162 fps
HEAD v6:   ~102 fps
new v6:    ~106 fps

The following functions have been updated for armv6 and neon:
vp8_dc_only_idct_add
vp8_dequant_idct_add
vp8_dequant_dc_idct_add

Conflicts:

	vp8/decoder/arm/armv6/dequantdcidct_v6.asm
	vp8/decoder/arm/armv6/dequantidct_v6.asm

Resolved by removing these files. When I rewrote the functions, I also
moved the files to dequant_dc_idct_v6.asm/dequant_idct_v6.asm

Change-Id: Ie3300df824d52474eca1a5134cf22d8b7809a5d4
---
 vp8/common/arm/armv6/dc_only_idct_add_v6.asm  |  67 ++++++
 vp8/common/arm/armv6/idct_v6.asm              |  32 ---
 vp8/common/arm/armv6/iwalsh_v6.asm            |  16 +-
 vp8/common/arm/idct_arm.h                     |  16 +-
 vp8/common/arm/neon/dc_only_idct_add_neon.asm |  49 ++++
 vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm  | 218 ++++++++++++++++++
 vp8/decoder/arm/armv6/dequant_idct_v6.asm     | 196 ++++++++++++++++
 vp8/decoder/arm/armv6/dequantdcidct_v6.asm    | 203 ----------------
 vp8/decoder/arm/armv6/dequantidct_v6.asm      | 184 ---------------
 vp8/decoder/arm/dequantize_arm.h              |  18 ++
 ...idct_neon.asm => dequant_dc_idct_neon.asm} |  72 +++---
 ...antidct_neon.asm => dequant_idct_neon.asm} |  67 +++---
 vp8/decoder/decodframe.c                      |   6 +-
 vp8/decoder/dequantize.c                      |  23 +-
 vp8/decoder/dequantize.h                      |  12 +-
 vp8/decoder/generic/dsystemdependent.c        |   2 +-
 vp8/decoder/x86/dequantize_x86.h              |   8 +-
 vp8/decoder/x86/x86_dsystemdependent.c        |   2 +-
 vp8/vp8_common.mk                             |   3 +
 vp8/vp8dx_arm.mk                              |   8 +-
 20 files changed, 675 insertions(+), 527 deletions(-)
 create mode 100644 vp8/common/arm/armv6/dc_only_idct_add_v6.asm
 create mode 100644 vp8/common/arm/neon/dc_only_idct_add_neon.asm
 create mode 100644 vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
 create mode 100644 vp8/decoder/arm/armv6/dequant_idct_v6.asm
 delete mode 100644 vp8/decoder/arm/armv6/dequantdcidct_v6.asm
 delete mode 100644 vp8/decoder/arm/armv6/dequantidct_v6.asm
 rename vp8/decoder/arm/neon/{dequantdcidct_neon.asm => dequant_dc_idct_neon.asm} (65%)
 rename vp8/decoder/arm/neon/{dequantidct_neon.asm => dequant_idct_neon.asm} (66%)

diff --git a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
new file mode 100644
index 000000000..19227282e
--- /dev/null
+++ b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
@@ -0,0 +1,67 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+    EXPORT  |vp8_dc_only_idct_add_v6|
+
+    AREA    |.text|, CODE, READONLY
+
+;void vp8_dc_only_idct_add_v6(short input_dc, unsigned char *pred_ptr,
+;                             unsigned char *dst_ptr, int pitch, int stride)
+; r0  input_dc
+; r1  pred_ptr
+; r2  dest_ptr
+; r3  pitch
+; sp  stride
+
+|vp8_dc_only_idct_add_v6| PROC
+    stmdb       sp!, {r4 - r7, lr}
+
+    add         r0, r0, #4                ; input_dc += 4
+    ldr         r12, c0x0000FFFF
+    ldr         r4, [r1], r3
+    ldr         r6, [r1], r3
+    and         r0, r12, r0, asr #3       ; input_dc >> 3 + mask
+    ldr         lr, [sp, #20]
+    orr         r0, r0, r0, lsl #16       ; a1 | a1
+
+    uxtab16     r5, r0, r4                ; a1+2 | a1+0
+    uxtab16     r4, r0, r4, ror #8        ; a1+3 | a1+1
+    uxtab16     r7, r0, r6
+    uxtab16     r6, r0, r6, ror #8
+    usat16      r5, #8, r5
+    usat16      r4, #8, r4
+    usat16      r7, #8, r7
+    usat16      r6, #8, r6
+    orr         r5, r5, r4, lsl #8
+    orr         r7, r7, r6, lsl #8
+    ldr         r4, [r1], r3
+    ldr         r6, [r1]
+    str         r5, [r2], lr
+    str         r7, [r2], lr
+
+    uxtab16     r5, r0, r4
+    uxtab16     r4, r0, r4, ror #8
+    uxtab16     r7, r0, r6
+    uxtab16     r6, r0, r6, ror #8
+    usat16      r5, #8, r5
+    usat16      r4, #8, r4
+    usat16      r7, #8, r7
+    usat16      r6, #8, r6
+    orr         r5, r5, r4, lsl #8
+    orr         r7, r7, r6, lsl #8
+    str         r5, [r2], lr
+    str         r7, [r2]
+
+    ldmia       sp!, {r4 - r7, pc}
+
+    ENDP  ; |vp8_dc_only_idct_add_v6|
+
+; Constant Pool
+c0x0000FFFF DCD 0x0000FFFF
+    END
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index d9913c75e..d96908cc6 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -15,8 +15,6 @@
     EXPORT  |vp8_short_idct4x4llm_v6_scott|
     EXPORT  |vp8_short_idct4x4llm_v6_dual|
 
-    EXPORT  |vp8_dc_only_idct_armv6|
-
     AREA    |.text|, CODE, READONLY
 
 ;********************************************************************************
@@ -344,34 +342,4 @@ loop2_dual
     ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
     ENDP
 
-
-; sjl added 10/17/08
-;void dc_only_idct_armv6(short input_dc, short *output, int pitch)
-|vp8_dc_only_idct_armv6| PROC
-    stmdb       sp!, {r4 - r6, lr}
-
-    add         r0, r0, #0x4
-    add         r4, r1, r2                      ; output + shortpitch
-    mov         r0, r0, ASR #0x3    ;aka a1
-    add         r5, r1, r2, LSL #1              ; output + shortpitch * 2
-    pkhbt       r0, r0, r0, lsl #16             ; a1 | a1
-    add         r6, r5, r2                      ; output + shortpitch * 3
-
-    str         r0, [r1, #0]
-    str         r0, [r1, #4]
-
-    str         r0, [r4, #0]
-    str         r0, [r4, #4]
-
-    str         r0, [r5, #0]
-    str         r0, [r5, #4]
-
-    str         r0, [r6, #0]
-    str         r0, [r6, #4]
-
-
-    ldmia       sp!, {r4 - r6, pc}
-
-    ENDP  ; |vp8_dc_only_idct_armv6|
-
     END
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index f4002b2ce..cab6bc916 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -8,8 +8,8 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-    EXPORT |vp8_short_inv_walsh4x4_armv6|
-    EXPORT |vp8_short_inv_walsh4x4_1_armv6|
+    EXPORT |vp8_short_inv_walsh4x4_v6|
+    EXPORT |vp8_short_inv_walsh4x4_1_v6|
 
     ARM
     REQUIRE8
@@ -17,8 +17,8 @@
 
     AREA    |.text|, CODE, READONLY  ; name this block of code
 
-;short vp8_short_inv_walsh4x4_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_armv6| PROC
+;short vp8_short_inv_walsh4x4_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_v6| PROC
 
     stmdb       sp!, {r4 - r11, lr}
 
@@ -123,11 +123,11 @@
     str         r5, [r1]
 
     ldmia       sp!, {r4 - r11, pc}
-    ENDP        ; |vp8_short_inv_walsh4x4_armv6|
+    ENDP        ; |vp8_short_inv_walsh4x4_v6|
 
 
-;short vp8_short_inv_walsh4x4_1_armv6(short *input, short *output)
-|vp8_short_inv_walsh4x4_1_armv6| PROC
+;short vp8_short_inv_walsh4x4_1_v6(short *input, short *output)
+|vp8_short_inv_walsh4x4_1_v6| PROC
 
     ldrsh       r2, [r0]             ; [0]
     add         r2, r2, #3           ; [0] + 3
@@ -145,7 +145,7 @@
     str         r2, [r1]
 
     bx          lr
-    ENDP        ; |vp8_short_inv_walsh4x4_1_armv6|
+    ENDP        ; |vp8_short_inv_walsh4x4_1_v6|
 
 ; Constant Pool
 c0x00030003 DCD 0x00030003
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index 97af32e69..6d917c445 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -15,8 +15,9 @@
 #if HAVE_ARMV6
 extern prototype_idct(vp8_short_idct4x4llm_1_v6);
 extern prototype_idct(vp8_short_idct4x4llm_v6_dual);
-extern prototype_second_order(vp8_short_inv_walsh4x4_1_armv6);
-extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
+extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
@@ -24,16 +25,20 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_armv6);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_v6_dual
 
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_v6
+
 #undef  vp8_idct_iwalsh1
-#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_armv6
+#define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_v6
 
 #undef  vp8_idct_iwalsh16
-#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_armv6
+#define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 #endif
 
 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
 extern prototype_idct(vp8_short_idct4x4llm_neon);
+extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 
@@ -43,6 +48,9 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 #undef  vp8_idct_idct16
 #define vp8_idct_idct16 vp8_short_idct4x4llm_neon
 
+#undef  vp8_idct_idct1_scalar_add
+#define vp8_idct_idct1_scalar_add vp8_dc_only_idct_add_neon
+
 #undef  vp8_idct_iwalsh1
 #define vp8_idct_iwalsh1 vp8_short_inv_walsh4x4_1_neon
 
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
new file mode 100644
index 000000000..e6f141fda
--- /dev/null
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
@@ -0,0 +1,49 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_dc_only_idct_add_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void vp8_dc_only_idct_add_neon(short input_dc, unsigned char *pred_ptr,
+;                               unsigned char *dst_ptr, int pitch, int stride)
+; r0  input_dc
+; r1  pred_ptr
+; r2  dst_ptr
+; r3  pitch
+; sp  stride
+|vp8_dc_only_idct_add_neon| PROC
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    ldr             r12, [sp]
+    vdup.16         q0, r0
+
+    vld1.32         {d2[0]}, [r1], r3
+    vld1.32         {d2[1]}, [r1], r3
+    vld1.32         {d4[0]}, [r1], r3
+    vld1.32         {d4[1]}, [r1]
+
+    vaddw.u8        q1, q0, d2
+    vaddw.u8        q2, q0, d4
+
+    vqmovun.s16     d2, q1
+    vqmovun.s16     d4, q2
+
+    vst1.32         {d2[0]}, [r2], r12
+    vst1.32         {d2[1]}, [r2], r12
+    vst1.32         {d4[0]}, [r2], r12
+    vst1.32         {d4[1]}, [r2]
+
+    bx             lr
+
+    ENDP
+    END
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
new file mode 100644
index 000000000..886873c70
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -0,0 +1,218 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT |vp8_dequant_dc_idct_add_v6|
+
+    AREA |.text|, CODE, READONLY
+
+;void vp8_dequant_dc_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride, int Dc)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch  ; +4 = 40
+; sp + 40 = stride  ; +4 = 44
+; sp + 44 = Dc  ; +4 = 48
+
+
+|vp8_dequant_dc_idct_add_v6| PROC
+    stmdb   sp!, {r4-r11, lr}
+
+    ldr     r6, [sp, #44]
+
+    ldr     r4, [r0]                ;input
+    ldr     r5, [r1], #4            ;dq
+
+    sub     sp, sp, #4
+    str     r3, [sp]
+
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    mov     r12, #3
+
+vp8_dequant_dc_add_loop
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    subs    r12, r12, #1
+
+    ldrne   r4, [r0, #4]
+    ldrne   r5, [r1], #4
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    bne     vp8_dequant_dc_add_loop
+
+    sub     r0, r0, #32
+    mov     r1, r0
+
+; short_idct4x4llm_v6_dual
+    ldr     r3, cospi8sqrt2minus1
+    ldr     r4, sinpi8sqrt2
+    ldr     r6, [r0, #8]
+    mov     r5, #2
+vp8_dequant_dc_idct_loop1_v6
+    ldr     r12, [r0, #24]
+    ldr     r14, [r0, #16]
+    smulwt  r9, r3, r6
+    smulwb  r7, r3, r6
+    smulwt  r10, r4, r6
+    smulwb  r8, r4, r6
+    pkhbt   r7, r7, r9, lsl #16
+    smulwt  r11, r3, r12
+    pkhbt   r8, r8, r10, lsl #16
+    uadd16  r6, r6, r7
+    smulwt  r7, r4, r12
+    smulwb  r9, r3, r12
+    smulwb  r10, r4, r12
+    subs    r5, r5, #1
+    pkhbt   r9, r9, r11, lsl #16
+    ldr     r11, [r0], #4
+    pkhbt   r10, r10, r7, lsl #16
+    uadd16  r7, r12, r9
+    usub16  r7, r8, r7
+    uadd16  r6, r6, r10
+    uadd16  r10, r11, r14
+    usub16  r8, r11, r14
+    uadd16  r9, r10, r6
+    usub16  r10, r10, r6
+    uadd16  r6, r8, r7
+    usub16  r7, r8, r7
+    str     r6, [r1, #8]
+    ldrne   r6, [r0, #8]
+    str     r7, [r1, #16]
+    str     r10, [r1, #24]
+    str     r9, [r1], #4
+    bne     vp8_dequant_dc_idct_loop1_v6
+
+    mov     r5, #2
+    sub     r0, r1, #8
+vp8_dequant_dc_idct_loop2_v6
+    ldr     r6, [r0], #4
+    ldr     r7, [r0], #4
+    ldr     r8, [r0], #4
+    ldr     r9, [r0], #4
+    smulwt  r1, r3, r6
+    smulwt  r12, r4, r6
+    smulwt  lr, r3, r8
+    smulwt  r10, r4, r8
+    pkhbt   r11, r8, r6, lsl #16
+    pkhbt   r1, lr, r1, lsl #16
+    pkhbt   r12, r10, r12, lsl #16
+    pkhtb   r6, r6, r8, asr #16
+    uadd16  r6, r1, r6
+    pkhbt   lr, r9, r7, lsl #16
+    uadd16  r10, r11, lr
+    usub16  lr, r11, lr
+    pkhtb   r8, r7, r9, asr #16
+    subs    r5, r5, #1
+    smulwt  r1, r3, r8
+    smulwb  r7, r3, r8
+    smulwt  r11, r4, r8
+    smulwb  r9, r4, r8
+    pkhbt   r1, r7, r1, lsl #16
+    uadd16  r8, r1, r8
+    pkhbt   r11, r9, r11, lsl #16
+    usub16  r1, r12, r8
+    uadd16  r8, r11, r6
+    ldr     r9, c0x00040004
+    ldr     r12, [sp, #40]
+    uadd16  r6, r10, r8
+    usub16  r7, r10, r8
+    uadd16  r7, r7, r9
+    uadd16  r6, r6, r9
+    uadd16  r10, r14, r1
+    usub16  r1, r14, r1
+    uadd16  r10, r10, r9
+    uadd16  r1, r1, r9
+    ldr     r11, [r2], r12
+    mov     r8, r7, asr #3
+    pkhtb   r9, r8, r10, asr #19
+    mov     r8, r1, asr #3
+    pkhtb   r8, r8, r6, asr #19
+    uxtb16  lr, r11, ror #8
+    qadd16  r9, r9, lr
+    uxtb16  lr, r11
+    qadd16  r8, r8, lr
+    usat16  r9, #8, r9
+    usat16  r8, #8, r8
+    orr     r9, r8, r9, lsl #8
+    ldr     r11, [r2], r12
+    ldr     lr, [sp]
+    ldr     r12, [sp, #44]
+    mov     r7, r7, lsl #16
+    mov     r1, r1, lsl #16
+    mov     r10, r10, lsl #16
+    mov     r6, r6, lsl #16
+    mov     r7, r7, asr #3
+    pkhtb   r7, r7, r10, asr #19
+    mov     r1, r1, asr #3
+    pkhtb   r1, r1, r6, asr #19
+    uxtb16  r8, r11, ror #8
+    qadd16  r7, r7, r8
+    uxtb16  r8, r11
+    qadd16  r1, r1, r8
+    usat16  r7, #8, r7
+    usat16  r1, #8, r1
+    orr     r1, r1, r7, lsl #8
+    str     r9, [lr], r12
+    str     r1, [lr], r12
+    str     lr, [sp]
+    bne     vp8_dequant_dc_idct_loop2_v6
+
+; vpx_memset
+    sub     r0, r0, #32
+    add     sp, sp, #4
+
+    mov     r12, #0
+    str     r12, [r0]
+    str     r12, [r0, #4]
+    str     r12, [r0, #8]
+    str     r12, [r0, #12]
+    str     r12, [r0, #16]
+    str     r12, [r0, #20]
+    str     r12, [r0, #24]
+    str     r12, [r0, #28]
+
+    ldmia   sp!, {r4 - r11, pc}
+    ENDP    ; |vp8_dequant_dc_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2       DCD 0x00008A8C
+c0x00040004       DCD 0x00040004
+
+    END
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
new file mode 100644
index 000000000..c13b51299
--- /dev/null
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -0,0 +1,196 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+    EXPORT |vp8_dequant_idct_add_v6|
+
+    AREA |.text|, CODE, READONLY
+;void vp8_dequant_idct_v6(short *input, short *dq, unsigned char *pred,
+; unsigned char *dest, int pitch, int stride)
+; r0 = input
+; r1 = dq
+; r2 = pred
+; r3 = dest
+; sp + 36 = pitch  ; +4 = 40
+; sp + 40 = stride  ; +4 = 44
+
+
+|vp8_dequant_idct_add_v6| PROC
+    stmdb   sp!, {r4-r11, lr}
+
+    ldr     r4, [r0]                ;input
+    ldr     r5, [r1], #4            ;dq
+
+    sub     sp, sp, #4
+    str     r3, [sp]
+
+    mov     r12, #4
+
+vp8_dequant_add_loop
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    ldr     r4, [r0, #4]            ;input
+    ldr     r5, [r1], #4            ;dq
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    smulbb  r6, r4, r5
+    smultt  r7, r4, r5
+
+    subs    r12, r12, #1
+
+    ldrne   r4, [r0, #4]
+    ldrne   r5, [r1], #4
+
+    strh    r6, [r0], #2
+    strh    r7, [r0], #2
+
+    bne     vp8_dequant_add_loop
+
+    sub     r0, r0, #32
+    mov     r1, r0
+
+; short_idct4x4llm_v6_dual
+    ldr     r3, cospi8sqrt2minus1
+    ldr     r4, sinpi8sqrt2
+    ldr     r6, [r0, #8]
+    mov     r5, #2
+vp8_dequant_idct_loop1_v6
+    ldr     r12, [r0, #24]
+    ldr     r14, [r0, #16]
+    smulwt  r9, r3, r6
+    smulwb  r7, r3, r6
+    smulwt  r10, r4, r6
+    smulwb  r8, r4, r6
+    pkhbt   r7, r7, r9, lsl #16
+    smulwt  r11, r3, r12
+    pkhbt   r8, r8, r10, lsl #16
+    uadd16  r6, r6, r7
+    smulwt  r7, r4, r12
+    smulwb  r9, r3, r12
+    smulwb  r10, r4, r12
+    subs    r5, r5, #1
+    pkhbt   r9, r9, r11, lsl #16
+    ldr     r11, [r0], #4
+    pkhbt   r10, r10, r7, lsl #16
+    uadd16  r7, r12, r9
+    usub16  r7, r8, r7
+    uadd16  r6, r6, r10
+    uadd16  r10, r11, r14
+    usub16  r8, r11, r14
+    uadd16  r9, r10, r6
+    usub16  r10, r10, r6
+    uadd16  r6, r8, r7
+    usub16  r7, r8, r7
+    str     r6, [r1, #8]
+    ldrne   r6, [r0, #8]
+    str     r7, [r1, #16]
+    str     r10, [r1, #24]
+    str     r9, [r1], #4
+    bne     vp8_dequant_idct_loop1_v6
+
+    mov     r5, #2
+    sub     r0, r1, #8
+vp8_dequant_idct_loop2_v6
+    ldr     r6, [r0], #4
+    ldr     r7, [r0], #4
+    ldr     r8, [r0], #4
+    ldr     r9, [r0], #4
+    smulwt  r1, r3, r6
+    smulwt  r12, r4, r6
+    smulwt  lr, r3, r8
+    smulwt  r10, r4, r8
+    pkhbt   r11, r8, r6, lsl #16
+    pkhbt   r1, lr, r1, lsl #16
+    pkhbt   r12, r10, r12, lsl #16
+    pkhtb   r6, r6, r8, asr #16
+    uadd16  r6, r1, r6
+    pkhbt   lr, r9, r7, lsl #16
+    uadd16  r10, r11, lr
+    usub16  lr, r11, lr
+    pkhtb   r8, r7, r9, asr #16
+    subs    r5, r5, #1
+    smulwt  r1, r3, r8
+    smulwb  r7, r3, r8
+    smulwt  r11, r4, r8
+    smulwb  r9, r4, r8
+    pkhbt   r1, r7, r1, lsl #16
+    uadd16  r8, r1, r8
+    pkhbt   r11, r9, r11, lsl #16
+    usub16  r1, r12, r8
+    uadd16  r8, r11, r6
+    ldr     r9, c0x00040004
+    ldr     r12, [sp, #40]
+    uadd16  r6, r10, r8
+    usub16  r7, r10, r8
+    uadd16  r7, r7, r9
+    uadd16  r6, r6, r9
+    uadd16  r10, r14, r1
+    usub16  r1, r14, r1
+    uadd16  r10, r10, r9
+    uadd16  r1, r1, r9
+    ldr     r11, [r2], r12
+    mov     r8, r7, asr #3
+    pkhtb   r9, r8, r10, asr #19
+    mov     r8, r1, asr #3
+    pkhtb   r8, r8, r6, asr #19
+    uxtb16  lr, r11, ror #8
+    qadd16  r9, r9, lr
+    uxtb16  lr, r11
+    qadd16  r8, r8, lr
+    usat16  r9, #8, r9
+    usat16  r8, #8, r8
+    orr     r9, r8, r9, lsl #8
+    ldr     r11, [r2], r12
+    ldr     lr, [sp]
+    ldr     r12, [sp, #44]
+    mov     r7, r7, lsl #16
+    mov     r1, r1, lsl #16
+    mov     r10, r10, lsl #16
+    mov     r6, r6, lsl #16
+    mov     r7, r7, asr #3
+    pkhtb   r7, r7, r10, asr #19
+    mov     r1, r1, asr #3
+    pkhtb   r1, r1, r6, asr #19
+    uxtb16  r8, r11, ror #8
+    qadd16  r7, r7, r8
+    uxtb16  r8, r11
+    qadd16  r1, r1, r8
+    usat16  r7, #8, r7
+    usat16  r1, #8, r1
+    orr     r1, r1, r7, lsl #8
+    str     r9, [lr], r12
+    str     r1, [lr], r12
+    str     lr, [sp]
+    bne     vp8_dequant_idct_loop2_v6
+
+; vpx_memset
+    sub     r0, r0, #32
+    add     sp, sp, #4
+
+    mov     r12, #0
+    str     r12, [r0]
+    str     r12, [r0, #4]
+    str     r12, [r0, #8]
+    str     r12, [r0, #12]
+    str     r12, [r0, #16]
+    str     r12, [r0, #20]
+    str     r12, [r0, #24]
+    str     r12, [r0, #28]
+
+    ldmia   sp!, {r4 - r11, pc}
+    ENDP    ; |vp8_dequant_idct_add_v6|
+
+; Constant Pool
+cospi8sqrt2minus1 DCD 0x00004E7B
+sinpi8sqrt2       DCD 0x00008A8C
+c0x00040004       DCD 0x00040004
+
+    END
diff --git a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm b/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
deleted file mode 100644
index 025287223..000000000
--- a/vp8/decoder/arm/armv6/dequantdcidct_v6.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_dc_idct_v6|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-;void vp8_dequant_dc_idct_v6(short *input, short *dq, short *output, int pitch,int Dc)
-|vp8_dequant_dc_idct_v6| PROC
-    stmdb   sp!, {r4-r11, lr}
-
-    ldr     r6, [sp, #36]           ;load Dc
-
-    ldr     r4, [r0]                ;input
-    ldr     r5, [r1], #4            ;dq
-
-    sub     sp, sp, #4
-    str     r0, [sp]
-
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    mov     r12, #3
-
-dequant_dc_idct_loop
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    subs    r12, r12, #1
-
-    ldrne   r4, [r0, #4]
-    ldrne   r5, [r1], #4
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    bne     dequant_dc_idct_loop
-
-    sub     r0, r0, #32
-    mov     r1, r2
-    mov     r2, r3
-
-; short_idct4x4llm_v6_dual
-
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-    mov r5, #0x2    ; i=2                           i
-loop1_dual_11
-    ldr r6, [r0, #(4*2)]    ; i5 | i4                               5|4
-    ldr r12, [r0, #(12*2)]  ; i13 | i12                                                     13|12
-    ldr r14, [r0, #(8*2)]   ; i9 | i8                                                               9|8
-
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwb  r7, r3, r6  ; (ip[4] * cospi8sqrt2minus1) >> 16                                 4c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwb  r8, r4, r6  ; (ip[4] * sinpi8sqrt2) >> 16                                       4s
-    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
-    smulwt  r11, r3, r12    ; (ip[13] * cospi8sqrt2minus1) >> 16                                                    13c
-    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
-    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
-    smulwt  r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16                                  13s
-    smulwb  r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16                                            12c
-    smulwb  r10, r4, r12    ; (ip[12] * sinpi8sqrt2) >> 16                                              12s
-    subs    r5, r5, #0x1    ; i--                           --
-    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
-    ldr r11, [r0], #0x4 ; i1 | i0       ++                                          1|0
-    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
-    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
-    usub16  r7, r8, r7  ; c                                 c
-    uadd16  r6, r6, r10 ; d                             d
-    uadd16  r10, r11, r14   ; a                                             a
-    usub16  r8, r11, r14    ; b                                     b
-    uadd16  r9, r10, r6 ; a+d                                           a+d
-    usub16  r10, r10, r6    ; a-d                                               a-d
-    uadd16  r6, r8, r7  ; b+c                               b+c
-    usub16  r7, r8, r7  ; b-c                                   b-c
-    str r6, [r1, r2]    ; o5 | o4
-    add r6, r2, r2  ; pitch * 2                             p2
-    str r7, [r1, r6]    ; o9 | o8
-    add r6,  r6, r2 ; pitch * 3                             p3
-    str r10, [r1, r6]   ; o13 | o12
-    str r9, [r1], #0x4  ; o1 | o0           ++
-    bne loop1_dual_11   ;
-    mov r5, #0x2    ; i=2                           i
-    sub r0, r1, #8  ; reset input/output        i/o
-loop2_dual_22
-    ldr r6, [r0, r2]    ; i5 | i4                               5|4
-    ldr r1, [r0]    ; i1 | i0           1|0
-    ldr r12, [r0, #0x4] ; i3 | i2                                                       3|2
-    add r14, r2, #0x4   ; pitch + 2                                                             p+2
-    ldr r14, [r0, r14]  ; i7 | i6                                                               7|6
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwt  r7, r3, r1  ; (ip[1] * cospi8sqrt2minus1) >> 16                                 1c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwt  r8, r4, r1  ; (ip[1] * sinpi8sqrt2) >> 16                                       1s
-    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4                                                   0|4
-    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
-    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 �                                     tc1
-    pkhtb   r1, r1, r6, asr #16 ; i1 | i5           1|5
-    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)           td2
-    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6                                           2|6
-    uadd16  r10, r11, r9    ; a                                             a
-    usub16  r9, r11, r9 ; b                                         b
-    pkhtb   r6, r12, r14, asr #16   ; i3 | i7                               3|7
-    subs    r5, r5, #0x1    ; i--                           --
-    smulwt  r7, r3, r6  ; (ip[3] * cospi8sqrt2minus1) >> 16                                 3c
-    smulwt  r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16                                                   3s
-    smulwb  r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16                                                     7c
-    smulwb  r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16                                                               7s
-
-    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
-    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)                                                   td1
-    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2  (c)                              tc2
-    usub16  r12, r8, r6 ; c (o1 | o5)                                                       c
-    uadd16  r6, r11, r1 ; d (o3 | o7)                               d
-    uadd16  r7, r10, r6 ; a+d                                   a+d
-    mov r8, #0x4    ; set up 4's                                        4
-    orr r8, r8, #0x40000    ;                                       4|4
-    usub16  r6, r10, r6 ; a-d                               a-d
-    uadd16  r6, r6, r8  ; a-d+4                             3|7
-    uadd16  r7, r7, r8  ; a+d+4                                 0|4
-    uadd16  r10, r9, r12    ; b+c                                               b+c
-    usub16  r1, r9, r12 ; b-c           b-c
-    uadd16  r10, r10, r8    ; b+c+4                                             1|5
-    uadd16  r1, r1, r8  ; b-c+4         2|6
-    mov r8, r10, asr #19    ; o1 >> 3
-    strh    r8, [r0, #2]    ; o1
-    mov r8, r1, asr #19 ; o2 >> 3
-    strh    r8, [r0, #4]    ; o2
-    mov r8, r6, asr #19 ; o3 >> 3
-    strh    r8, [r0, #6]    ; o3
-    mov r8, r7, asr #19 ; o0 >> 3
-    strh    r8, [r0], r2    ; o0        +p
-    sxth    r10, r10    ;
-    mov r8, r10, asr #3 ; o5 >> 3
-    strh    r8, [r0, #2]    ; o5
-    sxth    r1, r1  ;
-    mov r8, r1, asr #3  ; o6 >> 3
-    strh    r8, [r0, #4]    ; o6
-    sxth    r6, r6  ;
-    mov r8, r6, asr #3  ; o7 >> 3
-    strh    r8, [r0, #6]    ; o7
-    sxth    r7, r7  ;
-    mov r8, r7, asr #3  ; o4 >> 3
-    strh    r8, [r0], r2    ; o4        +p
-;;;;;   subs    r5, r5, #0x1    ; i--                           --
-    bne loop2_dual_22   ;
-
-
-;vpx_memset
-    ldr     r0, [sp]
-    add     sp, sp, #4
-
-    mov     r12, #0
-    str     r12, [r0]
-    str     r12, [r0, #4]
-    str     r12, [r0, #8]
-    str     r12, [r0, #12]
-    str     r12, [r0, #16]
-    str     r12, [r0, #20]
-    str     r12, [r0, #24]
-    str     r12, [r0, #28]
-
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
-
-    ENDP    ;|vp8_dequant_dc_idct_v68|
-
-    END
diff --git a/vp8/decoder/arm/armv6/dequantidct_v6.asm b/vp8/decoder/arm/armv6/dequantidct_v6.asm
deleted file mode 100644
index 15e4c6814..000000000
--- a/vp8/decoder/arm/armv6/dequantidct_v6.asm
+++ /dev/null
@@ -1,184 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_idct_v6|
-    ; ARM
-    ; REQUIRE8
-    ; PRESERVE8
-
-    AREA    |.text|, CODE, READONLY  ; name this block of code
-;void vp8_dequant_idct_v6(short *input, short *dq, short *output, int pitch)
-|vp8_dequant_idct_v6| PROC
-    stmdb   sp!, {r4-r11, lr}
-
-    ldr     r4, [r0]            ;input
-    ldr     r5, [r1], #4            ;dq
-
-    sub     sp, sp, #4
-    str     r0, [sp]
-
-    mov     r12, #4
-
-dequant_idct_loop
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    ldr     r4, [r0, #4]            ;input
-    ldr     r5, [r1], #4        ;dq
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    smulbb  r6, r4, r5
-    smultt  r7, r4, r5
-
-    subs    r12, r12, #1
-
-    ldrne   r4, [r0, #4]
-    ldrne   r5, [r1], #4
-
-    strh    r6, [r0], #2
-    strh    r7, [r0], #2
-
-    bne     dequant_idct_loop
-
-    sub     r0, r0, #32
-    mov     r1, r2
-    mov     r2, r3
-
-; short_idct4x4llm_v6_dual
-
-    mov r3, #0x00004E00 ;                   cos
-    orr r3, r3, #0x0000007B ; cospi8sqrt2minus1
-    mov r4, #0x00008A00 ;                       sin
-    orr r4, r4, #0x0000008C ; sinpi8sqrt2
-    mov r5, #0x2    ; i=2                           i
-loop1_dual_1
-    ldr r6, [r0, #(4*2)]    ; i5 | i4                               5|4
-    ldr r12, [r0, #(12*2)]  ; i13 | i12                                                     13|12
-    ldr r14, [r0, #(8*2)]   ; i9 | i8                                                               9|8
-
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwb  r7, r3, r6  ; (ip[4] * cospi8sqrt2minus1) >> 16                                 4c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwb  r8, r4, r6  ; (ip[4] * sinpi8sqrt2) >> 16                                       4s
-    pkhbt   r7, r7, r9, lsl #16 ; 5c | 4c
-    smulwt  r11, r3, r12    ; (ip[13] * cospi8sqrt2minus1) >> 16                                                    13c
-    pkhbt   r8, r8, r10, lsl #16    ; 5s | 4s
-    uadd16  r6, r6, r7  ; 5c+5 | 4c+4
-    smulwt  r7, r4, r12 ; (ip[13] * sinpi8sqrt2) >> 16                                  13s
-    smulwb  r9, r3, r12 ; (ip[12] * cospi8sqrt2minus1) >> 16                                            12c
-    smulwb  r10, r4, r12    ; (ip[12] * sinpi8sqrt2) >> 16                                              12s
-    subs    r5, r5, #0x1    ; i--                           --
-    pkhbt   r9, r9, r11, lsl #16    ; 13c | 12c
-    ldr r11, [r0], #0x4 ; i1 | i0       ++                                          1|0
-    pkhbt   r10, r10, r7, lsl #16   ; 13s | 12s
-    uadd16  r7, r12, r9 ; 13c+13 | 12c+12
-    usub16  r7, r8, r7  ; c                                 c
-    uadd16  r6, r6, r10 ; d                             d
-    uadd16  r10, r11, r14   ; a                                             a
-    usub16  r8, r11, r14    ; b                                     b
-    uadd16  r9, r10, r6 ; a+d                                           a+d
-    usub16  r10, r10, r6    ; a-d                                               a-d
-    uadd16  r6, r8, r7  ; b+c                               b+c
-    usub16  r7, r8, r7  ; b-c                                   b-c
-    str r6, [r1, r2]    ; o5 | o4
-    add r6, r2, r2  ; pitch * 2                             p2
-    str r7, [r1, r6]    ; o9 | o8
-    add r6,  r6, r2 ; pitch * 3                             p3
-    str r10, [r1, r6]   ; o13 | o12
-    str r9, [r1], #0x4  ; o1 | o0           ++
-    bne loop1_dual_1    ;
-    mov r5, #0x2    ; i=2                           i
-    sub r0, r1, #8  ; reset input/output        i/o
-loop2_dual_2
-    ldr r6, [r0, r2]    ; i5 | i4                               5|4
-    ldr r1, [r0]    ; i1 | i0           1|0
-    ldr r12, [r0, #0x4] ; i3 | i2                                                       3|2
-    add r14, r2, #0x4   ; pitch + 2                                                             p+2
-    ldr r14, [r0, r14]  ; i7 | i6                                                               7|6
-    smulwt  r9, r3, r6  ; (ip[5] * cospi8sqrt2minus1) >> 16                                         5c
-    smulwt  r7, r3, r1  ; (ip[1] * cospi8sqrt2minus1) >> 16                                 1c
-    smulwt  r10, r4, r6 ; (ip[5] * sinpi8sqrt2) >> 16                                               5s
-    smulwt  r8, r4, r1  ; (ip[1] * sinpi8sqrt2) >> 16                                       1s
-    pkhbt   r11, r6, r1, lsl #16    ; i0 | i4                                                   0|4
-    pkhbt   r7, r9, r7, lsl #16 ; 1c | 5c
-    pkhbt   r8, r10, r8, lsl #16    ; 1s | 5s = temp1 �                                     tc1
-    pkhtb   r1, r1, r6, asr #16 ; i1 | i5           1|5
-    uadd16  r1, r7, r1  ; 1c+1 | 5c+5 = temp2 (d)           td2
-    pkhbt   r9, r14, r12, lsl #16   ; i2 | i6                                           2|6
-    uadd16  r10, r11, r9    ; a                                             a
-    usub16  r9, r11, r9 ; b                                         b
-    pkhtb   r6, r12, r14, asr #16   ; i3 | i7                               3|7
-    subs    r5, r5, #0x1    ; i--                           --
-    smulwt  r7, r3, r6  ; (ip[3] * cospi8sqrt2minus1) >> 16                                 3c
-    smulwt  r11, r4, r6 ; (ip[3] * sinpi8sqrt2) >> 16                                                   3s
-    smulwb  r12, r3, r6 ; (ip[7] * cospi8sqrt2minus1) >> 16                                                     7c
-    smulwb  r14, r4, r6 ; (ip[7] * sinpi8sqrt2) >> 16                                                               7s
-
-    pkhbt   r7, r12, r7, lsl #16    ; 3c | 7c
-    pkhbt   r11, r14, r11, lsl #16  ; 3s | 7s = temp1 (d)                                                   td1
-    uadd16  r6, r7, r6  ; 3c+3 | 7c+7 = temp2  (c)                              tc2
-    usub16  r12, r8, r6 ; c (o1 | o5)                                                       c
-    uadd16  r6, r11, r1 ; d (o3 | o7)                               d
-    uadd16  r7, r10, r6 ; a+d                                   a+d
-    mov r8, #0x4    ; set up 4's                                        4
-    orr r8, r8, #0x40000    ;                                       4|4
-    usub16  r6, r10, r6 ; a-d                               a-d
-    uadd16  r6, r6, r8  ; a-d+4                             3|7
-    uadd16  r7, r7, r8  ; a+d+4                                 0|4
-    uadd16  r10, r9, r12    ; b+c                                               b+c
-    usub16  r1, r9, r12 ; b-c           b-c
-    uadd16  r10, r10, r8    ; b+c+4                                             1|5
-    uadd16  r1, r1, r8  ; b-c+4         2|6
-    mov r8, r10, asr #19    ; o1 >> 3
-    strh    r8, [r0, #2]    ; o1
-    mov r8, r1, asr #19 ; o2 >> 3
-    strh    r8, [r0, #4]    ; o2
-    mov r8, r6, asr #19 ; o3 >> 3
-    strh    r8, [r0, #6]    ; o3
-    mov r8, r7, asr #19 ; o0 >> 3
-    strh    r8, [r0], r2    ; o0        +p
-    sxth    r10, r10    ;
-    mov r8, r10, asr #3 ; o5 >> 3
-    strh    r8, [r0, #2]    ; o5
-    sxth    r1, r1  ;
-    mov r8, r1, asr #3  ; o6 >> 3
-    strh    r8, [r0, #4]    ; o6
-    sxth    r6, r6  ;
-    mov r8, r6, asr #3  ; o7 >> 3
-    strh    r8, [r0, #6]    ; o7
-    sxth    r7, r7  ;
-    mov r8, r7, asr #3  ; o4 >> 3
-    strh    r8, [r0], r2    ; o4        +p
-;;;;;   subs    r5, r5, #0x1    ; i--                           --
-    bne loop2_dual_2    ;
-            ;
-
-;vpx_memset
-    ldr     r0, [sp]
-    add     sp, sp, #4
-
-    mov     r12, #0
-    str     r12, [r0]
-    str     r12, [r0, #4]
-    str     r12, [r0, #8]
-    str     r12, [r0, #12]
-    str     r12, [r0, #16]
-    str     r12, [r0, #20]
-    str     r12, [r0, #24]
-    str     r12, [r0, #28]
-
-    ldmia   sp!, {r4 - r11, pc} ; replace vars, return                      restore
-
-    ENDP    ;|vp8_dequant_idct_v6|
-
-    END
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 78af0195d..3a044f847 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -14,14 +14,32 @@
 
 #if HAVE_ARMV6
 extern prototype_dequant_block(vp8_dequantize_b_v6);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+#endif
+
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
+extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
+#undef vp8_dequant_idct_add
+#define vp8_dequant_idct_add vp8_dequant_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+#endif
+
 #endif
diff --git a/vp8/decoder/arm/neon/dequantdcidct_neon.asm b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
similarity index 65%
rename from vp8/decoder/arm/neon/dequantdcidct_neon.asm
rename to vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
index ae126f23f..ddb324068 100644
--- a/vp8/decoder/arm/neon/dequantdcidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
@@ -9,31 +9,43 @@
 ;
 
 
-    EXPORT  |vp8_dequant_dc_idct_neon|
+    EXPORT  |vp8_dequant_dc_idct_add_neon|
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_c(short *input, short *dq, short *output, int pitch, int Dc);
+;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
+;                                  unsigned char *dest, int pitch, int stride,
+;                                  int Dc);
 ; r0    short *input,
 ; r1    short *dq,
-; r2    short *output,
-; r3    int pitch,
-; (stack)   int Dc
-|vp8_dequant_dc_idct_neon| PROC
+; r2    unsigned char *pred
+; r3    unsigned char *dest
+; sp    int pitch
+; sp+4  int stride
+; sp+8  int Dc
+|vp8_dequant_dc_idct_add_neon| PROC
     vld1.16         {q3, q4}, [r0]
     vld1.16         {q5, q6}, [r1]
 
-    ldr             r1, [sp]                ;load Dc from stack
+    ldr             r1, [sp, #8]            ;load Dc from stack
 
-    ldr             r12, _dcidct_coeff_
+    ldr             r12, _CONSTANTS_
 
     vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
     vmul.i16        q2, q4, q6
 
     vmov.16         d2[0], r1
 
+    ldr             r1, [sp]                ; pitch
+    vld1.32         {d14[0]}, [r2], r1
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
+
+    ldr             r1, [sp, #4]            ; stride
+
 ;|short_idct4x4llm_neon| PROC
     vld1.16         {d0}, [r12]
     vswp            d3, d4                  ;q2(vp[4] vp[12])
@@ -47,14 +59,9 @@
     vshr.s16        q3, q3, #1
     vshr.s16        q4, q4, #1
 
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+    vqadd.s16       q3, q3, q2
     vqadd.s16       q4, q4, q2
 
-    ;d6 - c1:temp1
-    ;d7 - d1:temp2
-    ;d8 - d1:temp1
-    ;d9 - c1:temp2
-
     vqsub.s16       d10, d6, d9             ;c1
     vqadd.s16       d11, d7, d8             ;d1
 
@@ -83,7 +90,7 @@
     vshr.s16        q3, q3, #1
     vshr.s16        q4, q4, #1
 
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+    vqadd.s16       q3, q3, q2
     vqadd.s16       q4, q4, q2
 
     vqsub.s16       d10, d6, d9             ;c1
@@ -101,34 +108,29 @@
     vrshr.s16       d4, d4, #3
     vrshr.s16       d5, d5, #3
 
-    add             r1, r2, r3
-    add             r12, r1, r3
-    add             r0, r12, r3
-
     vtrn.32         d2, d4
     vtrn.32         d3, d5
     vtrn.16         d2, d3
     vtrn.16         d4, d5
 
-    vst1.16         {d2}, [r2]
-    vst1.16         {d3}, [r1]
-    vst1.16         {d4}, [r12]
-    vst1.16         {d5}, [r0]
+    vaddw.u8        q1, q1, d14
+    vaddw.u8        q2, q2, d15
+
+    vqmovun.s16     d0, q1
+    vqmovun.s16     d1, q2
+
+    vst1.32         {d0[0]}, [r3], r1
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
 
     bx             lr
 
-    ENDP
+    ENDP           ; |vp8_dequant_dc_idct_add_neon|
 
-;-----------------
-    AREA    dcidct4x4_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_dcidct_coeff_
-    DCD     dcidct_coeff
-dcidct_coeff
-    DCD     0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
 
     END
diff --git a/vp8/decoder/arm/neon/dequantidct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
similarity index 66%
rename from vp8/decoder/arm/neon/dequantidct_neon.asm
rename to vp8/decoder/arm/neon/dequant_idct_neon.asm
index e0888ed35..5c60dd690 100644
--- a/vp8/decoder/arm/neon/dequantidct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -9,22 +9,33 @@
 ;
 
 
-    EXPORT  |vp8_dequant_idct_neon|
+    EXPORT  |vp8_dequant_idct_add_neon|
     ARM
     REQUIRE8
     PRESERVE8
 
     AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_idct_c(short *input, short *dq, short *output, int pitch);
+;void vp8_dequant_idct_neon(short *input, short *dq, unsigned char *pred,
+;                           unsigned char *dest, int pitch, int stride)
 ; r0    short *input,
 ; r1    short *dq,
-; r2    short *output,
-; r3    int pitch,
-|vp8_dequant_idct_neon| PROC
+; r2    unsigned char *pred
+; r3    unsigned char *dest
+; sp    int pitch
+; sp+4  int stride
+
+|vp8_dequant_idct_add_neon| PROC
     vld1.16         {q3, q4}, [r0]
     vld1.16         {q5, q6}, [r1]
+    ldr             r1, [sp]                ; pitch
+    vld1.32         {d14[0]}, [r2], r1
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
 
-    ldr             r12, _didct_coeff_
+    ldr             r1, [sp, #4]            ; stride
+
+    ldr             r12, _CONSTANTS_
 
     vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
     vmul.i16        q2, q4, q6
@@ -42,14 +53,9 @@
     vshr.s16        q3, q3, #1
     vshr.s16        q4, q4, #1
 
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+    vqadd.s16       q3, q3, q2
     vqadd.s16       q4, q4, q2
 
-    ;d6 - c1:temp1
-    ;d7 - d1:temp2
-    ;d8 - d1:temp1
-    ;d9 - c1:temp2
-
     vqsub.s16       d10, d6, d9             ;c1
     vqadd.s16       d11, d7, d8             ;d1
 
@@ -78,7 +84,7 @@
     vshr.s16        q3, q3, #1
     vshr.s16        q4, q4, #1
 
-    vqadd.s16       q3, q3, q2              ;modify since sinpi8sqrt2 > 65536/2 (negtive number)
+    vqadd.s16       q3, q3, q2
     vqadd.s16       q4, q4, q2
 
     vqsub.s16       d10, d6, d9             ;c1
@@ -96,34 +102,29 @@
     vrshr.s16       d4, d4, #3
     vrshr.s16       d5, d5, #3
 
-    add             r1, r2, r3
-    add             r12, r1, r3
-    add             r0, r12, r3
-
     vtrn.32         d2, d4
     vtrn.32         d3, d5
     vtrn.16         d2, d3
     vtrn.16         d4, d5
 
-    vst1.16         {d2}, [r2]
-    vst1.16         {d3}, [r1]
-    vst1.16         {d4}, [r12]
-    vst1.16         {d5}, [r0]
+    vaddw.u8        q1, q1, d14
+    vaddw.u8        q2, q2, d15
+
+    vqmovun.s16     d0, q1
+    vqmovun.s16     d1, q2
+
+    vst1.32         {d0[0]}, [r3], r1
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
 
     bx             lr
 
-    ENDP
+    ENDP           ; |vp8_dequant_idct_add_neon|
 
-;-----------------
-    AREA    didct4x4_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 48 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_didct_coeff_
-    DCD     didct_coeff
-didct_coeff
-    DCD     0x4e7b4e7b, 0x8a8c8a8c
-
-;20091, 20091, 35468, 35468
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
 
     END
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 5668cef4d..f4c6be9b5 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -272,8 +272,10 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 
             if (b->eob > 1)
             {
-                DEQUANT_INVOKE(&pbi->dequant, idct_dc_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride,
-                        xd->block[24].diff[i]);
+                DEQUANT_INVOKE(&pbi->dequant, dc_idct_add)
+                    (b->qcoeff, &b->dequant[0][0], b->predictor,
+                     *(b->base_dst) + b->dst, 16, b->dst_stride,
+                     xd->block[24].diff[i]);
             }
             else
             {
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 4c924ffb9..df7cf5f6c 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -32,10 +32,10 @@ void vp8_dequantize_b_c(BLOCKD *d)
     }
 }
 
-void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride)
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+                            unsigned char *dest, int pitch, int stride)
 {
-    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
-    short output[16*4];
+    short output[16];
     short *diff_ptr = output;
     int r, c;
     int i;
@@ -45,7 +45,8 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsign
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch*2);
+    // the idct halves ( >> 1) the pitch
+    vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
 
@@ -65,16 +66,17 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred, unsign
         }
 
         dest += stride;
-        diff_ptr += pitch;
+        diff_ptr += 4;
         pred += pitch;
     }
 }
 
-void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, unsigned char *dest, int pitch, int stride, int Dc)
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+                               unsigned char *dest, int pitch, int stride,
+                               int Dc)
 {
     int i;
-    // output needs to be at least pitch * 4 for vp8_short_idct4x4llm_c to work properly
-    short output[16*4];
+    short output[16];
     short *diff_ptr = output;
     int r, c;
 
@@ -85,7 +87,8 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, uns
         input[i] = dq[i] * input[i];
     }
 
-    vp8_short_idct4x4llm_c(input, output, pitch*2);
+    // the idct halves ( >> 1) the pitch
+    vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
 
@@ -105,7 +108,7 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred, uns
         }
 
         dest += stride;
-        diff_ptr += pitch;
+        diff_ptr += 4;
         pred += pitch;
     }
 }
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 50293c2f7..fbca3919c 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -21,7 +21,7 @@
              unsigned char *pred, unsigned char *output, \
              int pitch, int stride)
 
-#define prototype_dequant_idct_dc_add(sym) \
+#define prototype_dequant_dc_idct_add(sym) \
     void sym(short *input, short *dq, \
              unsigned char *pred, unsigned char *output, \
              int pitch, int stride, \
@@ -45,21 +45,21 @@ extern prototype_dequant_block(vp8_dequant_block);
 #endif
 extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 
-#ifndef vp8_dequant_idct_dc_add
-#define vp8_dequant_idct_dc_add vp8_dequant_dc_idct_add_c
+#ifndef vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_c
 #endif
-extern prototype_dequant_idct_dc_add(vp8_dequant_idct_dc_add);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
 
 typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
 
 typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
-typedef prototype_dequant_idct_dc_add((*vp8_dequant_idct_dc_add_fn_t));
+typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
 
 typedef struct
 {
     vp8_dequant_block_fn_t        block;
     vp8_dequant_idct_add_fn_t     idct_add;
-    vp8_dequant_idct_dc_add_fn_t  idct_dc_add;
+    vp8_dequant_dc_idct_add_fn_t  dc_idct_add;
 } vp8_dequant_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index c72597f4a..ab085e230 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -22,7 +22,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->mb.rtcd         = &pbi->common.rtcd;
     pbi->dequant.block   = vp8_dequantize_b_c;
     pbi->dequant.idct_add    = vp8_dequant_idct_add_c;
-    pbi->dequant.idct_dc_add    = vp8_dequant_dc_idct_add_c;
+    pbi->dequant.dc_idct_add    = vp8_dequant_dc_idct_add_c;
     pbi->dboolhuff.start = vp8dx_start_decode_c;
     pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
 #if 0 //For use with RTCD, when implemented
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 32e777994..f0830a769 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -22,7 +22,7 @@
 #if HAVE_MMX
 extern prototype_dequant_block(vp8_dequantize_b_mmx);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
-extern prototype_dequant_idct_dc_add(vp8_dequant_dc_idct_add_mmx);
+extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
 
 
 #if !CONFIG_RUNTIME_CPU_DETECT
@@ -30,10 +30,10 @@ extern prototype_dequant_idct_dc_add(vp8_dequant_dc_idct_add_mmx);
 #define vp8_dequant_block vp8_dequantize_b_mmx
 
 #undef  vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
+#define vp8_dequant_idct_add vp8_dequant_idct_mmx
 
-#undef  vp8_dequant_idct_dc
-#define vp8_dequant_idct_add_dc vp8_dequant_dc_idct_add_mmx
+#undef  vp8_dequant_dc_idct_add
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_mmx
 
 #endif
 #endif
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index d7bed0873..789105141 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -44,7 +44,7 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
     {
         pbi->dequant.block   = vp8_dequantize_b_mmx;
         pbi->dequant.idct_add    = vp8_dequant_idct_add_mmx;
-        pbi->dequant.idct_dc_add = vp8_dequant_dc_idct_add_mmx;
+        pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
     }
 
 #endif
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index d993927ec..5b8a3017e 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -125,6 +125,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
+VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
 VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
@@ -134,6 +135,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x8_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem16x16_v6$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/dc_only_idct_add_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/iwalsh_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/filter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/idct_v6$(ASM)
@@ -150,6 +152,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/bilinearpredict16x16_neon$(ASM
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x4_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x8_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem16x16_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dc_only_idct_add_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/iwalsh_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index e741680c0..e9674ca1c 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -23,12 +23,12 @@ VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
 
 #File list for armv6
 # decoder
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantdcidct_v6$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantidct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 
 #File list for neon
 # decoder
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantdcidct_neon$(ASM)
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantidct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)

From d8009c077a96e0210ca47b12a3774bbb2437590f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 27 Jul 2010 11:12:21 -0400
Subject: [PATCH 094/307] neon: disable asm quantizer

The assembly version of the quantizer has not been updated to match
the new exact quantizer introduced in commit e04e2935. That commit tried
to disable this code but missed the non-RTCD case.

Thanks to David Baker <david.baker at openmarket.com> for isolating the
issue and testing this fix.

Change-Id: I0e51492dc6f8e44d2c10b587427448bf94135c65
---
 vp8/encoder/arm/csystemdependent.c | 4 ++++
 vp8/encoder/arm/quantize_arm.h     | 7 +++++--
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index bfceab16c..698cf1e6f 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -63,6 +63,10 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
 
     cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
+    /* The neon quantizer has not been updated to match the new exact
+     * quantizer introduced in commit e04e2935
+     */
     /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
 #elif HAVE_ARMV6
     cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index 5a7b0caac..339b8a28a 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -15,8 +15,11 @@
 #if HAVE_ARMV7
 extern prototype_quantize_block(vp8_fast_quantize_b_neon);
 
-#undef  vp8_quantize_fastquantb
-#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
+/* The neon quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ */
+//#undef  vp8_quantize_fastquantb
+//#define vp8_quantize_fastquantb vp8_fast_quantize_b_neon
 
 #endif
 

From b9a038a5edb157ac74da79d9469a5fb23c3457e2 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 27 Jul 2010 11:56:19 -0400
Subject: [PATCH 095/307] Fix build w/o RTCD

So many places to update ...

Change-Id: Ide957b40cc833f99c2d1849acade6850fbf7585d
---
 vp8/decoder/x86/dequantize_x86.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index f0830a769..44926761e 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -30,10 +30,10 @@ extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
 #define vp8_dequant_block vp8_dequantize_b_mmx
 
 #undef  vp8_dequant_idct_add
-#define vp8_dequant_idct_add vp8_dequant_idct_mmx
+#define vp8_dequant_idct_add vp8_dequant_idct_add_mmx
 
 #undef  vp8_dequant_dc_idct_add
-#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_mmx
+#define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
 
 #endif
 #endif

From a570bbd418f7bffdf291a1752a198a4b84e717cb Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 27 Jul 2010 12:10:48 -0400
Subject: [PATCH 096/307] x86/sse2: disable asm quantizer

follow up to Change I0e51492d: neon: disable asm quantizer

Now x86 doesn't segfault with --disable-runtime-cpu-detect and -p=2

Change-Id: I8ca127bb299198efebbcbd5a661e81788361933f
---
 vp8/encoder/x86/csystemdependent.c | 10 ++++++++--
 vp8/encoder/x86/quantize_x86.h     |  7 +++++--
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index bf12fee54..4bb6b60b8 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -180,7 +180,10 @@ void vp8_cmachine_specific_config(void)
     {
         // Willamette instruction set available:
         vp8_mbuverror                = vp8_mbuverror_xmm;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_sse;
+        /* The sse quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
 #if 0 //new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
@@ -224,7 +227,10 @@ void vp8_cmachine_specific_config(void)
     {
         // MMX instruction set available:
         vp8_mbuverror                = vp8_mbuverror_mmx;
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_mmx;
+        /* The mmx quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
 #if 0 // new fdct
         vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
         vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
index 37d69a890..31a43e428 100644
--- a/vp8/encoder/x86/quantize_x86.h
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -27,8 +27,11 @@ extern prototype_quantize_block(vp8_regular_quantize_b_sse2);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 
-#undef vp8_quantize_quantb
-#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
+/* The sse2 quantizer has not been updated to match the new exact
+ * quantizer introduced in commit e04e2935
+ *#undef vp8_quantize_quantb
+ *#define vp8_quantize_quantb vp8_regular_quantize_b_sse2
+ */
 
 #endif
 

From 23d68a5f3004960d1a50702e6d76323d5ea0a721 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 22 Jun 2010 09:53:23 -0400
Subject: [PATCH 097/307] configure: pass original arguments through to make
 dist

When running configure automatically through the make dist target,
reuse the arguments passed to the original configure command.

Change-Id: I40e5b8384d6485a565b91e6d2356d5bc9c4c5928
---
 build/make/Makefile | 9 ++-------
 configure           | 1 +
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/build/make/Makefile b/build/make/Makefile
index 20a48671e..5011ce36d 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -39,13 +39,8 @@ dist:
 	@if [ -d "$(DIST_DIR)/src" ]; then \
             mkdir -p "$(DIST_DIR)/build"; \
             cd "$(DIST_DIR)/build"; \
-            if [ "$(TGT_CC)" = "rvct" ] ; then \
-				echo "../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC)"; \
-				../src/configure --target=$(TOOLCHAIN) --libc=$(ALT_LIBC); \
-			else \
-				echo "../src/configure --target=$(TOOLCHAIN)"; \
-				../src/configure --target=$(TOOLCHAIN); \
-			fi; \
+            echo "Rerunning configure $(CONFIGURE_ARGS)"; \
+            ../src/configure $(CONFIGURE_ARGS); \
             $(if $(filter vs%,$(TGT_CC)),make NO_LAUNCH_DEVENV=1;) \
         fi
 	@if [ -d "$(DIST_DIR)" ]; then \
diff --git a/configure b/configure
index 9be0624fb..5c908d4b3 100755
--- a/configure
+++ b/configure
@@ -385,6 +385,7 @@ VERSION_MAJOR=${VERSION_MAJOR}
 VERSION_MINOR=${VERSION_MINOR}
 VERSION_PATCH=${VERSION_PATCH}
 
+CONFIGURE_ARGS=${CONFIGURE_ARGS}
 EOF
     enabled child || echo "CONFIGURE_ARGS?=${CONFIGURE_ARGS}" >> config.mk
 

From f95c80b60f384d926c6e82aaa497959c90610d9e Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 28 Jul 2010 10:44:17 -0700
Subject: [PATCH 098/307] Enable the switch between two versions of quantizer

To facilitate more testing related to quantizer and rate
control, the old version quantizer is added back. old and
new quantizer can be switched back and forth by define or
un-define the macro "EXACT_QUANT".

Change-Id: Ia77e687622421550f10e9d65a9884128a79a65ff
---
 vp8/encoder/encodeframe.c |  69 +++++++++++++++++++++++++-
 vp8/encoder/quantize.c    | 100 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 166 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index cb9a8dd36..4e4483edb 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -102,7 +102,8 @@ static const int qzbin_factors[129] =
     80, 80, 80, 80, 80, 80, 80, 80,
     80,
 };
-
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
 static void vp8cx_invert_quant(short *quant, short *shift, short d)
 {
     unsigned t;
@@ -184,7 +185,71 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         }
     }
 }
-
+#else
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+    int r, c;
+    int i;
+    int quant_val;
+    int Q;
+
+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+
+    for (Q = 0; Q < QINDEX_RANGE; Q++)
+    {
+        // dc values
+        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        // all the ac values = ;
+        for (i = 1; i < 16; i++)
+        {
+            int rc = vp8_default_zig_zag1d[i];
+            r = (rc >> 2);
+            c = (rc & 3);
+
+            quant_val = vp8_ac_yquant(Q);
+            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+        }
+    }
+}
+#endif
 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 {
     int i;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 877002b08..7b4472467 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -16,6 +16,8 @@
 #include "entropy.h"
 #include "predictdc.h"
 
+//#define EXACT_QUANT
+#ifdef EXACT_QUANT
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -116,7 +118,103 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
     d->eob = eob + 1;
 }
-
+#else
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+        zbin = zbin_ptr[rc] ;
+
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc] = x;                          // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+            }
+        }
+    }
+    d->eob = eob + 1;
+}
+
+void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+    short zbin_oq_value = b->zbin_extra;
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        //if ( i == 0 )
+        //    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
+        //else
+        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+        zbin_boost_ptr ++;
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc]  = x;                         // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+                zbin_boost_ptr = &b->zrun_zbin_boost[0];    // reset zero runlength
+            }
+        }
+    }
+
+    d->eob = eob + 1;
+}
+
+#endif
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;

From 062e6c18869ddd22b82b8dbf7bbd6a953fe49dbe Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 28 Jul 2010 17:25:09 -0400
Subject: [PATCH 099/307] Removed two unused global variables.

Removed the global variables vp8_an and vp8_cd. vp8_an was causing problems
because it was increasing the .bss by 1572864 bytes.

Change-Id: I6c12e294133c7fb6e770c0e4536d8287a5720a87
---
 vp8/common/postproc.c | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 1670a1aa2..648892194 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -330,13 +330,6 @@ void vp8_de_noise(YV12_BUFFER_CONFIG         *source,
 
 }
 
-
-//Notes: It is better to change CHAR to unsigned or signed to
-//avoid error on ARM platform.
-char vp8_an[8][64][3072];
-int vp8_cd[8][64];
-
-
 double vp8_gaussian(double sigma, double mu, double x)
 {
     return 1 / (sigma * sqrt(2.0 * 3.14159265)) *

From 38a20e030f442fb8dfa1e596c98500bd35919e6f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 29 Jul 2010 17:04:39 -0400
Subject: [PATCH 100/307] apple: include proper mach primatives

Fixes implicit declaration warning for 'mach_task_self'.

Patch courtesy of timeless at gmail.com

Change-Id: I9991dedd1ccfddc092eca86705ecbc3b764b799d
---
 vp8/decoder/threading.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 752081ea9..8f4e9da46 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -12,6 +12,9 @@
 #ifndef WIN32
 # include <unistd.h>
 #endif
+#ifdef __APPLE__
+#include <mach/mach_init.h>
+#endif
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "threading.h"

From 7d243701d986b8b7fa2d3148e7a09b52cffd272a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Philip=20J=C3=A4genstedt?= <philipj@opera.com>
Date: Tue, 13 Jul 2010 11:43:51 +0200
Subject: [PATCH 101/307] Replace pinsrw (SSE) with MMX instructions

Fixes http://code.google.com/p/webm/issues/detail?id=136

Change-Id:	I5a3e294061644a1a9718e8ba4a39548ede25cc42
---
 vp8/decoder/x86/dequantize_mmx.asm | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 7ad9289cc..f11eef35a 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -261,8 +261,6 @@ sym(vp8_dequant_dc_idct_add_mmx):
         mov         rax,    arg(0) ;input
         mov         rdx,    arg(1) ;dq
 
-        movsxd      rcx,    dword ptr arg(6) ;Dc
-
         movq        mm0,    [rax   ]
         pmullw      mm0,    [rdx]
 
@@ -286,8 +284,13 @@ sym(vp8_dequant_dc_idct_add_mmx):
         movq        [rax+16],mm7
         movq        [rax+24],mm7
 
+        ; move lower word of Dc to lower word of mm0
+        psrlq       mm0,    16
+        movzx       rcx,    word ptr arg(6) ;Dc
+        psllq       mm0,    16
+        movd        mm7,    rcx
+        por         mm0,    mm7
 
-        pinsrw      mm0,    rcx,  0
         movsxd      rax,            dword ptr arg(4) ;pitch
         movsxd      rdi,            dword ptr arg(5) ;stride
 

From c8134bc54a783d5c7e480d21ec5bd1460b03fb92 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Sat, 31 Jul 2010 17:12:31 +0200
Subject: [PATCH 102/307] nasm: use OWORD vs DQWORD

nasm knows only OWORD.  yasm knows both OWORD and DQWORD.

Provide nasm compatibility.  No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu.  Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: I62151390089e90df9a7667822fa594ac20b00e78
---
 vp8/encoder/x86/quantize_sse2.asm | 48 +++++++++++++++----------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index c64a8ba12..faaff6428 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -42,8 +42,8 @@ sym(vp8_regular_quantize_b_impl_sse2):
 
     sub         rsp, vp8_regularquantizeb_stack_size
 
-    movdqa      DQWORD PTR[rsp + save_xmm6], xmm6
-    movdqa      DQWORD PTR[rsp + save_xmm7], xmm7
+    movdqa      OWORD PTR[rsp + save_xmm6], xmm6
+    movdqa      OWORD PTR[rsp + save_xmm7], xmm7
 
     mov         rdx, arg(0)                 ;coeff_ptr
     mov         eax, arg(8)                 ;zbin_oq_value
@@ -51,8 +51,8 @@ sym(vp8_regular_quantize_b_impl_sse2):
     mov         rcx, arg(1)                 ;zbin_ptr
     movd        xmm7, eax
 
-    movdqa      xmm0, DQWORD PTR[rdx]
-    movdqa      xmm4, DQWORD PTR[rdx + 16]
+    movdqa      xmm0, OWORD PTR[rdx]
+    movdqa      xmm4, OWORD PTR[rdx + 16]
 
     movdqa      xmm1, xmm0
     movdqa      xmm5, xmm4
@@ -63,8 +63,8 @@ sym(vp8_regular_quantize_b_impl_sse2):
     pxor        xmm1, xmm0
     pxor        xmm5, xmm4
 
-    movdqa      xmm2, DQWORD PTR[rcx]       ;load zbin_ptr
-    movdqa      xmm3, DQWORD PTR[rcx + 16]  ;load zbin_ptr
+    movdqa      xmm2, OWORD PTR[rcx]        ;load zbin_ptr
+    movdqa      xmm3, OWORD PTR[rcx + 16]   ;load zbin_ptr
 
     pshuflw     xmm7, xmm7, 0
     psubw       xmm1, xmm0                  ;x = abs(z)
@@ -81,17 +81,17 @@ sym(vp8_regular_quantize_b_impl_sse2):
     mov         rdi, arg(5)                 ;round_ptr
     mov         rsi, arg(6)                 ;quant_ptr
 
-    movdqa      DQWORD PTR[rsp + abs_minus_zbin_lo], xmm1
-    movdqa      DQWORD PTR[rsp + abs_minus_zbin_hi], xmm5
+    movdqa      OWORD PTR[rsp + abs_minus_zbin_lo], xmm1
+    movdqa      OWORD PTR[rsp + abs_minus_zbin_hi], xmm5
 
     paddw       xmm1, xmm2                  ;add (zbin_ptr + zbin_oq_value) back
     paddw       xmm5, xmm3                  ;add (zbin_ptr + zbin_oq_value) back
 
-    movdqa      xmm2, DQWORD PTR[rdi]
-    movdqa      xmm3, DQWORD PTR[rsi]
+    movdqa      xmm2, OWORD PTR[rdi]
+    movdqa      xmm3, OWORD PTR[rsi]
 
-    movdqa      xmm6, DQWORD PTR[rdi + 16]
-    movdqa      xmm7, DQWORD PTR[rsi + 16]
+    movdqa      xmm6, OWORD PTR[rdi + 16]
+    movdqa      xmm7, OWORD PTR[rsi + 16]
 
     paddw       xmm1, xmm2
     paddw       xmm5, xmm6
@@ -108,11 +108,11 @@ sym(vp8_regular_quantize_b_impl_sse2):
     psubw       xmm1, xmm0
     psubw       xmm5, xmm4
 
-    movdqa      DQWORD PTR[rsp + temp_qcoeff_lo], xmm1
-    movdqa      DQWORD PTR[rsp + temp_qcoeff_hi], xmm5
+    movdqa      OWORD PTR[rsp + temp_qcoeff_lo], xmm1
+    movdqa      OWORD PTR[rsp + temp_qcoeff_hi], xmm5
 
-    movdqa      DQWORD PTR[rsi], xmm6       ;zero qcoeff
-    movdqa      DQWORD PTR[rsi + 16], xmm6  ;zero qcoeff
+    movdqa      OWORD PTR[rsi], xmm6        ;zero qcoeff
+    movdqa      OWORD PTR[rsi + 16], xmm6   ;zero qcoeff
 
     xor         rax, rax
     mov         rcx, -1
@@ -223,22 +223,22 @@ rq_zigzag_1c:
     mov         rcx, arg(3)                 ;dequant_ptr
     mov         rsi, arg(7)                 ;dqcoeff_ptr
 
-    movdqa      xmm2, DQWORD PTR[rdi]
-    movdqa      xmm3, DQWORD PTR[rdi + 16]
+    movdqa      xmm2, OWORD PTR[rdi]
+    movdqa      xmm3, OWORD PTR[rdi + 16]
 
-    movdqa      xmm0, DQWORD PTR[rcx]
-    movdqa      xmm1, DQWORD PTR[rcx + 16]
+    movdqa      xmm0, OWORD PTR[rcx]
+    movdqa      xmm1, OWORD PTR[rcx + 16]
 
     pmullw      xmm0, xmm2
     pmullw      xmm1, xmm3
 
-    movdqa      DQWORD PTR[rsi], xmm0       ;store dqcoeff
-    movdqa      DQWORD PTR[rsi + 16], xmm1  ;store dqcoeff
+    movdqa      OWORD PTR[rsi], xmm0        ;store dqcoeff
+    movdqa      OWORD PTR[rsi + 16], xmm1   ;store dqcoeff
 
     mov         rax, [rsp + eob]
 
-    movdqa      xmm6, DQWORD PTR[rsp + save_xmm6]
-    movdqa      xmm7, DQWORD PTR[rsp + save_xmm7]
+    movdqa      xmm6, OWORD PTR[rsp + save_xmm6]
+    movdqa      xmm7, OWORD PTR[rsp + save_xmm7]
 
     add         rax, 1
 

From 0327d3df9005c4ebe099db74012ba2f37efda7b1 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Sat, 31 Jul 2010 17:12:31 +0200
Subject: [PATCH 103/307] nasm: end labels with colon (':')

Labels should end by colon (':'), nasm requires it.

Provide nasm compatibility.  No binary change by this patch with yasm
on {x86_64,i686}-fedora13-linux-gnu.  Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: I0b2ec6f01afb061d92841887affb5ca0084f936f
---
 vp8/encoder/x86/subtract_mmx.asm | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index 5775d98c4..c381ffdf5 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -15,7 +15,7 @@
 ;                            unsigned short *diff, unsigned char *Predictor,
 ;                            int pitch);
 global sym(vp8_subtract_b_mmx_impl)
-sym(vp8_subtract_b_mmx_impl)
+sym(vp8_subtract_b_mmx_impl):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
@@ -151,7 +151,7 @@ submby_loop:
 
 ;void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
 global sym(vp8_subtract_mbuv_mmx)
-sym(vp8_subtract_mbuv_mmx)
+sym(vp8_subtract_mbuv_mmx):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5

From 0e8f108fb0c33a509c87f08a870b11d92223e1e4 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Sat, 31 Jul 2010 17:12:31 +0200
Subject: [PATCH 104/307] nasm: avoid space before the :data symbol type.

global label:data
           ^^

Provide nasm compatibility.  No binary change by this patch with yasm
on {x86_64,i686}-fedora13-linux-gnu.  Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id:	I10f17eb1e4d4a718d4ebd1d0ccddc807c365e021
---
 vp8/common/x86/subpixel_mmx.asm | 4 ++--
 vpx_ports/x86_abi_support.asm   | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index b0008fcdb..b8a712761 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -731,7 +731,7 @@ rd:
     times 4 dw 0x40
 
 align 16
-global sym(vp8_six_tap_mmx) HIDDEN_DATA
+global HIDDEN_DATA(sym(vp8_six_tap_mmx))
 sym(vp8_six_tap_mmx):
     times 8 dw 0
     times 8 dw 0
@@ -791,7 +791,7 @@ sym(vp8_six_tap_mmx):
 
 
 align 16
-global sym(vp8_bilinear_filters_mmx) HIDDEN_DATA
+global HIDDEN_DATA(sym(vp8_bilinear_filters_mmx))
 sym(vp8_bilinear_filters_mmx):
     times 8 dw 128
     times 8 dw 0
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 5748c235e..eff992634 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -138,16 +138,16 @@
     %endmacro
   %endif
   %endif
-  %define HIDDEN_DATA
+  %define HIDDEN_DATA(x) x
 %else
   %macro GET_GOT 1
   %endmacro
   %define GLOBAL wrt rip
   %ifidn __OUTPUT_FORMAT__,elf64
     %define WRT_PLT wrt ..plt
-    %define HIDDEN_DATA :data hidden
+    %define HIDDEN_DATA(x) x:data hidden
   %else
-    %define HIDDEN_DATA
+    %define HIDDEN_DATA(x) x
   %endif
 %endif
 %ifnmacro GET_GOT

From 4e6827a013084ce3856af969c673e0237ffd5537 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 2 Aug 2010 10:21:52 -0400
Subject: [PATCH 105/307] configure: support directories containing .o

Fixes http://code.google.com/p/webm/issues/detail?id=96

The regex which postprocesses the gcc make-deps (-M) output was too
greedy and matching in the dependencies part of the rule rather than
the target only. The patch provided with the issue was not correct, as
it tried to match the .o at the end of the line, which isn't correct
at least for my GCC version. This patch matches word characters
instead of .*

Thanks to raimue and the MacPorts community for isolating this issue.

Change-Id: I28510da2252e03db910c017101d9db12e5945a27
---
 build/make/configure.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 35131b07b..3b6c919fd 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -378,7 +378,7 @@ EOF
 fmt_deps = sed -e 's;^__image.axf;\$(dir \$@)\$(notdir \$<).o \$@;' #hide
 EOF
     else cat >> $1 << EOF
-fmt_deps = sed -e 's;^\(.*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;' #hide
+fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\$(dir \$@)\1\$(suffix \$<).o \$@;'
 EOF
     fi
 

From 618c7d27a0e5521d5031d8d885b45536dda50815 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 9 Aug 2010 09:33:00 -0400
Subject: [PATCH 106/307] Mark loopfilter C functions as static

Clang defaults to C99 mode, and inline works differently in C99.
(gcc, on the other hand, defaults to a special gnu-style inlining,
which uses different syntax.)   Making the functions static makes sure
clang doesn't decide to discard a function because it's too large to
inline.

Thanks to eli.friedman for the patch.

Fixes http://code.google.com/p/webm/issues/detail?id=114

Change-Id: If3c1c3c176eb855a584a60007237283b0cc631a4
---
 vp8/common/loopfilter_filters.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 2d209ee98..7aba7ed13 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -18,7 +18,7 @@
 
 typedef unsigned char uc;
 
-__inline signed char vp8_signed_char_clamp(int t)
+static __inline signed char vp8_signed_char_clamp(int t)
 {
     t = (t < -128 ? -128 : t);
     t = (t > 127 ? 127 : t);
@@ -27,7 +27,7 @@ __inline signed char vp8_signed_char_clamp(int t)
 
 
 // should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_filter_mask(signed char limit, signed char flimit,
+static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                      uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
 {
     signed char mask = 0;
@@ -47,7 +47,7 @@ __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
 }
 
 // is there high variance internal edge ( 11111111 yes, 00000000 no)
-__inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
 {
     signed char hev = 0;
     hev  |= (abs(p1 - p0) > thresh) * -1;
@@ -55,7 +55,7 @@ __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
     return hev;
 }
 
-__inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *op0, uc *oq0, uc *oq1)
 
 {
     signed char ps0, qs0;
@@ -161,7 +161,7 @@ void vp8_loop_filter_vertical_edge_c
     while (++i < count * 8);
 }
 
-__inline void vp8_mbfilter(signed char mask, signed char hev,
+static __inline void vp8_mbfilter(signed char mask, signed char hev,
                            uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2)
 {
     signed char s, u;
@@ -281,7 +281,7 @@ void vp8_mbloop_filter_vertical_edge_c
 }
 
 // should we apply any filter at all ( 11111111 yes, 00000000 no)
-__inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
+static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
 {
 // Why does this cause problems for win32?
 // error C2143: syntax error : missing ';' before 'type'
@@ -294,7 +294,7 @@ __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimi
     return mask;
 }
 
-__inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
+static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1)
 {
     signed char vp8_filter, Filter1, Filter2;
     signed char p1 = (signed char) * op1 ^ 0x80;

From ba2e107d2878ae5bf225ff038e46d309be30dfd0 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 29 Jul 2010 16:24:26 -0400
Subject: [PATCH 107/307] First modification of multi-thread decoder

This is the first modification of VP8 multi-thread decoder, which uses
same threads to decode macroblocks and then do loopfiltering for each
frame.

Inspired by Rob Clark, synchronization was done on every 8 macroblocks
instead of every macroblock to reduce lock contention.

Comparing with the original code, this implementation gave about 15%-
20% performance gain while decoding my test clips on a Core2 Quad
platform (Linux).

The work is not done yet.

Test on other platforms are needed.

Change-Id: Ice9ddb0b511af1359b9f71e65066143c04fef3b5
---
 vp8/decoder/decoderthreading.h |   1 +
 vp8/decoder/onyxd_if.c         |  31 ++
 vp8/decoder/onyxd_int.h        |  12 +-
 vp8/decoder/threading.c        | 763 ++++++++++++++++++++++-----------
 4 files changed, 549 insertions(+), 258 deletions(-)

diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index 5685c0479..2267767b6 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -18,6 +18,7 @@
 
 extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                                  MACROBLOCKD *xd);
+extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
 extern void vp8_stop_lfthread(VP8D_COMP *pbi);
 extern void vp8_start_lfthread(VP8D_COMP *pbi);
 extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 28f99086f..8b240e164 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -367,6 +367,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         return -1;
     }
 
+/*
     if (!pbi->b_multithreaded_lf)
     {
         struct vpx_usec_timer lpftimer;
@@ -378,12 +379,42 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
         vpx_usec_timer_mark(&lpftimer);
         pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+    }else{
+      struct vpx_usec_timer lpftimer;
+      vpx_usec_timer_start(&lpftimer);
+      // Apply the loop filter if appropriate.
+
+      if (cm->filter_level > 0)
+          vp8_mt_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+      vpx_usec_timer_mark(&lpftimer);
+      pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
     }
     if (cm->filter_level > 0) {
         cm->last_frame_type = cm->frame_type;
         cm->last_filter_type = cm->filter_type;
         cm->last_sharpness_level = cm->sharpness_level;
     }
+*/
+
+    if(pbi->common.filter_level)
+    {
+        struct vpx_usec_timer lpftimer;
+        vpx_usec_timer_start(&lpftimer);
+        // Apply the loop filter if appropriate.
+
+        if (pbi->b_multithreaded_lf && cm->multi_token_partition != ONE_PARTITION)
+            vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
+        else
+            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
+
+        vpx_usec_timer_mark(&lpftimer);
+        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+
+        cm->last_frame_type = cm->frame_type;
+        cm->last_filter_type = cm->filter_type;
+        cm->last_sharpness_level = cm->sharpness_level;
+    }
 
     vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
 
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index e02c96228..c08e0fb75 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -95,20 +95,22 @@ typedef struct VP8Decompressor
     int current_mb_col_main;
     int decoding_thread_count;
     int allocated_decoding_thread_count;
+    int *current_mb_col;                  //Each row remembers its already decoded column.
+    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
 
     // variable for threading
     DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
 #if CONFIG_MULTITHREAD
-    pthread_t           h_thread_lpf;         // thread for postprocessing
-    sem_t               h_event_lpf;          // Event for post_proc completed
-    sem_t               h_event_start_lpf;
+    //pthread_t           h_thread_lpf;         // thread for postprocessing
+    sem_t               h_event_end_lpf;          // Event for post_proc completed
+    sem_t               *h_event_start_lpf;
 #endif
     MB_ROW_DEC           *mb_row_di;
     DECODETHREAD_DATA   *de_thread_data;
 #if CONFIG_MULTITHREAD
     pthread_t           *h_decoding_thread;
-    sem_t               *h_event_mbrdecoding;
-    sem_t               h_event_main;
+    sem_t               *h_event_start_decoding;
+    sem_t               h_event_end_decoding;
     // end of threading data
 #endif
     vp8_reader *mbc;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 8f4e9da46..d27374afe 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -23,6 +23,8 @@
 #include "extend.h"
 #include "vpx_ports/vpx_timer.h"
 
+#define MAX_ROWS 256
+
 extern void vp8_decode_mb_row(VP8D_COMP *pbi,
                               VP8_COMMON *pc,
                               int mb_row,
@@ -31,11 +33,10 @@ extern void vp8_decode_mb_row(VP8D_COMP *pbi,
 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
 extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
 
+void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread);
+
 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
 {
-
-
-
 #if CONFIG_MULTITHREAD
     VP8_COMMON *const pc = & pbi->common;
     int i, j;
@@ -46,8 +47,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
 #if CONFIG_RUNTIME_CPU_DETECT
         mbd->rtcd = xd->rtcd;
 #endif
-
-
         mbd->subpixel_predict        = xd->subpixel_predict;
         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
@@ -82,6 +81,8 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         }
     }
 
+    for (i=0; i< pc->mb_rows; i++)
+        pbi->current_mb_col[i]=-1;
 #else
     (void) pbi;
     (void) xd;
@@ -90,6 +91,70 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
 #endif
 }
 
+void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *const pc = & pbi->common;
+    int i, j;
+
+    for (i = 0; i < count; i++)
+    {
+        MACROBLOCKD *mbd = &mbrd[i].mbd;
+//#if CONFIG_RUNTIME_CPU_DETECT
+//        mbd->rtcd = xd->rtcd;
+//#endif
+
+        //mbd->subpixel_predict        = xd->subpixel_predict;
+        //mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
+        //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
+        //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
+        //mbd->gf_active_ptr            = xd->gf_active_ptr;
+
+        mbd->mode_info        = pc->mi - 1;
+        mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
+        mbd->mode_info_stride  = pc->mode_info_stride;
+
+        //mbd->frame_type = pc->frame_type;
+        //mbd->frames_since_golden      = pc->frames_since_golden;
+        //mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
+
+        //mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
+        //mbd->dst = pc->yv12_fb[pc->new_fb_idx];
+
+        //vp8_setup_block_dptrs(mbd);
+        //vp8_build_block_doffsets(mbd);
+        mbd->segmentation_enabled    = xd->segmentation_enabled;  //
+        mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;  //
+        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));   //
+
+        //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
+        vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
+        //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
+        vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
+        //unsigned char mode_ref_lf_delta_enabled;
+        //unsigned char mode_ref_lf_delta_update;
+        mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
+        mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
+
+        //mbd->mbmi.mode = DC_PRED;
+        //mbd->mbmi.uv_mode = DC_PRED;
+        //mbd->current_bc = &pbi->bc2;
+
+        //for (j = 0; j < 25; j++)
+        //{
+        //    mbd->block[j].dequant = xd->block[j].dequant;
+        //}
+    }
+
+    for (i=0; i< pc->mb_rows; i++)
+        pbi->current_mb_col[i]=-1;
+#else
+    (void) pbi;
+    (void) xd;
+    (void) mbrd;
+    (void) count;
+#endif
+}
 
 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 {
@@ -104,142 +169,145 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
         if (pbi->b_multithreaded_rd == 0)
             break;
 
-        //if(WaitForSingleObject(pbi->h_event_mbrdecoding[ithread], INFINITE) == WAIT_OBJECT_0)
-        if (sem_wait(&pbi->h_event_mbrdecoding[ithread]) == 0)
+        //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
+        if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
         {
             if (pbi->b_multithreaded_rd == 0)
                 break;
             else
             {
                 VP8_COMMON *pc = &pbi->common;
-                int mb_row       = mbrd->mb_row;
                 MACROBLOCKD *xd = &mbrd->mbd;
 
-                //printf("ithread:%d mb_row %d\n", ithread, mb_row);
-                int i;
-                int recon_yoffset, recon_uvoffset;
-                int mb_col;
-                int ref_fb_idx = pc->lst_fb_idx;
-                int dst_fb_idx = pc->new_fb_idx;
-                int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
-                int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
-
+                int mb_row;
+                int num_part = 1 << pbi->common.multi_token_partition;
                 volatile int *last_row_current_mb_col;
 
-                if (ithread > 0)
-                    last_row_current_mb_col = &pbi->mb_row_di[ithread-1].current_mb_col;
-                else
-                    last_row_current_mb_col = &pbi->current_mb_col_main;
-
-                recon_yoffset = mb_row * recon_y_stride * 16;
-                recon_uvoffset = mb_row * recon_uv_stride * 8;
-                // reset above block coeffs
-
-                xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-                xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-                xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-                xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
-                xd->left_context = mb_row_left_context;
-                vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
-                xd->up_available = (mb_row != 0);
-
-                xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-                xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
-                for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+                for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                 {
+                    int i;
+                    int recon_yoffset, recon_uvoffset;
+                    int mb_col;
+                    int ref_fb_idx = pc->lst_fb_idx;
+                    int dst_fb_idx = pc->new_fb_idx;
+                    int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+                    int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-                    while (mb_col > (*last_row_current_mb_col - 1) && *last_row_current_mb_col != pc->mb_cols - 1)
+                    pbi->mb_row_di[ithread].mb_row = mb_row;
+                    pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
+
+                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+
+                    recon_yoffset = mb_row * recon_y_stride * 16;
+                    recon_uvoffset = mb_row * recon_uv_stride * 8;
+                    // reset above block coeffs
+
+                    xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
+                    xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
+                    xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
+                    xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+                    xd->left_context = mb_row_left_context;
+                    vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
+                    xd->up_available = (mb_row != 0);
+
+                    xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+                    xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+                    for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                     {
-                        x86_pause_hint();
-                        thread_sleep(0);
-                    }
-
-                    // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-                    // the partition_bmi array is unused in the decoder, so don't copy it.
-                    vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
-                               sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
-
-                    if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
-                    {
-                        for (i = 0; i < 16; i++)
+                        if ((mb_col & 7) == 0)
                         {
-                            BLOCKD *d = &xd->block[i];
-                            vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                            while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                            {
+                                x86_pause_hint();
+                                thread_sleep(0);
+                            }
                         }
+
+                        // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
+                        // the partition_bmi array is unused in the decoder, so don't copy it.
+                        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
+                                   sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
+
+                        if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+                        {
+                            for (i = 0; i < 16; i++)
+                            {
+                                BLOCKD *d = &xd->block[i];
+                                vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                            }
+                        }
+
+                        // Distance of Mb to the various image edges.
+                        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                        xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                        xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+                        xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                        xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                        xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+                        xd->left_available = (mb_col != 0);
+
+                        // Select the appropriate reference frame for this MB
+                        if (xd->mbmi.ref_frame == LAST_FRAME)
+                            ref_fb_idx = pc->lst_fb_idx;
+                        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+                            ref_fb_idx = pc->gld_fb_idx;
+                        else
+                            ref_fb_idx = pc->alt_fb_idx;
+
+                        xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+                        xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+                        xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+                        vp8_build_uvmvs(xd, pc->full_pixel);
+
+                        vp8_decode_macroblock(pbi, xd);
+
+                        recon_yoffset += 16;
+                        recon_uvoffset += 8;
+
+                        ++xd->mode_info_context;  /* next mb */
+
+                        xd->gf_active_ptr++;      // GF useage flag for next MB
+
+                        xd->above_context[Y1CONTEXT] += 4;
+                        xd->above_context[UCONTEXT ] += 2;
+                        xd->above_context[VCONTEXT ] += 2;
+                        xd->above_context[Y2CONTEXT] ++;
+
+                        //pbi->mb_row_di[ithread].current_mb_col = mb_col;
+                        pbi->current_mb_col[mb_row] = mb_col;
                     }
 
-                    // Distance of Mb to the various image edges.
-                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                    xd->mb_to_left_edge = -((mb_col * 16) << 3);
-                    xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-
-                    xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
-                    xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
-                    xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
-
-                    xd->left_available = (mb_col != 0);
-
-                    // Select the appropriate reference frame for this MB
-                    if (xd->mbmi.ref_frame == LAST_FRAME)
-                        ref_fb_idx = pc->lst_fb_idx;
-                    else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
-                        ref_fb_idx = pc->gld_fb_idx;
-                    else
-                        ref_fb_idx = pc->alt_fb_idx;
-
-                    xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
-                    xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
-                    xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
-
-                    vp8_build_uvmvs(xd, pc->full_pixel);
-
-                    vp8_decode_macroblock(pbi, xd);
-
-
-                    recon_yoffset += 16;
-                    recon_uvoffset += 8;
-
-                    ++xd->mode_info_context;  /* next mb */
-
-                    xd->gf_active_ptr++;      // GF useage flag for next MB
-
-                    xd->above_context[Y1CONTEXT] += 4;
-                    xd->above_context[UCONTEXT ] += 2;
-                    xd->above_context[VCONTEXT ] += 2;
-                    xd->above_context[Y2CONTEXT] ++;
-                    pbi->mb_row_di[ithread].current_mb_col = mb_col;
-
-                }
-
-                // adjust to the next row of mbs
-                vp8_extend_mb_row(
+                    // adjust to the next row of mbs
+                    vp8_extend_mb_row(
                     &pc->yv12_fb[dst_fb_idx],
                     xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-                );
+                    );
 
-                ++xd->mode_info_context;      /* skip prediction column */
+                    ++xd->mode_info_context;      /* skip prediction column */
 
-                // since we have multithread
-                xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
+                    // since we have multithread
+                    xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
 
-                //memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
-                if ((mb_row & 1) == 1)
-                {
                     pbi->last_mb_row_decoded = mb_row;
-                    //printf("S%d", pbi->last_mb_row_decoded);
-                }
-
-                if (ithread == (pbi->decoding_thread_count - 1) || mb_row == pc->mb_rows - 1)
-                {
-                    //SetEvent(pbi->h_event_main);
-                    sem_post(&pbi->h_event_main);
 
                 }
             }
         }
-    }
+        //  add this to each frame
+        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+        {
+            //SetEvent(pbi->h_event_end_decoding);
+            sem_post(&pbi->h_event_end_decoding);
+        }
 
+        if ((pbi->b_multithreaded_lf) &&(pbi->common.filter_level))
+            vp8_thread_loop_filter(pbi, mbrd, ithread);
+
+    }
 #else
     (void) p_data;
 #endif
@@ -247,96 +315,60 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
     return 0 ;
 }
 
-THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
+
+void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread)
 {
 #if CONFIG_MULTITHREAD
-    VP8D_COMP *pbi = (VP8D_COMP *)p_data;
 
-    while (1)
-    {
-        if (pbi->b_multithreaded_lf == 0)
-            break;
-
-        //printf("before waiting for start_lpf\n");
-
-        //if(WaitForSingleObject(pbi->h_event_start_lpf, INFINITE) == WAIT_OBJECT_0)
-        if (sem_wait(&pbi->h_event_start_lpf) == 0)
+        if (sem_wait(&pbi->h_event_start_lpf[ithread]) == 0)
         {
-            if (pbi->b_multithreaded_lf == 0) // we're shutting down
-                break;
-            else
+           // if (pbi->b_multithreaded_lf == 0) // we're shutting down      ????
+           //     break;
+           // else
             {
-
                 VP8_COMMON *cm  = &pbi->common;
-                MACROBLOCKD *mbd = &pbi->lpfmb;
+                MACROBLOCKD *mbd = &mbrd->mbd;
                 int default_filt_lvl = pbi->common.filter_level;
 
-                YV12_BUFFER_CONFIG *post = &cm->yv12_fb[cm->new_fb_idx];
+                YV12_BUFFER_CONFIG *post = cm->frame_to_show;
                 loop_filter_info *lfi = cm->lf_info;
-                int frame_type = cm->frame_type;
+                //int frame_type = cm->frame_type;
 
                 int mb_row;
                 int mb_col;
 
-                int baseline_filter_level[MAX_MB_SEGMENTS];
                 int filter_level;
                 int alt_flt_enabled = mbd->segmentation_enabled;
 
                 int i;
                 unsigned char *y_ptr, *u_ptr, *v_ptr;
 
-                volatile int *last_mb_row_decoded = &pbi->last_mb_row_decoded;
-
-                //MODE_INFO * this_mb_mode_info = cm->mi;
-                mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
-
-                // Note the baseline filter values for each segment
-                if (alt_flt_enabled)
-                {
-                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
-                    {
-                        if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-                            baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                        else
-                        {
-                            baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                            baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
-                        }
-                    }
-                }
-                else
-                {
-                    for (i = 0; i < MAX_MB_SEGMENTS; i++)
-                        baseline_filter_level[i] = default_filt_lvl;
-                }
-
-                // Initialize the loop filter for this frame.
-                if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
-                    vp8_init_loop_filter(cm);
-                else if (frame_type != cm->last_frame_type)
-                    vp8_frame_init_loop_filter(lfi, frame_type);
+                volatile int *last_row_current_mb_col;
 
                 // Set up the buffer pointers
-                y_ptr = post->y_buffer;
-                u_ptr = post->u_buffer;
-                v_ptr = post->v_buffer;
+                y_ptr = post->y_buffer + post->y_stride  * 16 * (ithread +1);
+                u_ptr = post->u_buffer + post->uv_stride *  8 * (ithread +1);
+                v_ptr = post->v_buffer + post->uv_stride *  8 * (ithread +1);
 
                 // vp8_filter each macro block
-                for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
+                for (mb_row = ithread+1; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
                 {
+                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
 
-                    while (mb_row >= *last_mb_row_decoded)
-                    {
-                        x86_pause_hint();
-                        thread_sleep(0);
-                    }
-
-                    //printf("R%d", mb_row);
                     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
                     {
                         int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
 
-                        filter_level = baseline_filter_level[Segment];
+                        if ((mb_col & 7) == 0)
+                        {
+                            while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
+                            {
+                                x86_pause_hint();
+                                thread_sleep(0);
+                            }
+                        }
+
+                        filter_level = pbi->mt_baseline_filter_level[Segment];
 
                         // Apply any context driven MB level adjustment
                         vp8_adjust_mb_lf_value(mbd, &filter_level);
@@ -362,29 +394,28 @@ THREAD_FUNCTION vp8_thread_loop_filter(void *p_data)
                         v_ptr += 8;
 
                         mbd->mode_info_context++;     // step to next MB
-
+                        pbi->current_mb_col[mb_row] = mb_col;
                     }
 
-                    y_ptr += post->y_stride  * 16 - post->y_width;
-                    u_ptr += post->uv_stride *  8 - post->uv_width;
-                    v_ptr += post->uv_stride *  8 - post->uv_width;
-
                     mbd->mode_info_context++;         // Skip border mb
+
+                    y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
+                    u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+                    v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+
+                    mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;         // Skip border mb
                 }
-
-                //printf("R%d\n", mb_row);
-                // When done, signal main thread that ME is finished
-                //SetEvent(pbi->h_event_lpf);
-                sem_post(&pbi->h_event_lpf);
             }
-
         }
-    }
 
+        //  add this to each frame
+        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
+        {
+          sem_post(&pbi->h_event_end_lpf);
+        }
 #else
-    (void) p_data;
+    (void) pbi;
 #endif
-    return 0;
 }
 
 void vp8_decoder_create_threads(VP8D_COMP *pbi)
@@ -396,39 +427,38 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
     pbi->b_multithreaded_rd = 0;
     pbi->b_multithreaded_lf = 0;
     pbi->allocated_decoding_thread_count = 0;
-    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads; //vp8_get_proc_core_count();
-    if (core_count > 1)
-    {
-        sem_init(&pbi->h_event_lpf, 0, 0);
-        sem_init(&pbi->h_event_start_lpf, 0, 0);
-        pbi->b_multithreaded_lf = 1;
-        pthread_create(&pbi->h_thread_lpf, 0, vp8_thread_loop_filter, (pbi));
-    }
+    core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
 
     if (core_count > 1)
     {
         pbi->b_multithreaded_rd = 1;
-        pbi->decoding_thread_count = core_count - 1;
+        pbi->b_multithreaded_lf = 1;  // this can be merged with pbi->b_multithreaded_rd ?
+        pbi->decoding_thread_count = core_count -1;
 
         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
-        CHECK_MEM_ERROR(pbi->h_event_mbrdecoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+        CHECK_MEM_ERROR(pbi->h_event_start_decoding, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
         CHECK_MEM_ERROR(pbi->mb_row_di, vpx_memalign(32, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count));
         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
 
+        CHECK_MEM_ERROR(pbi->current_mb_col, vpx_malloc(sizeof(int) * MAX_ROWS));  // pc->mb_rows));
+        CHECK_MEM_ERROR(pbi->h_event_start_lpf, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
+
         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
         {
-            sem_init(&pbi->h_event_mbrdecoding[ithread], 0, 0);
+            sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
+            sem_init(&pbi->h_event_start_lpf[ithread], 0, 0);
 
             pbi->de_thread_data[ithread].ithread  = ithread;
             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
             pbi->de_thread_data[ithread].ptr2     = (void *) &pbi->mb_row_di[ithread];
 
             pthread_create(&pbi->h_decoding_thread[ithread], 0, vp8_thread_decoding_proc, (&pbi->de_thread_data[ithread]));
-
         }
 
-        sem_init(&pbi->h_event_main, 0, 0);
+        sem_init(&pbi->h_event_end_decoding, 0, 0);
+        sem_init(&pbi->h_event_end_lpf, 0, 0);
+
         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
     }
 
@@ -443,39 +473,35 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 
     if (pbi->b_multithreaded_lf)
     {
+        int i;
         pbi->b_multithreaded_lf = 0;
-        sem_post(&pbi->h_event_start_lpf);
-        pthread_join(pbi->h_thread_lpf, 0);
-        sem_destroy(&pbi->h_event_start_lpf);
+
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+            sem_destroy(&pbi->h_event_start_lpf[i]);
+
+        sem_destroy(&pbi->h_event_end_lpf);
     }
 
     //shutdown MB Decoding thread;
     if (pbi->b_multithreaded_rd)
     {
+        int i;
+
         pbi->b_multithreaded_rd = 0;
+
         // allow all threads to exit
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
         {
-            int i;
-
-            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
-            {
-
-                sem_post(&pbi->h_event_mbrdecoding[i]);
-                pthread_join(pbi->h_decoding_thread[i], NULL);
-            }
-        }
-        {
-
-            int i;
-            for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
-            {
-                sem_destroy(&pbi->h_event_mbrdecoding[i]);
-            }
-
-
+            sem_post(&pbi->h_event_start_decoding[i]);
+            pthread_join(pbi->h_decoding_thread[i], NULL);
         }
 
-        sem_destroy(&pbi->h_event_main);
+        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
+        {
+            sem_destroy(&pbi->h_event_start_decoding[i]);
+        }
+
+        sem_destroy(&pbi->h_event_end_decoding);
 
         if (pbi->h_decoding_thread)
         {
@@ -483,10 +509,16 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
             pbi->h_decoding_thread = NULL;
         }
 
-        if (pbi->h_event_mbrdecoding)
+        if (pbi->h_event_start_decoding)
         {
-            vpx_free(pbi->h_event_mbrdecoding);
-            pbi->h_event_mbrdecoding = NULL;
+            vpx_free(pbi->h_event_start_decoding);
+            pbi->h_event_start_decoding = NULL;
+        }
+
+        if (pbi->h_event_start_lpf)
+        {
+            vpx_free(pbi->h_event_start_lpf);
+            pbi->h_event_start_lpf = NULL;
         }
 
         if (pbi->mb_row_di)
@@ -500,8 +532,13 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
             vpx_free(pbi->de_thread_data);
             pbi->de_thread_data = NULL;
         }
-    }
 
+        if (pbi->current_mb_col)
+        {
+            vpx_free(pbi->current_mb_col);
+            pbi->current_mb_col = NULL ;
+        }
+    }
 #else
     (void) pbi;
 #endif
@@ -511,9 +548,12 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 void vp8_start_lfthread(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
+  /*
     memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
     pbi->last_mb_row_decoded = 0;
     sem_post(&pbi->h_event_start_lpf);
+    */
+    (void) pbi;
 #else
     (void) pbi;
 #endif
@@ -522,14 +562,17 @@ void vp8_start_lfthread(VP8D_COMP *pbi)
 void vp8_stop_lfthread(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
+  /*
     struct vpx_usec_timer timer;
 
     vpx_usec_timer_start(&timer);
 
-    sem_wait(&pbi->h_event_lpf);
+    sem_wait(&pbi->h_event_end_lpf);
 
     vpx_usec_timer_mark(&timer);
     pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
+    */
+    (void) pbi;
 #else
     (void) pbi;
 #endif
@@ -545,49 +588,263 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
     int ibc = 0;
     int num_part = 1 << pbi->common.multi_token_partition;
+    int i;
+    volatile int *last_row_current_mb_col = NULL;
 
     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
 
+    for (i = 0; i < pbi->decoding_thread_count; i++)
+        sem_post(&pbi->h_event_start_decoding[i]);
+
     for (mb_row = 0; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
     {
         int i;
-        pbi->current_mb_col_main = -1;
 
-        xd->current_bc = &pbi->mbc[ibc];
-        ibc++ ;
+        xd->current_bc = &pbi->mbc[mb_row%num_part];
 
-        if (ibc == num_part)
-            ibc = 0;
-
-        for (i = 0; i < pbi->decoding_thread_count; i++)
+        //vp8_decode_mb_row(pbi, pc, mb_row, xd);
         {
-            if ((mb_row + i + 1) >= pc->mb_rows)
-                break;
+            int i;
+            int recon_yoffset, recon_uvoffset;
+            int mb_col;
+            int ref_fb_idx = pc->lst_fb_idx;
+            int dst_fb_idx = pc->new_fb_idx;
+            int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
+            int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-            pbi->mb_row_di[i].mb_row = mb_row + i + 1;
-            pbi->mb_row_di[i].mbd.current_bc =  &pbi->mbc[ibc];
-            ibc++;
+           // volatile int *last_row_current_mb_col = NULL;
+            if (mb_row > 0)
+                last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
 
-            if (ibc == num_part)
-                ibc = 0;
+            vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+            recon_yoffset = mb_row * recon_y_stride * 16;
+            recon_uvoffset = mb_row * recon_uv_stride * 8;
+            // reset above block coeffs
 
-            pbi->mb_row_di[i].current_mb_col = -1;
-            sem_post(&pbi->h_event_mbrdecoding[i]);
+            xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
+            xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
+            xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
+            xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+            xd->up_available = (mb_row != 0);
+
+            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
+            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
+
+            for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
+            {
+
+                if ( mb_row > 0 && (mb_col & 7) == 0){
+               //if ( mb_row > 0 ){
+                  while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                  {
+                      x86_pause_hint();
+                      thread_sleep(0);
+                  }
+
+                }
+
+                // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
+                // the partition_bmi array is unused in the decoder, so don't copy it.
+                vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
+                           sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
+
+                if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+                {
+                    for (i = 0; i < 16; i++)
+                    {
+                        BLOCKD *d = &xd->block[i];
+                        vpx_memcpy(&d->bmi, &xd->mode_info_context->bmi[i], sizeof(B_MODE_INFO));
+                    }
+                }
+
+                // Distance of Mb to the various image edges.
+                // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                xd->mb_to_left_edge = -((mb_col * 16) << 3);
+                xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
+
+                xd->dst.y_buffer = pc->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
+                xd->dst.u_buffer = pc->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
+                xd->dst.v_buffer = pc->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
+
+                xd->left_available = (mb_col != 0);
+
+                // Select the appropriate reference frame for this MB
+                if (xd->mbmi.ref_frame == LAST_FRAME)
+                    ref_fb_idx = pc->lst_fb_idx;
+                else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+                    ref_fb_idx = pc->gld_fb_idx;
+                else
+                    ref_fb_idx = pc->alt_fb_idx;
+
+                xd->pre.y_buffer = pc->yv12_fb[ref_fb_idx].y_buffer + recon_yoffset;
+                xd->pre.u_buffer = pc->yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
+                xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
+
+                vp8_build_uvmvs(xd, pc->full_pixel);
+
+                vp8_decode_macroblock(pbi, xd);
+
+                recon_yoffset += 16;
+                recon_uvoffset += 8;
+
+                ++xd->mode_info_context;  /* next mb */
+
+                xd->gf_active_ptr++;      // GF useage flag for next MB
+
+                xd->above_context[Y1CONTEXT] += 4;
+                xd->above_context[UCONTEXT ] += 2;
+                xd->above_context[VCONTEXT ] += 2;
+                xd->above_context[Y2CONTEXT] ++;
+
+                //pbi->current_mb_col_main = mb_col;
+                pbi->current_mb_col[mb_row] = mb_col;
+            }
+
+            // adjust to the next row of mbs
+            vp8_extend_mb_row(
+                &pc->yv12_fb[dst_fb_idx],
+                xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
+            );
+
+            ++xd->mode_info_context;      /* skip prediction column */
+
+            pbi->last_mb_row_decoded = mb_row;
         }
-
-        vp8_decode_mb_row(pbi, pc, mb_row, xd);
-
         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
-
-        if (mb_row < pc->mb_rows - 1)
-        {
-            sem_wait(&pbi->h_event_main);
-        }
     }
 
-    pbi->last_mb_row_decoded = mb_row;
+    sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
 #else
     (void) pbi;
     (void) xd;
 #endif
 }
+
+
+void vp8_mt_loop_filter_frame( VP8D_COMP *pbi)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *cm  = &pbi->common;
+    MACROBLOCKD *mbd = &pbi->mb;
+    int default_filt_lvl = pbi->common.filter_level;
+
+    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
+    loop_filter_info *lfi = cm->lf_info;
+    int frame_type = cm->frame_type;
+
+    int mb_row;
+    int mb_col;
+
+    int filter_level;
+    int alt_flt_enabled = mbd->segmentation_enabled;
+
+    int i;
+    unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+    volatile int *last_row_current_mb_col=NULL;
+
+    vp8_setup_loop_filter_thread_data(pbi, mbd, pbi->mb_row_di, pbi->decoding_thread_count);
+
+    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+
+    // Note the baseline filter values for each segment
+    if (alt_flt_enabled)
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+        {
+            // Abs value
+            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+            // Delta Value
+            else
+            {
+                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
+    }
+
+    // Initialize the loop filter for this frame.
+    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+        vp8_init_loop_filter(cm);
+    else if (frame_type != cm->last_frame_type)
+        vp8_frame_init_loop_filter(lfi, frame_type);
+
+    for (i = 0; i < pbi->decoding_thread_count; i++)
+        sem_post(&pbi->h_event_start_lpf[i]);
+        // sem_post(&pbi->h_event_start_lpf);
+
+    // Set up the buffer pointers
+    y_ptr = post->y_buffer;
+    u_ptr = post->u_buffer;
+    v_ptr = post->v_buffer;
+
+    // vp8_filter each macro block
+    for (mb_row = 0; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
+    {
+        if (mb_row > 0)
+            last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+
+        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
+        {
+            int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
+
+
+            if ( mb_row > 0 && (mb_col & 7) == 0){
+            // if ( mb_row > 0 ){
+                while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
+                {
+                    x86_pause_hint();
+                    thread_sleep(0);
+                }
+            }
+
+            filter_level = pbi->mt_baseline_filter_level[Segment];
+
+            // Distance of Mb to the various image edges.
+            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+            // Apply any context driven MB level adjustment
+            vp8_adjust_mb_lf_value(mbd, &filter_level);
+
+            if (filter_level)
+            {
+                if (mb_col > 0)
+                    cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                if (mbd->mode_info_context->mbmi.dc_diff > 0)
+                    cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                // don't apply across umv border
+                if (mb_row > 0)
+                    cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+
+                if (mbd->mode_info_context->mbmi.dc_diff > 0)
+                    cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
+            }
+
+            y_ptr += 16;
+            u_ptr += 8;
+            v_ptr += 8;
+
+            mbd->mode_info_context++;     // step to next MB
+            pbi->current_mb_col[mb_row] = mb_col;
+        }
+        mbd->mode_info_context++;         // Skip border mb
+
+        //update for multi-thread
+        y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
+        u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+        v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
+        mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;
+    }
+
+    sem_wait(&pbi->h_event_end_lpf);
+#else
+    (void) pbi;
+#endif
+}

From e4fe866949951c8eb79c5ebdb0a6dab37cef37a9 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Tue, 10 Aug 2010 17:06:05 -0400
Subject: [PATCH 108/307] Added ssse3 version of sixtap filters

Improved decoder performance by 9% for the clip used.

Change-Id: I8fc5609213b7bef10248372595dc85b29f9895b9
---
 vp8/common/x86/subpixel_ssse3.asm    | 957 +++++++++++++++++++++++++++
 vp8/common/x86/subpixel_x86.h        |  33 +
 vp8/common/x86/vp8_asm_stubs.c       | 169 +++++
 vp8/common/x86/x86_systemdependent.c |  13 +
 vp8/vp8_common.mk                    |   1 +
 5 files changed, 1173 insertions(+)
 create mode 100644 vp8/common/x86/subpixel_ssse3.asm

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
new file mode 100644
index 000000000..f45c7547b
--- /dev/null
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -0,0 +1,957 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP8_FILTER_WEIGHT 128
+%define VP8_FILTER_SHIFT  7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in horizontal direction, calculating ONE
+; rows each iteration to take advantage of the 128 bits operations.
+;*************************************************************************************/
+;void vp8_filter_block1d8_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_h6_ssse3)
+sym(vp8_filter_block1d8_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4
+
+    movdqa      xmm7, [rd GLOBAL]
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    mov         rdi, arg(2)             ;output_ptr
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_h4_ssse3
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pmaddubsw   xmm1, xmm5
+
+    pshufb      xmm2, [shuf3b GLOBAL]
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+    jnz         filter_block1d8_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h4_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm0
+    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]
+
+    pmaddubsw   xmm0, xmm5
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+
+    jnz         filter_block1d8_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d16_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_h6_ssse3)
+sym(vp8_filter_block1d16_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    mov         rdi, arg(2)             ;output_ptr
+    movdqa      xmm7, [rd GLOBAL]
+
+;;
+;;    cmp         esi, DWORD PTR [rax]
+;;    je          vp8_filter_block1d16_h4_ssse3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+    movdqa      xmm2, xmm1
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    paddsw      xmm0, xmm1
+    movdqa      xmm1, xmm3
+    pshufb      xmm3, [shuf1b GLOBAL]
+    paddsw      xmm0, xmm7
+    pmaddubsw   xmm3, xmm4
+    paddsw      xmm0, xmm2
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+    pmaddubsw   xmm2, xmm6
+
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm1
+    paddsw      xmm3, xmm7
+    paddsw      xmm3, xmm2
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm0, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm0
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h6_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    movdqa      xmm0, xmm3
+    pshufb      xmm3, [shuf3b GLOBAL]
+    pshufb      xmm0, [shuf2b GLOBAL]
+
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+
+    pmaddubsw   xmm0, xmm5
+    pmaddubsw   xmm3, xmm6
+
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm0
+    paddsw      xmm3, xmm7
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm1, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h4_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d4_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_h6_ssse3)
+sym(vp8_filter_block1d4_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    mov         rsi, arg(0)             ;src_ptr
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    movdqa      xmm7, [rd GLOBAL]
+
+
+
+
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+;xmm3 free
+filter_block1d4_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    pxor        xmm1, xmm1
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+;
+    punpcklbw   xmm0, xmm1
+
+    movq        MMWORD PTR [rdi], xmm0
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d16_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_v6_ssse3)
+sym(vp8_filter_block1d16_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d16_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+
+vp8_filter_block1d16_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2          ;store the results
+
+    movq        xmm1, MMWORD PTR [rsi + 8]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4 + 8]        ;F
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi+8], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+vp8_filter_block1d16_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    movq        xmm5, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm1, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, xmm3
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    punpcklbw   xmm5, xmm4                  ;B D
+    punpcklbw   xmm1, xmm0                  ;C E
+
+    pmaddubsw   xmm1, xmm6
+    pmaddubsw   xmm5, xmm7
+
+    movdqa      xmm4, [rd GLOBAL]
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm5, xmm1
+    paddsw      xmm5, xmm4
+    psraw       xmm5, 7
+    packuswb    xmm5, xmm5
+
+    punpcklqdq  xmm2, xmm5
+
+    movdqa       XMMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d8_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_v6_ssse3)
+sym(vp8_filter_block1d8_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+    movdqa      xmm4, [rd GLOBAL]
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, xmm4
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm5, [rd GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm5
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+global sym(vp8_filter_block1d8_h6_ssse3_slow)
+sym(vp8_filter_block1d8_h6_ssse3_slow):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    mov         rdx, arg(6) ;vp8_filter
+    mov         rsi, arg(0) ;src_ptr
+
+    mov         rdi, arg(1) ;output_ptr
+
+    movsxd      rcx, dword ptr arg(4) ;output_height
+    movsxd      rax, dword ptr arg(2) ;src_pixels_per_line
+
+    movq        xmm7, [rdx]
+    pxor        xmm4, xmm4
+    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
+    movdqa      xmm6, XMMWORD PTR [shuf2 GLOBAL]
+
+    movsxd      rdx, dword ptr arg(5) ;output_width
+
+    punpcklqdq  xmm7, xmm7          ;copy filter constants to upper 8 bytes
+
+filter_block1d8_h6_rowloop3_slow:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    lea         rsi,    [rsi + rax]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, XMMWORD PTR [shuf1 GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pmaddubsw   xmm0, xmm7
+    pshufb      xmm1, XMMWORD PTR [shuf2 GLOBAL]
+
+    movdqa      xmm3, xmm2
+    pmaddubsw   xmm1, xmm7
+    pshufb      xmm2, XMMWORD PTR [shuf3 GLOBAL]
+
+    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
+
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm3, xmm7
+;4 cycles
+
+    phaddsw     xmm0, xmm1
+    phaddsw     xmm2, xmm3
+;7 cycles
+    phaddsw     xmm0, xmm2
+;7 cycles
+
+
+    paddsw      xmm0, [rd GLOBAL]
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+;
+    punpcklbw   xmm0, xmm4
+
+    movdqa      XMMWORD Ptr [rdi], xmm0
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d8_h6_rowloop3_slow                ; next row
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d16_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned short *output_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned int    pixel_step,
+;    unsigned int    output_height,
+;    unsigned int    output_width,
+;    short           *vp8_filter
+;)
+global sym(vp8_filter_block1d16_h6_ssse3_slow)
+sym(vp8_filter_block1d16_h6_ssse3_slow):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+    mov         rdx, arg(6) ;vp8_filter
+    mov         rsi, arg(0) ;src_ptr
+
+    mov         rdi, arg(1) ;output_ptr
+
+    movsxd      rcx, dword ptr arg(4) ;output_height
+    movsxd      rax, dword ptr arg(2) ;src_pixels_per_line
+
+    movq        xmm7, [rdx]
+    pxor        xmm4, xmm4
+    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
+    movdqa      xmm6, XMMWORD PTR [shuf2 GLOBAL]
+
+    movsxd      rdx, dword ptr arg(5) ;output_width
+
+    punpcklqdq  xmm7, xmm7          ;copy filter constants to upper 8 bytes
+    sub         rdi, rdx
+
+filter_block1d16_h6_rowloop3_slow:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, xmm5
+
+    movdqa      xmm2, xmm1
+    pmaddubsw   xmm0, xmm7
+    pshufb      xmm1, xmm6
+
+    movdqa      xmm3, xmm2
+    pmaddubsw   xmm1, xmm7
+    pshufb      xmm2, XMMWORD PTR [shuf3 GLOBAL]
+    movdqu      xmm4,   XMMWORD PTR [rsi + 6]
+    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
+    lea         rsi,    [rsi + rax]
+    pmaddubsw   xmm2, xmm7
+    phaddsw     xmm0, xmm1
+
+    pmaddubsw   xmm3, xmm7
+    movdqa      xmm1, xmm4
+    pshufb      xmm4, xmm5
+    movdqa      xmm5, xmm1
+    pmaddubsw   xmm4, xmm7
+    pshufb      xmm1, xmm6
+    phaddsw     xmm2, xmm3
+    pmaddubsw   xmm1, xmm7
+    movdqa      xmm3, xmm5
+    pshufb      xmm5, XMMWORD PTR [shuf3 GLOBAL]
+    add         rdi, rdx
+    pmaddubsw   xmm5, xmm7
+    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
+    phaddsw     xmm4, xmm1
+    dec         rcx
+    phaddsw     xmm0, xmm2
+    pmaddubsw   xmm3, xmm7
+
+
+    paddsw      xmm0, [rd GLOBAL]
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+    phaddsw     xmm5, xmm3
+    pxor        xmm3, xmm3
+    punpcklbw   xmm0, xmm3
+;--
+;--
+;--
+;--
+
+    phaddsw     xmm4, xmm5
+    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
+    movdqa      XMMWORD Ptr [rdi], xmm0
+;--
+;--
+;--
+;--
+;--
+    paddsw      xmm4, [rd GLOBAL]
+    psraw       xmm4, 7
+    packuswb    xmm4, xmm4
+;
+    punpcklbw   xmm4, xmm3
+
+    movdqa      XMMWORD Ptr [rdi+16], xmm4
+
+    jnz         filter_block1d16_h6_rowloop3_slow                ; next row
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+shuf1:
+    db 0, 1, 2, 4, 3, 5, 128, 128, 1, 2, 3, 5, 4, 6, 128, 128
+shuf2:
+    db 2, 3, 4, 6, 5, 7, 128, 128, 3, 4, 5, 7, 6, 8, 128, 128
+shuf3:
+    db 4, 5, 6, 8, 7, 9, 128, 128, 5, 6, 7, 9, 8, 10, 128, 128
+shuf4:
+    db 6, 7, 8, 10, 9, 11, 128, 128, 7, 8, 9, 11, 10, 12, 128, 128
+
+shuf1a:
+    db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+shuf2a:
+    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3a:
+    db 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12
+
+shuf1b:
+    db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+shuf2b:
+    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3b:
+    db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
+
+align 16
+rd:
+    times 8 dw 0x40
+
+align 16
+k0_k5:
+    times 8 db 0, 0             ;placeholder
+    times 8 db 0, 0
+    times 8 db 2, 1
+    times 8 db 0, 0
+    times 8 db 3, 3
+    times 8 db 0, 0
+    times 8 db 1, 2
+    times 8 db 0, 0
+k1_k3:
+    times 8 db  0,    0         ;placeholder
+    times 8 db  -6,  12
+    times 8 db -11,  36
+    times 8 db  -9,  50
+    times 8 db -16,  77
+    times 8 db  -6,  93
+    times 8 db  -8, 108
+    times 8 db  -1, 123
+k2_k4:
+    times 8 db 128,    0        ;placeholder
+    times 8 db 123,   -1
+    times 8 db 108,   -8
+    times 8 db  93,   -6
+    times 8 db  77,  -16
+    times 8 db  50,   -9
+    times 8 db  36,  -11
+    times 8 db  12,   -6
+
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index c406be789..e5c08b15c 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -86,4 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
 #endif
 #endif
 
+#if HAVE_SSSE3
+extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
+//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
+//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef  vp8_subpix_sixtap16x16
+#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
+
+#undef  vp8_subpix_sixtap8x8
+#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
+
+#undef  vp8_subpix_sixtap8x4
+#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
+
+//#undef  vp8_subpix_sixtap4x4
+//#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
+
+
+//#undef  vp8_subpix_bilinear16x16
+//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
+
+//#undef  vp8_subpix_bilinear8x8
+//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
+
+#endif
+#endif
+
+
+
 #endif
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 79650cf34..2c99cb64f 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -359,3 +359,172 @@ void vp8_sixtap_predict8x4_sse2
 }
 
 #endif
+
+#if HAVE_SSSE3
+
+extern void vp8_filter_block1d8_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+extern void vp8_filter_block1d8_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+void vp8_sixtap_predict16x16_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
+            vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
+    }
+}
+
+void vp8_sixtap_predict8x8_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
+        }
+        else
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
+    }
+}
+
+
+void vp8_sixtap_predict8x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+    }
+}
+
+void vp8_sixtap_predict4x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 16*16);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+
+        }
+        else
+        {
+        }
+    }
+    else
+    {
+    }
+
+}
+
+#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 66738f855..2d8ced00d 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -27,6 +27,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
     int mmx_enabled = flags & HAS_MMX;
     int xmm_enabled = flags & HAS_SSE;
     int wmt_enabled = flags & HAS_SSE2;
+    int SSSE3Enabled = flags & HAS_SSSE3;
 
     /* Note:
      *
@@ -114,5 +115,17 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
     }
 
 #endif
+
+#if HAVE_SSSE3
+
+    if (SSSE3Enabled)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
+//        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+    }
+#endif
+
 #endif
 }
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 5b8a3017e..a8a252a17 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -109,6 +109,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
 ifeq ($(CONFIG_POSTPROC),yes)
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm

From 8fa38096a39433c6b86848dce0e307acd2a54482 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Fri, 2 Jul 2010 14:35:53 -0700
Subject: [PATCH 109/307] Add trellis quantization.

Replace the exponential search for optimal rounding during
 quantization with a linear Viterbi trellis and enable it
 by default when using --best.
Right now this operates on top of the output of the adaptive
 zero-bin quantizer in vp8_regular_quantize_b() and gives a small
 gain.
It can be tested as a replacement for that quantizer by
 enabling the call to vp8_strict_quantize_b(), which uses
 normal rounding and no zero bin offset.
Ultimately, the quantizer will have to become a function of lambda
 in order to take advantage of activity masking, since there is
 limited ability to change the quantization factor itself.
However, currently vp8_strict_quantize_b() plus the trellis
 quantizer (which is lambda-dependent) loses to
 vp8_regular_quantize_b() alone (which is not) on my test clip.

Patch Set 3:

Fix an issue related to the cost evaluation of successor
states when a coefficient is reduced to zero. With this
issue fixed, now the trellis search almost exactly matches
the exponential search.

Patch Set 2:

Overall, the goal of this patch set is to make "trellis"
search to produce encodings that match the exponential
search version. There are three main differences between
Patch Set 2 and 1:
a. Patch set 1 did not properly account for the scale of
2nd order error, so patch set 2 disable it all together
for 2nd blocks.
b. Patch set 1 was not consistent on when to enable the
the quantization optimization. Patch set 2 restore the
condition to be consistent.
c. Patch set 1 checks quantized level L-1, and L for any
input coefficient was quantized to L. Patch set 2 limits
the candidate coefficient to those that were rounded up
to L. It is worth noting here that a strategy to check
L and L+1 for coefficients that were truncated down to L
might work.

(a and b get trellis quant to basically match the exponential
search on all mid/low rate encodings on cif set, without
a, b, trellis quant can hurt the psnr by 0.2 to .3db at
200kbps for some cif clips)
(c gets trellis quant  to match the exponential search
to match at Q0 encoding, without c, trellis quant can be
1.5 to 2db lower for encodings with fixed Q at 0 on most
derf cif clips)

Change-Id:	Ib1a043b665d75fbf00cb0257b7c18e90eebab95e
---
 vp8/encoder/encodeintra.c |   1 -
 vp8/encoder/encodemb.c    | 626 ++++++++++++++++++--------------------
 vp8/encoder/quantize.c    |  59 ++++
 vp8/encoder/quantize.h    |   2 +
 vp8/encoder/rdopt.c       |   5 -
 vp8/encoder/tokenize.c    |   4 +-
 vp8/encoder/tokenize.h    |   6 +
 7 files changed, 361 insertions(+), 342 deletions(-)

diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 870cb5815..a8595af2d 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -109,7 +109,6 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-
     if (x->optimize && x->rddiv > 1)
         vp8_optimize_mby(x, rtcd);
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 8bc01df5b..06aeb6f7f 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -13,6 +13,7 @@
 #include "encodemb.h"
 #include "reconinter.h"
 #include "quantize.h"
+#include "tokenize.h"
 #include "invtrans.h"
 #include "recon.h"
 #include "reconintra.h"
@@ -279,354 +280,307 @@ void vp8_stuff_inter16x16(MACROBLOCK *x)
 }
 
 #if !(CONFIG_REALTIME_ONLY)
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
+#define RDCOST(RM,DM,R,D) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
+#define RDTRUNC(RM,DM,R,D) ( (128+(R)*(RM)) & 0xFF )
 
-static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l)
+typedef struct vp8_token_state vp8_token_state;
+
+struct vp8_token_state{
+  int           rate;
+  int           error;
+  signed char   next;
+  signed char   token;
+  short         qc;
+};
+
+void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
+                    ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
+                    const VP8_ENCODER_RTCD *rtcd)
 {
-    int c = !type;              /* start at coef 0, unless Y with Y2 */
-    int eob = b->eob;
-    int pt ;    /* surrounding block/prev coef predictor */
-    int cost = 0;
-    short *qcoeff_ptr = b->qcoeff;
+    BLOCK *b;
+    BLOCKD *d;
+    vp8_token_state tokens[17][2];
+    unsigned best_mask[2];
+    const short *dequant_ptr;
+    const short *coeff_ptr;
+    short *qcoeff_ptr;
+    short *dqcoeff_ptr;
+    int eob;
+    int i0;
+    int rc;
+    int x;
+    int sz;
+    int next;
+    int path;
+    int rdmult;
+    int rddiv;
+    int final_eob;
+    int rd_cost0;
+    int rd_cost1;
+    int rate0;
+    int rate1;
+    int error0;
+    int error1;
+    int t0;
+    int t1;
+    int best;
+    int band;
+    int pt;
 
+    b = &mb->block[i];
+    d = &mb->e_mbd.block[i];
+
+    /* Enable this to test the effect of RDO as a replacement for the dynamic
+     *  zero bin instead of an augmentation of it.
+     */
+#if 0
+    vp8_strict_quantize_b(b, d);
+#endif
+
+    dequant_ptr = &d->dequant[0][0];
+    coeff_ptr = &b->coeff[0];
+    qcoeff_ptr = d->qcoeff;
+    dqcoeff_ptr = d->dqcoeff;
+    i0 = !type;
+    eob = d->eob;
+
+    /* Now set up a Viterbi trellis to evaluate alternative roundings. */
+    /* TODO: These should vary with the block type, since the quantizer does. */
+    rdmult = mb->rdmult << 2;
+    rddiv = mb->rddiv;
+    best_mask[0] = best_mask[1] = 0;
+    /* Initialize the sentinel node of the trellis. */
+    tokens[eob][0].rate = 0;
+    tokens[eob][0].error = 0;
+    tokens[eob][0].next = 16;
+    tokens[eob][0].token = DCT_EOB_TOKEN;
+    tokens[eob][0].qc = 0;
+    *(tokens[eob] + 1) = *(tokens[eob] + 0);
+    next = eob;
+    for (i = eob; i-- > i0;)
+    {
+        int base_bits;
+        int d2;
+        int dx;
+
+        rc = vp8_default_zig_zag1d[i];
+        x = qcoeff_ptr[rc];
+        /* Only add a trellis state for non-zero coefficients. */
+        if (x)
+        {
+            int shortcut=0;
+            error0 = tokens[next][0].error;
+            error1 = tokens[next][1].error;
+            /* Evaluate the first possibility for this state. */
+            rate0 = tokens[next][0].rate;
+            rate1 = tokens[next][1].rate;
+            t0 = (vp8_dct_value_tokens_ptr + x)->Token;
+            /* Consider both possible successor states. */
+            if (next < 16)
+            {
+                band = vp8_coef_bands[i + 1];
+                pt = vp8_prev_token_class[t0];
+                rate0 +=
+                    mb->token_costs[type][band][pt][tokens[next][0].token];
+                rate1 +=
+                    mb->token_costs[type][band][pt][tokens[next][1].token];
+            }
+            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+            if (rd_cost0 == rd_cost1)
+            {
+                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
+            }
+            /* And pick the best. */
+            best = rd_cost1 < rd_cost0;
+            base_bits = *(vp8_dct_value_cost_ptr + x);
+            dx = dqcoeff_ptr[rc] - coeff_ptr[rc];
+            d2 = dx*dx;
+            tokens[i][0].rate = base_bits + (best ? rate1 : rate0);
+            tokens[i][0].error = d2 + (best ? error1 : error0);
+            tokens[i][0].next = next;
+            tokens[i][0].token = t0;
+            tokens[i][0].qc = x;
+            best_mask[0] |= best << i;
+            /* Evaluate the second possibility for this state. */
+            rate0 = tokens[next][0].rate;
+            rate1 = tokens[next][1].rate;
+
+            if((abs(x)*dequant_ptr[rc]>abs(coeff_ptr[rc])) &&
+               (abs(x)*dequant_ptr[rc]<abs(coeff_ptr[rc])+dequant_ptr[rc]))
+                shortcut = 1;
+            else
+                shortcut = 0;
+
+            if(shortcut)
+            {
+                sz = -(x < 0);
+                x -= 2*sz + 1;
+            }
+
+            /* Consider both possible successor states. */
+            if (!x)
+            {
+                /* If we reduced this coefficient to zero, check to see if
+                 *  we need to move the EOB back here.
+                 */
+                t0 = tokens[next][0].token == DCT_EOB_TOKEN ?
+                    DCT_EOB_TOKEN : ZERO_TOKEN;
+                t1 = tokens[next][1].token == DCT_EOB_TOKEN ?
+                    DCT_EOB_TOKEN : ZERO_TOKEN;
+            }
+            else
+            {
+                t0=t1 = (vp8_dct_value_tokens_ptr + x)->Token;
+            }
+            if (next < 16)
+            {
+                band = vp8_coef_bands[i + 1];
+                if(t0!=DCT_EOB_TOKEN)
+                {
+                    pt = vp8_prev_token_class[t0];
+                    rate0 += mb->token_costs[type][band][pt][
+                        tokens[next][0].token];
+                }
+                if(t1!=DCT_EOB_TOKEN)
+                {
+                    pt = vp8_prev_token_class[t1];
+                    rate1 += mb->token_costs[type][band][pt][
+                        tokens[next][1].token];
+                }
+            }
+
+            rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+            rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+            if (rd_cost0 == rd_cost1)
+            {
+                rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+                rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
+            }
+            /* And pick the best. */
+            best = rd_cost1 < rd_cost0;
+            base_bits = *(vp8_dct_value_cost_ptr + x);
+
+            if(shortcut)
+            {
+                dx -= (dequant_ptr[rc] + sz) ^ sz;
+                d2 = dx*dx;
+            }
+            tokens[i][1].rate = base_bits + (best ? rate1 : rate0);
+            tokens[i][1].error = d2 + (best ? error1 : error0);
+            tokens[i][1].next = next;
+            tokens[i][1].token =best?t1:t0;
+            tokens[i][1].qc = x;
+            best_mask[1] |= best << i;
+            /* Finally, make this the new head of the trellis. */
+            next = i;
+        }
+        /* There's no choice to make for a zero coefficient, so we don't
+         *  add a new trellis node, but we do need to update the costs.
+         */
+        else
+        {
+            band = vp8_coef_bands[i + 1];
+            t0 = tokens[next][0].token;
+            t1 = tokens[next][1].token;
+            /* Update the cost of each path if we're past the EOB token. */
+            if (t0 != DCT_EOB_TOKEN)
+            {
+                tokens[next][0].rate += mb->token_costs[type][band][0][t0];
+                tokens[next][0].token = ZERO_TOKEN;
+            }
+            if (t1 != DCT_EOB_TOKEN)
+            {
+                tokens[next][1].rate += mb->token_costs[type][band][0][t1];
+                tokens[next][1].token = ZERO_TOKEN;
+            }
+            /* Don't update next, because we didn't add a new node. */
+        }
+    }
+
+    /* Now pick the best path through the whole trellis. */
+    band = vp8_coef_bands[i + 1];
     VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);
-
-# define QC( I)  ( qcoeff_ptr [vp8_default_zig_zag1d[I]] )
-
-    for (; c < eob; c++)
+    rate0 = tokens[next][0].rate;
+    rate1 = tokens[next][1].rate;
+    error0 = tokens[next][0].error;
+    error1 = tokens[next][1].error;
+    t0 = tokens[next][0].token;
+    t1 = tokens[next][1].token;
+    rate0 += mb->token_costs[type][band][pt][t0];
+    rate1 += mb->token_costs[type][band][pt][t1];
+    rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0);
+    rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1);
+    if (rd_cost0 == rd_cost1)
     {
-        int v = QC(c);
-        int t = vp8_dct_value_tokens_ptr[v].Token;
-        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [t];
-        cost += vp8_dct_value_cost_ptr[v];
-        pt = vp8_prev_token_class[t];
+        rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0);
+        rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1);
     }
+    best = rd_cost1 < rd_cost0;
+    final_eob = i0 - 1;
+    for (i = next; i < eob; i = next)
+    {
+        x = tokens[i][best].qc;
+        if (x)
+            final_eob = i;
+        rc = vp8_default_zig_zag1d[i];
+        qcoeff_ptr[rc] = x;
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];
+        next = tokens[i][best].next;
+        best = (best_mask[best] >> i) & 1;
+    }
+    final_eob++;
 
-# undef QC
-
-    if (c < 16)
-        cost += mb->token_costs [type] [vp8_coef_bands[c]] [pt] [DCT_EOB_TOKEN];
-
-    return cost;
+    d->eob = final_eob;
+    *a = *l = (d->eob != !type);
 }
 
-static int mbycost_coeffs(MACROBLOCK *mb)
-{
-    int cost = 0;
-    int b;
-    TEMP_CONTEXT t;
-    int type = 0;
-
-    MACROBLOCKD *x = &mb->e_mbd;
-
-    vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
-
-    if (x->mbmi.mode == SPLITMV)
-        type = 3;
-
-    for (b = 0; b < 16; b++)
-        cost += cost_coeffs(mb, x->block + b, type,
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-
-    return cost;
-}
-
-#define RDFUNC(RM,DM,R,D,target_rd) ( ((128+(R)*(RM)) >> 8) + (DM)*(D) )
-
-void vp8_optimize_b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-    int nzpos[16] = {0};
-    short saved_qcoefs[16];
-    short saved_dqcoefs[16];
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-    int nzcoefcount = 0;
-    int nc, bestnc = 0;
-    int besteob;
-
-    // count potential coefficient to be optimized
-    for (k = !type; k < 16; k++)
-    {
-        int qcoef = abs(bd->qcoeff[k]);
-        int coef = abs(b->coeff[k]);
-        int dq   = dequant_ptr[k];
-
-        if (qcoef && (qcoef * dq > coef) && (qcoef * dq < coef + dq))
-        {
-            nzpos[nzcoefcount] = k;
-            nzcoefcount++;
-        }
-    }
-
-    // if nothing here, do nothing for this block.
-    if (!nzcoefcount)
-    {
-        *a = *l = (bd->eob != !type);
-        return;
-    }
-
-    // save a copy of quantized coefficients
-    vpx_memcpy(saved_qcoefs, bd->qcoeff, 32);
-    vpx_memcpy(saved_dqcoefs, bd->dqcoeff, 32);
-
-    besteob   = bd->eob;
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-    baserd    = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (nc = 1; nc < (1 << nzcoefcount); nc++)
-    {
-        //reset coefficients
-        vpx_memcpy(bd->qcoeff,  saved_qcoefs,  32);
-        vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if ((nc & (1 << k)))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  = cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 2;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd < baserd)
-        {
-            baserd = thisrd;
-            bestnc = nc;
-            besteob = bd->eob;
-        }
-    }
-
-    //reset coefficients
-    vpx_memcpy(bd->qcoeff,  saved_qcoefs, 32);
-    vpx_memcpy(bd->dqcoeff, saved_dqcoefs, 32);
-
-    if (bestnc)
-    {
-        for (k = 0; k < nzcoefcount; k++)
-        {
-            int pos = nzpos[k];
-
-            if (bestnc & (1 << k))
-            {
-                int cur_qcoef = bd->qcoeff[pos];
-
-                if (cur_qcoef < 0)
-                {
-                    bd->qcoeff[pos]++;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-                else
-                {
-                    bd->qcoeff[pos]--;
-                    bd->dqcoeff[pos] = bd->qcoeff[pos] * dequant_ptr[pos];
-                }
-            }
-        }
-
-#if 0
-        {
-            int eob = -1;
-            int rc;
-            int m;
-
-            for (m = 0; m < 16; m++)
-            {
-                rc   = vp8_default_zig_zag1d[m];
-
-                if (bd->qcoeff[rc])
-                    eob = m;
-            }
-
-            bd->eob = eob + 1;
-        }
-#endif
-    }
-
-#if 1
-    bd->eob = besteob;
-#endif
-#if 0
-    {
-        int eob = -1;
-        int rc;
-        int m;
-
-        for (m = 0; m < 16; m++)
-        {
-            rc   = vp8_default_zig_zag1d[m];
-
-            if (bd->qcoeff[rc])
-                eob = m;
-        }
-
-        bd->eob = eob + 1;
-    }
-
-#endif
-    *a = *l = (bd->eob != !type);
-    return;
-}
-
-
-void vp8_optimize_y2b(MACROBLOCK *x, int i, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, const VP8_ENCODER_RTCD *rtcd)
-{
-
-    BLOCK *b = &x->block[i];
-    BLOCKD *bd = &x->e_mbd.block[i];
-    short *dequant_ptr = &bd->dequant[0][0];
-
-    int baserate, baseerror, baserd;
-    int rate, error, thisrd;
-    int k;
-
-    if (bd->eob == 0)
-        return;
-
-    baserate  = cost_coeffs(x, bd, type, a, l);
-    baseerror = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
-    baserd = RDFUNC(x->rdmult, x->rddiv, baserate, baseerror, 100);
-
-    for (k = 0; k < 16; k++)
-    {
-        int cur_qcoef = bd->qcoeff[k];
-
-        if (!cur_qcoef)
-            continue;
-
-        if (cur_qcoef < 0)
-        {
-            bd->qcoeff[k]++;
-            bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
-        }
-        else
-        {
-            bd->qcoeff[k]--;
-            bd->dqcoeff[k] = bd->qcoeff[k] * dequant_ptr[k];
-        }
-
-        if (bd->qcoeff[k] == 0)
-        {
-            int eob = -1;
-            int rc;
-            int l;
-
-            for (l = 0; l < 16; l++)
-            {
-                rc   = vp8_default_zig_zag1d[l];
-
-                if (bd->qcoeff[rc])
-                    eob = l;
-            }
-
-            bd->eob = eob + 1;
-        }
-
-        rate  =   cost_coeffs(x, bd, type, a, l);
-        error = ENCODEMB_INVOKE(&rtcd->encodemb, berr)(b->coeff, bd->dqcoeff) >> 4;
-        thisrd = RDFUNC(x->rdmult, x->rddiv, rate, error, 100);
-
-        if (thisrd > baserd)
-        {
-            bd->qcoeff[k] = cur_qcoef;
-            bd->dqcoeff[k] = cur_qcoef * dequant_ptr[k];
-        }
-        else
-        {
-            baserd = thisrd;
-        }
-
-    }
-
-    {
-        int eob = -1;
-        int rc;
-
-        for (k = 0; k < 16; k++)
-        {
-            rc   = vp8_default_zig_zag1d[k];
-
-            if (bd->qcoeff[rc])
-                eob = k;
-        }
-
-        bd->eob = eob + 1;
-    }
-
-    return;
-}
-
-
 void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
     TEMP_CONTEXT t, t2;
-    int type = 0;
+    int type;
+    int has_2nd_order;
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
-
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
+    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
+        x->e_mbd.left_context[Y1CONTEXT], 4);
+    has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
+        && x->e_mbd.mbmi.mode != SPLITMV);
+    type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
     {
-        //vp8_optimize_bplus(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, type,
+            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
     }
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT],
+        x->e_mbd.left_context[UCONTEXT], 2);
+    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT],
+        x->e_mbd.left_context[VCONTEXT], 2);
 
     for (b = 16; b < 20; b++)
     {
-        //vp8_optimize_bplus(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, vp8_block2type[b], t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
     }
 
     for (b = 20; b < 24; b++)
     {
-        //vp8_optimize_bplus(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
-        vp8_optimize_b(x, b, vp8_block2type[b], t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
     }
+
+
+    /*
+    if (has_2nd_order)
+    {
+        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
+            x->e_mbd.left_context[Y2CONTEXT], 1);
+        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+    }
+    */
 }
 
 
@@ -663,31 +617,40 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
     TEMP_CONTEXT t;
-    int type = 0;
+    int type;
+    int has_2nd_order;
 
     if (!x->e_mbd.above_context[Y1CONTEXT])
         return;
 
     if (!x->e_mbd.left_context[Y1CONTEXT])
         return;
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT], x->e_mbd.left_context[Y1CONTEXT], 4);
-
-    if (x->e_mbd.mbmi.mode == SPLITMV || x->e_mbd.mbmi.mode == B_PRED)
-        type = 3;
+    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
+        x->e_mbd.left_context[Y1CONTEXT], 4);
+    has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
+        && x->e_mbd.mbmi.mode != SPLITMV);
+    type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
     {
-        vp8_optimize_b(x, b, type, t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        vp8_optimize_b(x, b, type,
+            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
     }
 
+    /*
+    if (has_2nd_order)
+    {
+        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
+            x->e_mbd.left_context[Y2CONTEXT], 1);
+        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+    }
+    */
 }
 
 void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
     TEMP_CONTEXT t, t2;
-
     if (!x->e_mbd.above_context[UCONTEXT])
         return;
 
@@ -700,7 +663,6 @@ void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
     if (!x->e_mbd.left_context[VCONTEXT])
         return;
 
-
     vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
     vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
 
@@ -731,15 +693,11 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     vp8_quantize_mb(x);
 
 #if !(CONFIG_REALTIME_ONLY)
-#if 1
-
     if (x->optimize && x->rddiv > 1)
     {
         vp8_optimize_mb(x, rtcd);
         vp8_find_mb_skip_coef(x);
     }
-
-#endif
 #endif
 
     vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 7b4472467..353217c93 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -215,6 +215,65 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 }
 
 #endif
+
+/* Perform regular quantization, with unbiased rounding and no zero bin. */
+void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i;
+    int rc;
+    int eob;
+    int x;
+    int y;
+    int z;
+    int sz;
+    short *coeff_ptr;
+    short *quant_ptr;
+    short *quant_shift_ptr;
+    short *qcoeff_ptr;
+    short *dqcoeff_ptr;
+    short *dequant_ptr;
+
+    coeff_ptr = &b->coeff[0];
+    quant_ptr = &b->quant[0][0];
+    quant_shift_ptr = &b->quant_shift[0][0];
+    qcoeff_ptr = d->qcoeff;
+    dqcoeff_ptr = d->dqcoeff;
+    dequant_ptr = &d->dequant[0][0];
+    eob = - 1;
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+    for (i = 0; i < 16; i++)
+    {
+        int dq;
+        int round;
+
+        /*TODO: These arrays should be stored in zig-zag order.*/
+        rc = vp8_default_zig_zag1d[i];
+        z = coeff_ptr[rc];
+        dq = dequant_ptr[rc];
+        round = dq >> 1;
+        /* Sign of z. */
+        sz = -(z < 0);
+        x = (z + sz) ^ sz;
+        x += round;
+        if (x >= dq)
+        {
+            /* Quantize x. */
+            y  = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
+            /* Put the sign back. */
+            x = (y + sz) ^ sz;
+            /* Save the coefficient and its dequantized value. */
+            qcoeff_ptr[rc] = x;
+            dqcoeff_ptr[rc] = x * dq;
+            /* Remember the last non-zero coefficient. */
+            if (y)
+                eob = i;
+        }
+    }
+
+    d->eob = eob + 1;
+}
+
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 775641893..05056d9ce 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -47,6 +47,8 @@ typedef struct
 #define QUANTIZE_INVOKE(ctx,fn) vp8_quantize_##fn
 #endif
 
+extern void vp8_strict_quantize_b(BLOCK *b,BLOCKD *d);
+
 extern void vp8_quantize_mb(MACROBLOCK *x);
 extern void vp8_quantize_mbuv(MACROBLOCK *x);
 extern void vp8_quantize_mby(MACROBLOCK *x);
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 06a20c0b7..8fe2d206a 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -64,11 +64,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x);
 #define MAXF(a,b)            (((a) > (b)) ? (a) : (b))
 
 
-extern const TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
-extern int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-extern int *vp8_dct_value_cost_ptr;
-
 
 const int vp8_auto_speed_thresh[17] =
 {
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 29be6b62b..da44f6960 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -27,9 +27,9 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
 void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
 
 TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-TOKENEXTRA *vp8_dct_value_tokens_ptr;
+const TOKENEXTRA *vp8_dct_value_tokens_ptr;
 int vp8_dct_value_cost[DCT_MAX_VALUE*2];
-int *vp8_dct_value_cost_ptr;
+const int *vp8_dct_value_cost_ptr;
 #if 0
 int skip_true_count = 0;
 int skip_false_count = 0;
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 6f154381a..6b3d08a30 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -35,5 +35,11 @@ void print_context_counters();
 extern _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
 #endif
 
+extern const int *vp8_dct_value_cost_ptr;
+/* TODO: The Token field should be broken out into a separate char array to
+ *  improve cache locality, since it's needed for costing when the rest of the
+ *  fields are not.
+ */
+extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
 
 #endif  /* tokenize_h */

From 3b95a46c5538ad4c5b765923d61d8d1b12a4569a Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Tue, 10 Aug 2010 21:12:04 -0700
Subject: [PATCH 110/307] Normalize quantizer's zero bin and rounding factors

This patch changes a few numbers in the two constant arrays
for quantizer's zerobin and rounding factors, in general to
make the sum of the two factors for any Q to be 128.  While
it might be beneficial to calibrate the two arrays for best
quantizer performance, it is not the purpose of this patch.
Normalizing the two arrays will enable quick optimization
of the current faster quantizer, i.e .zerobin check can be
removed.

Change-Id: If9abfd7929bf4b8e9ecd64a79d817c6728c820bd
---
 vp8/encoder/encodeframe.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 4e4483edb..905ef8858 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -60,10 +60,9 @@ unsigned int uv_modes[4]  = {0, 0, 0, 0};
 unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 #endif
 
-// The first four entries are dummy values
 static const int qrounding_factors[129] =
 {
-    56, 56, 56, 56, 56, 56, 56, 56,
+    56, 56, 56, 56, 48, 48, 56, 56,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
@@ -84,7 +83,7 @@ static const int qrounding_factors[129] =
 
 static const int qzbin_factors[129] =
 {
-    64, 64, 64, 64, 80, 80, 80, 80,
+    72, 72, 72, 72, 80, 80, 72, 72,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,

From c404fa42aca246c644a5bd84d43bbe9d9740e7a8 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Tue, 10 Aug 2010 21:45:34 -0700
Subject: [PATCH 111/307] Removed duplicate functions

Change-Id: Ie587972ccefd3c762b8cdf8ef39345cd22924b9b
---
 vp8/encoder/encodeintra.c |  4 +--
 vp8/encoder/encodemb.c    | 57 +++------------------------------------
 vp8/encoder/encodemb.h    |  2 --
 3 files changed, 5 insertions(+), 58 deletions(-)

diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index a8595af2d..556940f0e 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -155,7 +155,7 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submby)(x->src_diff, x->src.y_buffer, x->e_mbd.predictor, x->src.y_stride);
 
-    vp8_transform_intra_mbyrd(x);
+    vp8_transform_intra_mby(x);
 
     x->e_mbd.mbmi.mb_skip_coeff = 1;
 
@@ -223,7 +223,7 @@ void vp8_encode_intra16x16mbuvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 
-    vp8_transform_mbuvrd(x);
+    vp8_transform_mbuv(x);
 
     vp8_quantize_mbuv(x);
 
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 06aeb6f7f..485001e51 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -121,21 +121,12 @@ void vp8_transform_mbuv(MACROBLOCK *x)
 
     for (i = 16; i < 24; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16);
-    }
-}
-
-void vp8_transform_mbuvrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 16; i < 24; i += 2)
-    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], 
             &x->block[i].coeff[0], 16);
     }
 }
 
+
 void vp8_transform_intra_mby(MACROBLOCK *x)
 {
     int i;
@@ -155,23 +146,6 @@ void vp8_transform_intra_mby(MACROBLOCK *x)
 
 }
 
-void vp8_transform_intra_mbyrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 0; i < 16; i += 2)
-    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
-            &x->block[i].coeff[0], 32);
-    }
-
-    // build dc block from 16 y dc values
-    vp8_build_dcblock(x);
-
-    // do 2nd order transform on the dc block
-    x->short_walsh4x4(&x->block[24].src_diff[0],
-        &x->block[24].coeff[0], 8);
-}
 
 void vp8_transform_mb(MACROBLOCK *x)
 {
@@ -219,31 +193,6 @@ void vp8_transform_mby(MACROBLOCK *x)
     }
 }
 
-void vp8_transform_mbrd(MACROBLOCK *x)
-{
-    int i;
-
-    for (i = 0; i < 16; i += 2)
-    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
-            &x->block[i].coeff[0], 32);
-    }
-
-    // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
-        vp8_build_dcblock(x);
-
-    for (i = 16; i < 24; i += 2)
-    {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
-            &x->block[i].coeff[0], 16);
-    }
-
-    // do 2nd order transform on the dc block
-    if (x->e_mbd.mbmi.mode != SPLITMV)
-        x->short_walsh4x4(&x->block[24].src_diff[0],
-            &x->block[24].coeff[0], 8);
-}
 
 void vp8_stuff_inter16x16(MACROBLOCK *x)
 {
@@ -744,7 +693,7 @@ void vp8_encode_inter16x16uvrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     vp8_build_inter_predictors_mbuv(&x->e_mbd);
     ENCODEMB_INVOKE(&rtcd->encodemb, submbuv)(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->e_mbd.predictor, x->src.uv_stride);
 
-    vp8_transform_mbuvrd(x);
+    vp8_transform_mbuv(x);
 
     vp8_quantize_mbuv(x);
 
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index 423935248..f7b110d64 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -100,9 +100,7 @@ extern void vp8_stuff_inter16x16(MACROBLOCK *x);
 void vp8_build_dcblock(MACROBLOCK *b);
 void vp8_transform_mb(MACROBLOCK *mb);
 void vp8_transform_mbuv(MACROBLOCK *x);
-void vp8_transform_mbuvrd(MACROBLOCK *x);
 void vp8_transform_intra_mby(MACROBLOCK *x);
-void vp8_transform_intra_mbyrd(MACROBLOCK *x);
 void Encode16x16Y(MACROBLOCK *x);
 void Encode16x16UV(MACROBLOCK *x);
 void vp8_encode_inter16x16uv(const struct VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x);

From 99f46d62d95ffbbdc2a5aebc699a316457725682 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Wed, 11 Aug 2010 11:02:31 -0400
Subject: [PATCH 112/307] Moved gf_active code to encoder only

The gf_active code is only used by the encoder, so it was moved from
common and decoder.

Change-Id: Iada15acd5b2b33ff70c34668ca87d4cfd0d05025
---
 vp8/common/alloccommon.c         | 19 -------------------
 vp8/common/blockd.h              |  3 ---
 vp8/common/onyxc_int.h           |  2 --
 vp8/common/segmentation_common.c | 22 +++++++++++-----------
 vp8/common/segmentation_common.h |  4 ++--
 vp8/decoder/decodframe.c         |  7 +------
 vp8/decoder/onyxd_if.c           |  5 +----
 vp8/decoder/threading.c          |  6 ------
 vp8/encoder/block.h              |  3 +++
 vp8/encoder/encodeframe.c        |  4 ++--
 vp8/encoder/ethreading.c         |  4 ++--
 vp8/encoder/onyx_if.c            | 31 +++++++++++++++++++++++--------
 vp8/encoder/onyx_int.h           |  6 ++++++
 vp8/encoder/ratectrl.c           |  4 ++--
 14 files changed, 53 insertions(+), 67 deletions(-)

diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index d0a138d06..369d48101 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -54,11 +54,6 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
     oci->above_context[Y2CONTEXT] = 0;
     oci->mip = 0;
 
-    // Structure used to minitor GF useage
-    if (oci->gf_active_flags != 0)
-        vpx_free(oci->gf_active_flags);
-
-    oci->gf_active_flags = 0;
 }
 
 int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
@@ -157,20 +152,6 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
 
     vp8_update_mode_info_border(oci->mi, oci->mb_rows, oci->mb_cols);
 
-    // Structures used to minitor GF usage
-    if (oci->gf_active_flags != 0)
-        vpx_free(oci->gf_active_flags);
-
-    oci->gf_active_flags = (unsigned char *)vpx_calloc(oci->mb_rows * oci->mb_cols, 1);
-
-    if (!oci->gf_active_flags)
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->gf_active_count = oci->mb_rows * oci->mb_cols;
-
     return 0;
 }
 void vp8_setup_version(VP8_COMMON *cm)
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 865b8c18f..468c83295 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -275,9 +275,6 @@ typedef struct
     int mb_to_top_edge;
     int mb_to_bottom_edge;
 
-    //char * gf_active_ptr;
-    signed char *gf_active_ptr;
-
     unsigned int frames_since_golden;
     unsigned int frames_till_alt_ref_frame;
     vp8_subpix_fn_t  subpixel_predict;
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 503ad5dd2..8dce00824 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -133,8 +133,6 @@ typedef struct VP8Common
 
     unsigned int frames_since_golden;
     unsigned int frames_till_alt_ref_frame;
-    unsigned char *gf_active_flags;   // Record of which MBs still refer to last golden frame either directly or through 0,0
-    int gf_active_count;
 
     /* We allocate a MODE_INFO struct for each macroblock, together with
        an extra row on top and column on the left to simplify prediction. */
diff --git a/vp8/common/segmentation_common.c b/vp8/common/segmentation_common.c
index 5df1b496b..16b96e9db 100644
--- a/vp8/common/segmentation_common.c
+++ b/vp8/common/segmentation_common.c
@@ -12,19 +12,19 @@
 #include "segmentation_common.h"
 #include "vpx_mem/vpx_mem.h"
 
-void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
+void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
 {
     int mb_row, mb_col;
 
     MODE_INFO *this_mb_mode_info = cm->mi;
 
-    xd->gf_active_ptr = (signed char *)cm->gf_active_flags;
+    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;
 
     if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame))
     {
         // Reset Gf useage monitors
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+        vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
     }
     else
     {
@@ -40,19 +40,19 @@ void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd)
                 // else if using non 0,0 motion or intra modes then clear flag if it is currently set
                 if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME))
                 {
-                    if (*(xd->gf_active_ptr) == 0)
+                    if (*(x->gf_active_ptr) == 0)
                     {
-                        *(xd->gf_active_ptr) = 1;
-                        cm->gf_active_count ++;
+                        *(x->gf_active_ptr) = 1;
+                        cpi->gf_active_count ++;
                     }
                 }
-                else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(xd->gf_active_ptr))
+                else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(x->gf_active_ptr))
                 {
-                    *(xd->gf_active_ptr) = 0;
-                    cm->gf_active_count--;
+                    *(x->gf_active_ptr) = 0;
+                    cpi->gf_active_count--;
                 }
 
-                xd->gf_active_ptr++;          // Step onto next entry
+                x->gf_active_ptr++;          // Step onto next entry
                 this_mb_mode_info++;           // skip to next mb
 
             }
diff --git a/vp8/common/segmentation_common.h b/vp8/common/segmentation_common.h
index 41c7f7f63..1e33dced0 100644
--- a/vp8/common/segmentation_common.h
+++ b/vp8/common/segmentation_common.h
@@ -11,6 +11,6 @@
 
 #include "string.h"
 #include "blockd.h"
-#include "onyxc_int.h"
+#include "onyx_int.h"
 
-extern void vp8_update_gf_useage_maps(VP8_COMMON *cm, MACROBLOCKD *xd);
+extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x);
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index f4c6be9b5..8e501f52c 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -21,7 +21,7 @@
 #include "alloccommon.h"
 #include "entropymode.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+
 #include "setupintrarecon.h"
 #include "demode.h"
 #include "decodemv.h"
@@ -447,8 +447,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
         ++xd->mode_info_context;  /* next mb */
 
-        xd->gf_active_ptr++;      // GF useage flag for next MB
-
         xd->above_context[Y1CONTEXT] += 4;
         xd->above_context[UCONTEXT ] += 2;
         xd->above_context[VCONTEXT ] += 2;
@@ -901,9 +899,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
     vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
 
-    xd->gf_active_ptr = (signed char *)pc->gf_active_flags;     // Point to base of GF active flags data structure
-
-
     vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
 
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 8b240e164..728d5ca8c 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -24,7 +24,7 @@
 #include "threading.h"
 #include "decoderthreading.h"
 #include <stdio.h>
-#include "segmentation_common.h"
+
 #include "quant_common.h"
 #include "vpx_scale/vpxscale.h"
 #include "systemdependent.h"
@@ -354,9 +354,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         return retcode;
     }
 
-    // Update the GF useage maps.
-    vp8_update_gf_useage_maps(cm, &pbi->mb);
-
     if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
         vp8_stop_lfthread(pbi);
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index d27374afe..6db23bf0c 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -51,7 +51,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
-        mbd->gf_active_ptr            = xd->gf_active_ptr;
 
         mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
@@ -108,7 +107,6 @@ void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
         //mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
         //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
-        //mbd->gf_active_ptr            = xd->gf_active_ptr;
 
         mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
@@ -270,8 +268,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         ++xd->mode_info_context;  /* next mb */
 
-                        xd->gf_active_ptr++;      // GF useage flag for next MB
-
                         xd->above_context[Y1CONTEXT] += 4;
                         xd->above_context[UCONTEXT ] += 2;
                         xd->above_context[VCONTEXT ] += 2;
@@ -689,8 +685,6 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
                 ++xd->mode_info_context;  /* next mb */
 
-                xd->gf_active_ptr++;      // GF useage flag for next MB
-
                 xd->above_context[Y1CONTEXT] += 4;
                 xd->above_context[UCONTEXT ] += 2;
                 xd->above_context[VCONTEXT ] += 2;
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index 19d307d26..c914a32b6 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -93,6 +93,9 @@ typedef struct
 
     int encode_breakout;
 
+    //char * gf_active_ptr;
+    signed char *gf_active_ptr;
+
     unsigned char *active_ptr;
     MV_CONTEXT *mvc;
 
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 905ef8858..b1bd81065 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -454,7 +454,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
         cpi->tplist[mb_row].stop = *tp;
 
-        xd->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
+        x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
         // store macroblock mode info into context array
         vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
@@ -536,7 +536,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
     //}
 
 
-    xd->gf_active_ptr = (signed char *)cm->gf_active_flags;     // Point to base of GF active flags data structure
+    x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 
     x->vector_range = 32;
 
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index b8bd414cf..1877417bb 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -164,7 +164,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                         cpi->tplist[mb_row].stop = *tp;
 
-                        xd->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
+                        x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
                         // store macroblock mode info into context array
                         vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
@@ -371,7 +371,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
 #if CONFIG_RUNTIME_CPU_DETECT
         mbd->rtcd                   = xd->rtcd;
 #endif
-        mbd->gf_active_ptr            = xd->gf_active_ptr;
+        mb->gf_active_ptr            = x->gf_active_ptr;
 
         mb->vector_range             = 32;
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 29d9f7e99..081a77597 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -224,6 +224,12 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
     vpx_free(cpi->tok);
     cpi->tok = 0;
 
+    // Structure used to minitor GF useage
+    if (cpi->gf_active_flags != 0)
+        vpx_free(cpi->gf_active_flags);
+
+    cpi->gf_active_flags = 0;
+
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -1256,6 +1262,15 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
     cpi->inter_zz_count = 0;
     cpi->gf_bad_count = 0;
     cpi->gf_update_recommended = 0;
+
+
+    // Structures used to minitor GF usage
+    if (cpi->gf_active_flags != 0)
+        vpx_free(cpi->gf_active_flags);
+
+    CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
+
+    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 }
 
 
@@ -2862,8 +2877,8 @@ static void update_alt_ref_frame_and_stats(VP8_COMP *cpi)
     }
 
     // Update data structure that monitors level of reference to last GF
-    vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-    cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+    vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
     // this frame refreshes means next frames don't unless specified by user
 
     cpi->common.frames_since_golden = 0;
@@ -2910,8 +2925,8 @@ static void update_golden_frame_and_stats(VP8_COMP *cpi)
         }
 
         // Update data structure that monitors level of reference to last GF
-        vpx_memset(cm->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
-        cm->gf_active_count = cm->mb_rows * cm->mb_cols;
+        vpx_memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
+        cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
 
         // this frame refreshes means next frames don't unless specified by user
         cm->refresh_golden_frame = 0;
@@ -3415,7 +3430,7 @@ static void vp8cx_temp_filter_c
         {
             if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames)
             {
-                frames_to_blur_backward 
+                frames_to_blur_backward
                     = max_frames - frames_to_blur_forward - 1;
             }
         }
@@ -4298,7 +4313,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
 
     // Update the GF useage maps.
     // This is done after completing the compression of a frame when all modes etc. are finalized but before loop filter
-    vp8_update_gf_useage_maps(cm, &cpi->mb.e_mbd);
+    vp8_update_gf_useage_maps(cpi, cm, &cpi->mb);
 
     if (cm->frame_type == KEY_FRAME)
         cm->refresh_last_frame = 1;
@@ -4306,7 +4321,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
     if (0)
     {
         FILE *f = fopen("gfactive.stt", "a");
-        fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
+        fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
         fclose(f);
     }
 
@@ -4710,7 +4725,7 @@ int vp8_is_gf_update_needed(VP8_PTR ptr)
 void vp8_check_gf_quality(VP8_COMP *cpi)
 {
     VP8_COMMON *cm = &cpi->common;
-    int gf_active_pct = (100 * cm->gf_active_count) / (cm->mb_rows * cm->mb_cols);
+    int gf_active_pct = (100 * cpi->gf_active_count) / (cm->mb_rows * cm->mb_cols);
     int gf_ref_usage_pct = (cpi->count_mb_ref_frame_usage[GOLDEN_FRAME] * 100) / (cm->mb_rows * cm->mb_cols);
     int last_ref_zz_useage = (cpi->inter_zz_count * 100) / (cm->mb_rows * cm->mb_cols);
 
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 4bf6c9a10..c860a6ca0 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -646,6 +646,12 @@ typedef struct
     int b_calculate_ssimg;
 #endif
     int b_calculate_psnr;
+
+
+    unsigned char *gf_active_flags;   // Record of which MBs still refer to last golden frame either directly or through 0,0
+    int gf_active_count;
+
+
 } VP8_COMP;
 
 void control_data_rate(VP8_COMP *cpi);
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 582c617ef..d32808165 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -408,7 +408,7 @@ static void calc_gf_params(VP8_COMP *cpi)
                   cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
                   cpi->recent_ref_frame_usage[ALTREF_FRAME];
 
-    int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+    int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
 
     // Reset the last boost indicator
     //cpi->last_boost = 100;
@@ -1022,7 +1022,7 @@ void vp8_calc_pframe_target_size(VP8_COMP *cpi)
                       cpi->recent_ref_frame_usage[GOLDEN_FRAME] +
                       cpi->recent_ref_frame_usage[ALTREF_FRAME];
 
-        int pct_gf_active = (100 * cpi->common.gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
+        int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols);
 
         // Reset the last boost indicator
         //cpi->last_boost = 100;

From c0ba42d3c0e8f28422c819e7b93413224a5cf2cb Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Wed, 11 Aug 2010 13:36:35 -0400
Subject: [PATCH 113/307] rename DETOK_[AL]

everything else uses lowercase detok

Change-Id: I9671e2e90eb2961208dfa81c00b3accb5749ec04
---
 vp8/common/arm/vpx_asm_offsets.c | 52 ++++++++++++++++----------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 33adccf31..c342f8fdc 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -32,42 +32,42 @@
  */
 
 #if CONFIG_VP8_DECODER || CONFIG_VP8_ENCODER
-DEFINE(yv12_buffer_config_y_width,               offsetof(YV12_BUFFER_CONFIG, y_width));
-DEFINE(yv12_buffer_config_y_height,              offsetof(YV12_BUFFER_CONFIG, y_height));
-DEFINE(yv12_buffer_config_y_stride,              offsetof(YV12_BUFFER_CONFIG, y_stride));
-DEFINE(yv12_buffer_config_uv_width,               offsetof(YV12_BUFFER_CONFIG, uv_width));
-DEFINE(yv12_buffer_config_uv_height,              offsetof(YV12_BUFFER_CONFIG, uv_height));
-DEFINE(yv12_buffer_config_uv_stride,              offsetof(YV12_BUFFER_CONFIG, uv_stride));
-DEFINE(yv12_buffer_config_y_buffer,              offsetof(YV12_BUFFER_CONFIG, y_buffer));
-DEFINE(yv12_buffer_config_u_buffer,              offsetof(YV12_BUFFER_CONFIG, u_buffer));
-DEFINE(yv12_buffer_config_v_buffer,              offsetof(YV12_BUFFER_CONFIG, v_buffer));
+DEFINE(yv12_buffer_config_y_width,              offsetof(YV12_BUFFER_CONFIG, y_width));
+DEFINE(yv12_buffer_config_y_height,             offsetof(YV12_BUFFER_CONFIG, y_height));
+DEFINE(yv12_buffer_config_y_stride,             offsetof(YV12_BUFFER_CONFIG, y_stride));
+DEFINE(yv12_buffer_config_uv_width,             offsetof(YV12_BUFFER_CONFIG, uv_width));
+DEFINE(yv12_buffer_config_uv_height,            offsetof(YV12_BUFFER_CONFIG, uv_height));
+DEFINE(yv12_buffer_config_uv_stride,            offsetof(YV12_BUFFER_CONFIG, uv_stride));
+DEFINE(yv12_buffer_config_y_buffer,             offsetof(YV12_BUFFER_CONFIG, y_buffer));
+DEFINE(yv12_buffer_config_u_buffer,             offsetof(YV12_BUFFER_CONFIG, u_buffer));
+DEFINE(yv12_buffer_config_v_buffer,             offsetof(YV12_BUFFER_CONFIG, v_buffer));
 DEFINE(yv12_buffer_config_border,               offsetof(YV12_BUFFER_CONFIG, border));
 #endif
 
 #if CONFIG_VP8_DECODER
 DEFINE(mb_diff,                                 offsetof(MACROBLOCKD, diff));
 DEFINE(mb_predictor,                            offsetof(MACROBLOCKD, predictor));
-DEFINE(mb_dst_y_stride,                          offsetof(MACROBLOCKD, dst.y_stride));
-DEFINE(mb_dst_y_buffer,                          offsetof(MACROBLOCKD, dst.y_buffer));
-DEFINE(mb_dst_u_buffer,                          offsetof(MACROBLOCKD, dst.u_buffer));
-DEFINE(mb_dst_v_buffer,                          offsetof(MACROBLOCKD, dst.v_buffer));
+DEFINE(mb_dst_y_stride,                         offsetof(MACROBLOCKD, dst.y_stride));
+DEFINE(mb_dst_y_buffer,                         offsetof(MACROBLOCKD, dst.y_buffer));
+DEFINE(mb_dst_u_buffer,                         offsetof(MACROBLOCKD, dst.u_buffer));
+DEFINE(mb_dst_v_buffer,                         offsetof(MACROBLOCKD, dst.v_buffer));
 DEFINE(mb_mbmi_mode,                            offsetof(MACROBLOCKD, mbmi.mode));
-DEFINE(mb_up_available,                          offsetof(MACROBLOCKD, up_available));
-DEFINE(mb_left_available,                        offsetof(MACROBLOCKD, left_available));
+DEFINE(mb_up_available,                         offsetof(MACROBLOCKD, up_available));
+DEFINE(mb_left_available,                       offsetof(MACROBLOCKD, left_available));
 
 DEFINE(detok_scan,                              offsetof(DETOK, scan));
-DEFINE(detok_ptr_onyxblock2context_leftabove,    offsetof(DETOK, ptr_onyxblock2context_leftabove));
-DEFINE(detok_onyx_coef_tree_ptr,                  offsetof(DETOK, vp8_coef_tree_ptr));
-DEFINE(detok_teb_base_ptr,                        offsetof(DETOK, teb_base_ptr));
-DEFINE(detok_norm_ptr,                           offsetof(DETOK, norm_ptr));
-DEFINE(detok_ptr_onyx_coef_bands_x,                offsetof(DETOK, ptr_onyx_coef_bands_x));
+DEFINE(detok_ptr_onyxblock2context_leftabove,   offsetof(DETOK, ptr_onyxblock2context_leftabove));
+DEFINE(detok_onyx_coef_tree_ptr,                offsetof(DETOK, vp8_coef_tree_ptr));
+DEFINE(detok_teb_base_ptr,                      offsetof(DETOK, teb_base_ptr));
+DEFINE(detok_norm_ptr,                          offsetof(DETOK, norm_ptr));
+DEFINE(detok_ptr_onyx_coef_bands_x,             offsetof(DETOK, ptr_onyx_coef_bands_x));
 
-DEFINE(DETOK_A,                                 offsetof(DETOK, A));
-DEFINE(DETOK_L,                                 offsetof(DETOK, L));
+DEFINE(detok_A,                                 offsetof(DETOK, A));
+DEFINE(detok_L,                                 offsetof(DETOK, L));
 
-DEFINE(detok_qcoeff_start_ptr,                    offsetof(DETOK, qcoeff_start_ptr));
-DEFINE(detok_current_bc,                         offsetof(DETOK, current_bc));
-DEFINE(detok_coef_probs,                         offsetof(DETOK, coef_probs));
+DEFINE(detok_qcoeff_start_ptr,                  offsetof(DETOK, qcoeff_start_ptr));
+DEFINE(detok_current_bc,                        offsetof(DETOK, current_bc));
+DEFINE(detok_coef_probs,                        offsetof(DETOK, coef_probs));
 DEFINE(detok_eob,                               offsetof(DETOK, eob));
 
 DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));
@@ -76,7 +76,7 @@ DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));
 DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));
 DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
 
-DEFINE(tokenextrabits_min_val,                   offsetof(TOKENEXTRABITS, min_val));
+DEFINE(tokenextrabits_min_val,                  offsetof(TOKENEXTRABITS, min_val));
 DEFINE(tokenextrabits_length,                   offsetof(TOKENEXTRABITS, Length));
 #endif
 

From b07e5b6fa1e9d6b873636a97f2638ddffd8a8e02 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Wed, 11 Aug 2010 13:49:00 -0400
Subject: [PATCH 114/307] Finished vp8_sixtap_predict4x4_ssse3 function

Added vp8_filter_block1d4_h6_ssse3 and vp8_filter_block1d4_v6_ssse3
assembly routines.  Also removed unused assembly.

Change-Id: I01c1021835f2edda9da706822345f217087ca0d0
---
 vp8/common/x86/subpixel_ssse3.asm    | 317 ++++++++++++---------------
 vp8/common/x86/subpixel_x86.h        |   4 +-
 vp8/common/x86/vp8_asm_stubs.c       |  51 +++--
 vp8/common/x86/x86_systemdependent.c |   2 +-
 4 files changed, 184 insertions(+), 190 deletions(-)

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index f45c7547b..6b3f9994f 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -316,21 +316,21 @@ sym(vp8_filter_block1d4_h6_ssse3):
     ; end prolog
 
     movsxd      rdx, DWORD PTR arg(5)   ;table index
-    mov         rsi, arg(0)             ;src_ptr
+    xor         rsi, rsi
     shl         rdx, 4      ;
 
     lea         rax, [k0_k5 GLOBAL]
     add         rax, rdx
     movdqa      xmm7, [rd GLOBAL]
 
-
-
-
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_h4_ssse3
 
     movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
 
+    mov         rsi, arg(0)             ;src_ptr
     mov         rdi, arg(2)             ;output_ptr
     movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
     movsxd      rcx, dword ptr arg(4)   ;output_height
@@ -362,10 +362,8 @@ filter_block1d4_h6_rowloop_ssse3:
     psraw       xmm0, 7
     packuswb    xmm0, xmm0
 
-;
-    punpcklbw   xmm0, xmm1
+    movd        DWORD PTR [rdi], xmm0
 
-    movq        MMWORD PTR [rdi], xmm0
     add         rdi, rdx
     dec         rcx
     jnz         filter_block1d4_h6_rowloop_ssse3
@@ -378,6 +376,53 @@ filter_block1d4_h6_rowloop_ssse3:
     pop         rbp
     ret
 
+vp8_filter_block1d4_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d4_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+
+    movd        DWORD PTR [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
 ;void vp8_filter_block1d16_v6_ssse3
 ;(
 ;    unsigned char *src_ptr,
@@ -700,81 +745,88 @@ vp8_filter_block1d8_v4_ssse3_loop:
     UNSHADOW_ARGS
     pop         rbp
     ret
-
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-global sym(vp8_filter_block1d8_h6_ssse3_slow)
-sym(vp8_filter_block1d8_h6_ssse3_slow):
+;void vp8_filter_block1d4_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_v6_ssse3)
+sym(vp8_filter_block1d4_v6_ssse3):
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
+    SHADOW_ARGS_TO_STACK 6
     GET_GOT     rbx
     push        rsi
     push        rdi
     ; end prolog
 
-    mov         rdx, arg(6) ;vp8_filter
-    mov         rsi, arg(0) ;src_ptr
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
 
-    mov         rdi, arg(1) ;output_ptr
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
 
-    movsxd      rcx, dword ptr arg(4) ;output_height
-    movsxd      rax, dword ptr arg(2) ;src_pixels_per_line
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
 
-    movq        xmm7, [rdx]
-    pxor        xmm4, xmm4
-    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
-    movdqa      xmm6, XMMWORD PTR [shuf2 GLOBAL]
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_v4_ssse3
 
-    movsxd      rdx, dword ptr arg(5) ;output_width
+    movq        mm5, MMWORD PTR [rax]         ;k0_k5
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
 
-    punpcklqdq  xmm7, xmm7          ;copy filter constants to upper 8 bytes
+    mov         rsi, arg(0)             ;src_ptr
 
-filter_block1d8_h6_rowloop3_slow:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+    mov         rax, rsi
+    add         rax, rdx
 
-    lea         rsi,    [rsi + rax]
+vp8_filter_block1d4_v6_ssse3_loop:
+    movd        mm1, DWORD PTR [rsi]                  ;A
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
 
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, XMMWORD PTR [shuf1 GLOBAL]
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
 
-    movdqa      xmm2, xmm1
-    pmaddubsw   xmm0, xmm7
-    pshufb      xmm1, XMMWORD PTR [shuf2 GLOBAL]
+    movd        mm0, DWORD PTR [rax + rdx * 4]        ;F
 
-    movdqa      xmm3, xmm2
-    pmaddubsw   xmm1, xmm7
-    pshufb      xmm2, XMMWORD PTR [shuf3 GLOBAL]
+    movq        mm4, [rd GLOBAL]
 
-    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
+    pmaddubsw   mm3, mm6
+    punpcklbw   mm1, mm0                  ;A F
+    pmaddubsw   mm2, mm7
+    pmaddubsw   mm1, mm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm1
+    paddsw      mm2, mm4
+    psraw       mm2, 7
+    packuswb    mm2, mm2
 
-    pmaddubsw   xmm2, xmm7
-    pmaddubsw   xmm3, xmm7
-;4 cycles
+    movd        DWORD PTR [rdi], mm2
 
-    phaddsw     xmm0, xmm1
-    phaddsw     xmm2, xmm3
-;7 cycles
-    phaddsw     xmm0, xmm2
-;7 cycles
-
-
-    paddsw      xmm0, [rd GLOBAL]
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-
-;
-    punpcklbw   xmm0, xmm4
-
-    movdqa      XMMWORD Ptr [rdi], xmm0
-    add         rdi, rdx
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
     dec         rcx
-    jnz         filter_block1d8_h6_rowloop3_slow                ; next row
+    jnz         vp8_filter_block1d4_v6_ssse3_loop
 
     ; begin epilog
     pop rdi
@@ -783,111 +835,46 @@ filter_block1d8_h6_rowloop3_slow:
     UNSHADOW_ARGS
     pop         rbp
     ret
-;void vp8_filter_block1d16_h6_ssse3
-;(
-;    unsigned char  *src_ptr,
-;    unsigned short *output_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned int    pixel_step,
-;    unsigned int    output_height,
-;    unsigned int    output_width,
-;    short           *vp8_filter
-;)
-global sym(vp8_filter_block1d16_h6_ssse3_slow)
-sym(vp8_filter_block1d16_h6_ssse3_slow):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 7
-    SAVE_XMM
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-    mov         rdx, arg(6) ;vp8_filter
-    mov         rsi, arg(0) ;src_ptr
 
-    mov         rdi, arg(1) ;output_ptr
+vp8_filter_block1d4_v4_ssse3:
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
+    movq        mm5, MMWORD PTR [rd GLOBAL]
 
-    movsxd      rcx, dword ptr arg(4) ;output_height
-    movsxd      rax, dword ptr arg(2) ;src_pixels_per_line
+    mov         rsi, arg(0)             ;src_ptr
 
-    movq        xmm7, [rdx]
-    pxor        xmm4, xmm4
-    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
-    movdqa      xmm6, XMMWORD PTR [shuf2 GLOBAL]
+    mov         rax, rsi
+    add         rax, rdx
 
-    movsxd      rdx, dword ptr arg(5) ;output_width
+vp8_filter_block1d4_v4_ssse3_loop:
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
 
-    punpcklqdq  xmm7, xmm7          ;copy filter constants to upper 8 bytes
-    sub         rdi, rdx
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
 
-filter_block1d16_h6_rowloop3_slow:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+    pmaddubsw   mm3, mm6
+    pmaddubsw   mm2, mm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm5
+    psraw       mm2, 7
+    packuswb    mm2, mm2
 
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, xmm5
+    movd        DWORD PTR [rdi], mm2
 
-    movdqa      xmm2, xmm1
-    pmaddubsw   xmm0, xmm7
-    pshufb      xmm1, xmm6
-
-    movdqa      xmm3, xmm2
-    pmaddubsw   xmm1, xmm7
-    pshufb      xmm2, XMMWORD PTR [shuf3 GLOBAL]
-    movdqu      xmm4,   XMMWORD PTR [rsi + 6]
-    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
-    lea         rsi,    [rsi + rax]
-    pmaddubsw   xmm2, xmm7
-    phaddsw     xmm0, xmm1
-
-    pmaddubsw   xmm3, xmm7
-    movdqa      xmm1, xmm4
-    pshufb      xmm4, xmm5
-    movdqa      xmm5, xmm1
-    pmaddubsw   xmm4, xmm7
-    pshufb      xmm1, xmm6
-    phaddsw     xmm2, xmm3
-    pmaddubsw   xmm1, xmm7
-    movdqa      xmm3, xmm5
-    pshufb      xmm5, XMMWORD PTR [shuf3 GLOBAL]
-    add         rdi, rdx
-    pmaddubsw   xmm5, xmm7
-    pshufb      xmm3, XMMWORD PTR [shuf4 GLOBAL]
-    phaddsw     xmm4, xmm1
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
     dec         rcx
-    phaddsw     xmm0, xmm2
-    pmaddubsw   xmm3, xmm7
-
-
-    paddsw      xmm0, [rd GLOBAL]
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-    phaddsw     xmm5, xmm3
-    pxor        xmm3, xmm3
-    punpcklbw   xmm0, xmm3
-;--
-;--
-;--
-;--
-
-    phaddsw     xmm4, xmm5
-    movdqa      xmm5, XMMWORD PTR [shuf1 GLOBAL]
-    movdqa      XMMWORD Ptr [rdi], xmm0
-;--
-;--
-;--
-;--
-;--
-    paddsw      xmm4, [rd GLOBAL]
-    psraw       xmm4, 7
-    packuswb    xmm4, xmm4
-;
-    punpcklbw   xmm4, xmm3
-
-    movdqa      XMMWORD Ptr [rdi+16], xmm4
-
-    jnz         filter_block1d16_h6_rowloop3_slow                ; next row
-
+    jnz         vp8_filter_block1d4_v4_ssse3_loop
 
     ; begin epilog
     pop rdi
@@ -899,22 +886,6 @@ filter_block1d16_h6_rowloop3_slow:
 
 SECTION_RODATA
 align 16
-shuf1:
-    db 0, 1, 2, 4, 3, 5, 128, 128, 1, 2, 3, 5, 4, 6, 128, 128
-shuf2:
-    db 2, 3, 4, 6, 5, 7, 128, 128, 3, 4, 5, 7, 6, 8, 128, 128
-shuf3:
-    db 4, 5, 6, 8, 7, 9, 128, 128, 5, 6, 7, 9, 8, 10, 128, 128
-shuf4:
-    db 6, 7, 8, 10, 9, 11, 128, 128, 7, 8, 9, 11, 10, 12, 128, 128
-
-shuf1a:
-    db 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
-shuf2a:
-    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
-shuf3a:
-    db 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11, 10, 12
-
 shuf1b:
     db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
 shuf2b:
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index e5c08b15c..b371892c9 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -104,8 +104,8 @@ extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
 #undef  vp8_subpix_sixtap8x4
 #define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
 
-//#undef  vp8_subpix_sixtap4x4
-//#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
+#undef  vp8_subpix_sixtap4x4
+#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
 
 
 //#undef  vp8_subpix_bilinear16x16
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 2c99cb64f..8b54b2327 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -402,6 +402,26 @@ extern void vp8_filter_block1d8_v6_ssse3
     unsigned int   vp8_filter_index
 );
 
+extern void vp8_filter_block1d4_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
 void vp8_sixtap_predict16x16_ssse3
 (
     unsigned char  *src_ptr,
@@ -509,21 +529,24 @@ void vp8_sixtap_predict4x4_ssse3
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 16*16);
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
 
-    if (xoffset)
-    {
-        if (yoffset)
-        {
-
-        }
-        else
-        {
-        }
-    }
-    else
-    {
-    }
+  if (xoffset)
+  {
+      if (yoffset)
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
+          vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
+      }
+      else
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+      }
+  }
+  else
+  {
+      vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+  }
 
 }
 
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 2d8ced00d..ce487ff9f 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -123,7 +123,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
         rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
-//        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
     }
 #endif
 

From 392a958274f6456add66363ae2dfdfc060b94fe9 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 9 Aug 2010 13:27:26 -0400
Subject: [PATCH 115/307] avoid negative array subscript warnings

The mv_ref and sub_mv_ref token encodings are indexed from NEARESTMV
and LEFT4X4, respectively, rather than being zero-based like the
other token encodings.

Change-Id: I3699c3f84111209ecfb91097c4b900773e9a3ad5
---
 vp8/common/entropymode.c | 6 ++++--
 vp8/common/entropymode.h | 4 ----
 vp8/common/treecoder.c   | 6 ++++++
 vp8/common/treecoder.h   | 2 ++
 vp8/encoder/bitstream.c  | 6 ++++--
 vp8/encoder/rdopt.c      | 3 ++-
 6 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 493728d5d..41922834f 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -264,8 +264,10 @@ void vp8_entropy_mode_init()
     vp8_tokens_from_tree(vp8_uv_mode_encodings,  vp8_uv_mode_tree);
     vp8_tokens_from_tree(vp8_mbsplit_encodings, vp8_mbsplit_tree);
 
-    vp8_tokens_from_tree(VP8_MVREFENCODINGS,   vp8_mv_ref_tree);
-    vp8_tokens_from_tree(VP8_SUBMVREFENCODINGS, vp8_sub_mv_ref_tree);
+    vp8_tokens_from_tree_offset(vp8_mv_ref_encoding_array,
+                                vp8_mv_ref_tree, NEARESTMV);
+    vp8_tokens_from_tree_offset(vp8_sub_mv_ref_encoding_array,
+                                vp8_sub_mv_ref_tree, LEFT4X4);
 
     vp8_tokens_from_tree(vp8_small_mvencodings, vp8_small_mvtree);
 }
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index afa513d3c..3d5af7cf7 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -54,10 +54,6 @@ extern struct vp8_token_struct vp8_mbsplit_encodings  [VP8_NUMMBSPLITS];
 extern struct vp8_token_struct vp8_mv_ref_encoding_array    [VP8_MVREFS];
 extern struct vp8_token_struct vp8_sub_mv_ref_encoding_array [VP8_SUBMVREFS];
 
-#define VP8_MVREFENCODINGS      (vp8_mv_ref_encoding_array - NEARESTMV)
-#define VP8_SUBMVREFENCODINGS   (vp8_sub_mv_ref_encoding_array - LEFT4X4)
-
-
 extern const vp8_tree_index vp8_small_mvtree[];
 
 extern struct vp8_token_struct vp8_small_mvencodings [8];
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 5829cb701..495abd716 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -47,6 +47,12 @@ void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t)
     tree2tok(p, t, 0, 0, 0);
 }
 
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t,
+                                 int offset)
+{
+    tree2tok(p - offset, t, 0, 0, 0);
+}
+
 static void branch_counts(
     int n,                      /* n = size of alphabet */
     vp8_token tok               [ /* n */ ],
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index c8f5af96d..990327536 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -54,6 +54,8 @@ typedef const struct vp8_token_struct
 /* Construct encoding array from tree. */
 
 void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree);
+void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree,
+                                 int offset);
 
 
 /* Convert array of token occurrence counts into a table of probabilities
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 3d7c7612d..21629841b 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -792,7 +792,8 @@ static void write_mv_ref
 
     assert(NEARESTMV <= m  &&  m <= SPLITMV);
 
-    vp8_write_token(w, vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+    vp8_write_token(w, vp8_mv_ref_tree, p,
+                    vp8_mv_ref_encoding_array - NEARESTMV + m);
 }
 
 static void write_sub_mv_ref
@@ -802,7 +803,8 @@ static void write_sub_mv_ref
 {
     assert(LEFT4X4 <= m  &&  m <= NEW4X4);
 
-    vp8_write_token(w, vp8_sub_mv_ref_tree, p, VP8_SUBMVREFENCODINGS + m);
+    vp8_write_token(w, vp8_sub_mv_ref_tree, p,
+                    vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
 }
 
 static void write_mv
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 8fe2d206a..3c6f0b324 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -880,7 +880,8 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
     vp8_prob p [VP8_MVREFS-1];
     assert(NEARESTMV <= m  &&  m <= SPLITMV);
     vp8_mv_ref_probs(p, near_mv_ref_ct);
-    return vp8_cost_token(vp8_mv_ref_tree, p, VP8_MVREFENCODINGS + m);
+    return vp8_cost_token(vp8_mv_ref_tree, p,
+                          vp8_mv_ref_encoding_array - NEARESTMV + m);
 }
 
 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)

From d22e2968a8507342a2be98f712edf470686dc85f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 9 Aug 2010 13:48:04 -0400
Subject: [PATCH 116/307] cosmetics: add missing 2D array braces

Silences compile warning.

Change-Id: I4b207d97f8570fe29aa2710e4ce4f02e7e43b57a
---
 vp8/encoder/onyx_if.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 081a77597..f768e60c7 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3237,13 +3237,20 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
 #if USE_FILTER_LUT
 static int modifier_lut[7][19] =
 {
-16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,        // Strength=0
-16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // Strength=1
-16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,       // Strength=2
-16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,     // Strength=3
-16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,    // Strength=4
-16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0, // Strength=5
-16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2,1// Strength=6
+    // Strength=0
+    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=1
+    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=2
+    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=3
+    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=4
+    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=5
+    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
+    // Strength=6
+    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
 };
 #endif
 static void vp8cx_temp_blur1_c

From 9c7a0090e0c8e4dda45570d273b9cd228b58e9d6 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Thu, 12 Aug 2010 16:25:43 -0400
Subject: [PATCH 117/307] Removed unnecessary MB_MODE_INFO copies

These copies occurred for each macroblock in the encoder and decoder.
Thetemp MB_MODE_INFO mbmi was removed from MACROBLOCKD.  As a result,
a large number compile errors had to be fixed.

Change-Id: I4cf0ffae3ce244f6db04a4c217d52dd256382cf3
---
 vp8/common/blockd.h       |  4 +-
 vp8/common/invtrans.c     |  3 +-
 vp8/common/reconinter.c   | 39 +++++++++-------
 vp8/common/reconintra.c   |  8 ++--
 vp8/decoder/decodframe.c  | 41 ++++++-----------
 vp8/decoder/detokenize.c  |  4 +-
 vp8/decoder/threading.c   | 25 +++--------
 vp8/encoder/encodeframe.c | 88 +++++++++++++++++-------------------
 vp8/encoder/encodeintra.c | 10 ++---
 vp8/encoder/encodemb.c    | 26 +++++------
 vp8/encoder/ethreading.c  | 16 +++----
 vp8/encoder/firstpass.c   |  8 ++--
 vp8/encoder/pickinter.c   | 58 ++++++++++++------------
 vp8/encoder/quantize.c    | 18 ++++----
 vp8/encoder/rdopt.c       | 94 +++++++++++++++++++--------------------
 vp8/encoder/tokenize.c    | 20 ++++-----
 16 files changed, 216 insertions(+), 246 deletions(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 468c83295..de308ffc3 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -215,7 +215,7 @@ typedef struct
 {
     DECLARE_ALIGNED(16, short, diff[400]);      // from idct diff
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-    DECLARE_ALIGNED(16, short, reference[384]);
+//not used    DECLARE_ALIGNED(16, short, reference[384]);
     DECLARE_ALIGNED(16, short, qcoeff[400]);
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
 
@@ -232,8 +232,6 @@ typedef struct
 
     FRAME_TYPE frame_type;
 
-    MB_MODE_INFO mbmi;
-
     int up_available;
     int left_available;
 
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index d1822c8bf..32ef15ed1 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -65,7 +65,8 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
 {
     int i;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         // do 2nd order transform on the dc block
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index d17dc26af..f11995e24 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -210,7 +210,8 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
 {
     int i;
 
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         unsigned char *uptr, *vptr;
         unsigned char *upred_ptr = &x->predictor[256];
@@ -254,16 +255,18 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
     }
 }
 
-
+//encoder only
 void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 {
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+  if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+      x->mode_info_context->mbmi.mode != SPLITMV)
     {
         unsigned char *ptr_base;
         unsigned char *ptr;
         unsigned char *pred_ptr = x->predictor;
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -282,7 +285,7 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
     {
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
@@ -313,7 +316,9 @@ void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 
 void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
 {
-    if (x->mbmi.ref_frame != INTRA_FRAME && x->mbmi.mode != SPLITMV)
+
+    if (x->mode_info_context->mbmi.ref_frame != INTRA_FRAME &&
+        x->mode_info_context->mbmi.mode != SPLITMV)
     {
         int offset;
         unsigned char *ptr_base;
@@ -323,8 +328,8 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
         unsigned char *upred_ptr = &x->predictor[256];
         unsigned char *vpred_ptr = &x->predictor[320];
 
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -361,7 +366,7 @@ void vp8_build_inter_predictors_mb(MACROBLOCKD *x)
     {
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
@@ -410,7 +415,7 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
 {
     int i, j;
 
-    if (x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
     {
         for (i = 0; i < 2; i++)
         {
@@ -455,8 +460,8 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
     }
     else
     {
-        int mvrow = x->mbmi.mv.as_mv.row;
-        int mvcol = x->mbmi.mv.as_mv.col;
+        int mvrow = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mvcol = x->mode_info_context->mbmi.mv.as_mv.col;
 
         if (mvrow < 0)
             mvrow -= 1;
@@ -535,7 +540,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
     unsigned char *pred_ptr = x->predictor;
     unsigned char *dst_ptr = x->dst.y_buffer;
 
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
         int offset;
         unsigned char *ptr_base;
@@ -547,8 +552,8 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
         unsigned char *udst_ptr = x->dst.u_buffer;
         unsigned char *vdst_ptr = x->dst.v_buffer;
 
-        int mv_row = x->mbmi.mv.as_mv.row;
-        int mv_col = x->mbmi.mv.as_mv.col;
+        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
+        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
         int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
 
         ptr_base = x->pre.y_buffer;
@@ -587,7 +592,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
         //if sth is wrong, go back to what it is in build_inter_predictors_mb.
         int i;
 
-        if (x->mbmi.partitioning < 3)
+        if (x->mode_info_context->mbmi.partitioning < 3)
         {
             for (i = 0; i < 4; i++)
             {
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index 7133b0915..b5f57c203 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -43,7 +43,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
     }
 
     // for Y
-    switch (x->mbmi.mode)
+    switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
     {
@@ -164,7 +164,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
     }
 
     // for Y
-    switch (x->mbmi.mode)
+    switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
     {
@@ -290,7 +290,7 @@ void vp8_build_intra_predictors_mbuv(MACROBLOCKD *x)
         vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
     }
 
-    switch (x->mbmi.uv_mode)
+    switch (x->mode_info_context->mbmi.uv_mode)
     {
     case DC_PRED:
     {
@@ -430,7 +430,7 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         vleft_col[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
     }
 
-    switch (x->mbmi.uv_mode)
+    switch (x->mode_info_context->mbmi.uv_mode)
     {
     case DC_PRED:
     {
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 8e501f52c..9942e0b57 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -113,7 +113,7 @@ static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
 // to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
 static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
-    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
+    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
 
         vp8_build_intra_predictors_mbuv_s(xd);
@@ -164,7 +164,7 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 
 static void clamp_mvs(MACROBLOCKD *xd)
 {
-    if (xd->mbmi.mode == SPLITMV)
+    if (xd->mode_info_context->mbmi.mode == SPLITMV)
     {
         int i;
 
@@ -175,7 +175,7 @@ static void clamp_mvs(MACROBLOCKD *xd)
     }
     else
     {
-        clamp_mv_to_umv_border(&xd->mbmi.mv.as_mv, xd);
+        clamp_mv_to_umv_border(&xd->mode_info_context->mbmi.mv.as_mv, xd);
         clamp_uvmv_to_umv_border(&xd->block[16].bmi.mv.as_mv, xd);
     }
 
@@ -184,10 +184,9 @@ static void clamp_mvs(MACROBLOCKD *xd)
 void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     int eobtotal = 0;
-    MV  orig_mvs[24];
-    int i, do_clamp = xd->mbmi.need_to_clamp_mvs;
+    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
 
-    if (xd->mbmi.mb_skip_coeff)
+    if (xd->mode_info_context->mbmi.mb_skip_coeff)
     {
         vp8_reset_mb_tokens_context(xd);
     }
@@ -199,20 +198,12 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     /* Perform temporary clamping of the MV to be used for prediction */
     if (do_clamp)
     {
-        if (xd->mbmi.mode == SPLITMV)
-            for (i=0; i<24; i++)
-                orig_mvs[i] = xd->block[i].bmi.mv.as_mv;
-        else
-        {
-            orig_mvs[0] = xd->mbmi.mv.as_mv;
-            orig_mvs[1] = xd->block[16].bmi.mv.as_mv;
-        }
         clamp_mvs(xd);
     }
 
     xd->mode_info_context->mbmi.dc_diff = 1;
 
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV && eobtotal == 0)
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
     {
         xd->mode_info_context->mbmi.dc_diff = 0;
         skip_recon_mb(pbi, xd);
@@ -223,11 +214,11 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         mb_init_dequantizer(pbi, xd);
 
     // do prediction
-    if (xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME)
+    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8_build_intra_predictors_mbuv(xd);
 
-        if (xd->mbmi.mode != B_PRED)
+        if (xd->mode_info_context->mbmi.mode != B_PRED)
         {
             vp8_build_intra_predictors_mby_ptr(xd);
         } else {
@@ -240,7 +231,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     }
 
     // dequantization and idct
-    if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
@@ -283,7 +274,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
             }
         }
     }
-    else if ((xd->frame_type == KEY_FRAME  ||  xd->mbmi.ref_frame == INTRA_FRAME) && xd->mbmi.mode == B_PRED)
+    else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
     {
         for (i = 0; i < 16; i++)
         {
@@ -394,12 +385,8 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
     {
-        // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-        // the partition_bmi array is unused in the decoder, so don't copy it.
-        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
-                   sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
 
-        if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
         {
             for (i = 0; i < 16; i++)
             {
@@ -420,9 +407,9 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
         xd->left_available = (mb_col != 0);
 
         // Select the appropriate reference frame for this MB
-        if (xd->mbmi.ref_frame == LAST_FRAME)
+        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
             ref_fb_idx = pc->lst_fb_idx;
-        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
             ref_fb_idx = pc->gld_fb_idx;
         else
             ref_fb_idx = pc->alt_fb_idx;
@@ -608,7 +595,7 @@ static void init_frame(VP8D_COMP *pbi)
     xd->left_context = pc->left_context;
     xd->mode_info_context = pc->mi;
     xd->frame_type = pc->frame_type;
-    xd->mbmi.mode = DC_PRED;
+    xd->mode_info_context->mbmi.mode = DC_PRED;
     xd->mode_info_stride = pc->mode_info_stride;
 }
 
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 09547966d..740741745 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -95,7 +95,7 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
     *(l+1) = 0;
 
     /* Clear entropy contexts for Y2 blocks */
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         a = A[Y2CONTEXT];
         l = L[Y2CONTEXT];
@@ -240,7 +240,7 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     i = 0;
     stop = 16;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
         stop = 24;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 6db23bf0c..fd422eb50 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -69,9 +69,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        mbd->mbmi.mode = DC_PRED;
-        mbd->mbmi.uv_mode = DC_PRED;
-
         mbd->current_bc = &pbi->bc2;
 
         for (j = 0; j < 25; j++)
@@ -222,12 +219,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                             }
                         }
 
-                        // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-                        // the partition_bmi array is unused in the decoder, so don't copy it.
-                        vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
-                                   sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
-
-                        if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+                        if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                         {
                             for (i = 0; i < 16; i++)
                             {
@@ -248,9 +240,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                         xd->left_available = (mb_col != 0);
 
                         // Select the appropriate reference frame for this MB
-                        if (xd->mbmi.ref_frame == LAST_FRAME)
+                        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                             ref_fb_idx = pc->lst_fb_idx;
-                        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+                        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                             ref_fb_idx = pc->gld_fb_idx;
                         else
                             ref_fb_idx = pc->alt_fb_idx;
@@ -639,12 +631,7 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
                 }
 
-                // Take a copy of the mode and Mv information for this macroblock into the xd->mbmi
-                // the partition_bmi array is unused in the decoder, so don't copy it.
-                vpx_memcpy(&xd->mbmi, &xd->mode_info_context->mbmi,
-                           sizeof(MB_MODE_INFO) - sizeof(xd->mbmi.partition_bmi));
-
-                if (xd->mbmi.mode == SPLITMV || xd->mbmi.mode == B_PRED)
+                if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
                 {
                     for (i = 0; i < 16; i++)
                     {
@@ -665,9 +652,9 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                 xd->left_available = (mb_col != 0);
 
                 // Select the appropriate reference frame for this MB
-                if (xd->mbmi.ref_frame == LAST_FRAME)
+                if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                     ref_fb_idx = pc->lst_fb_idx;
-                else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+                else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                     ref_fb_idx = pc->gld_fb_idx;
                 else
                     ref_fb_idx = pc->alt_fb_idx;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b1bd81065..bddb55b49 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -254,7 +254,6 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     int i;
     int QIndex;
     MACROBLOCKD *xd = &x->e_mbd;
-    MB_MODE_INFO *mbmi = &xd->mbmi;
     int zbin_extra;
 
     // Select the baseline MB Q index.
@@ -262,12 +261,12 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     {
         // Abs Value
         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
 
+            QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
         // Delta Value
         else
         {
-            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
+            QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][xd->mode_info_context->mbmi.segment_id];
             QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
         }
     }
@@ -388,14 +387,14 @@ void encode_mb_row(VP8_COMP *cpi,
         {
             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
-                xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
             else
-                xd->mbmi.segment_id = 0;
+                xd->mode_info_context->mbmi.segment_id = 0;
 
             vp8cx_mb_init_quantizer(cpi, x);
         }
         else
-            xd->mbmi.segment_id = 0;         // Set to Segment 0 by default
+            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 
         x->active_ptr = cpi->active_map + seg_map_index + mb_col;
 
@@ -426,7 +425,7 @@ void encode_mb_row(VP8_COMP *cpi,
 #endif
 
             // Count of last ref frame 0,0 useage
-            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 cpi->inter_zz_count ++;
 
             // Special case code for cyclic refresh
@@ -434,14 +433,14 @@ void encode_mb_row(VP8_COMP *cpi,
             // during vp8cx_encode_inter_macroblock()) back into the global sgmentation map
             if (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)
             {
-                cpi->segmentation_map[seg_map_index+mb_col] = xd->mbmi.segment_id;
+                cpi->segmentation_map[seg_map_index+mb_col] = xd->mode_info_context->mbmi.segment_id;
 
                 // If the block has been refreshed mark it as clean (the magnitude of the -ve influences how long it will be before we consider another refresh):
                 // Else if it was coded (last frame 0,0) and has not already been refreshed then mark it as a candidate for cleanup next time (marked 0)
                 // else mark it as dirty (1).
-                if (xd->mbmi.segment_id)
+                if (xd->mode_info_context->mbmi.segment_id)
                     cpi->cyclic_refresh_map[seg_map_index+mb_col] = -1;
-                else if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+                else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                 {
                     if (cpi->cyclic_refresh_map[seg_map_index+mb_col] == 1)
                         cpi->cyclic_refresh_map[seg_map_index+mb_col] = 0;
@@ -456,9 +455,6 @@ void encode_mb_row(VP8_COMP *cpi,
 
         x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
-        // store macroblock mode info into context array
-        vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
-
         for (i = 0; i < 16; i++)
             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
 
@@ -471,7 +467,7 @@ void encode_mb_row(VP8_COMP *cpi,
         recon_uvoffset += 8;
 
         // Keep track of segment useage
-        segment_counts[xd->mbmi.segment_id] ++;
+        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 
         // skip to next mb
         xd->mode_info_context++;
@@ -627,8 +623,8 @@ void vp8_encode_frame(VP8_COMP *cpi)
     //x->rdmult = (int)(cpi->RDMULT * pow( (cpi->rate_correction_factor * 2.0), 0.75 ));
 #endif
 
-    xd->mbmi.mode = DC_PRED;
-    xd->mbmi.uv_mode = DC_PRED;
+    xd->mode_info_context->mbmi.mode = DC_PRED;
+    xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 
     xd->left_context = cm->left_context;
 
@@ -982,8 +978,8 @@ void vp8_build_block_offsets(MACROBLOCK *x)
 static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x)
 {
     const MACROBLOCKD *xd = & x->e_mbd;
-    const MB_PREDICTION_MODE m = xd->mbmi.mode;
-    const MB_PREDICTION_MODE uvm = xd->mbmi.uv_mode;
+    const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode;
+    const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode;
 
 #ifdef MODE_STATS
     const int is_key = cpi->common.frame_type == KEY_FRAME;
@@ -1021,7 +1017,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
     int rateuv_tokenonly = 0;
     int i;
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
 #if !(CONFIG_REALTIME_ONLY)
 
@@ -1037,7 +1033,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 
         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
 
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
         rate += rateuv;
@@ -1045,7 +1041,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
         if (Error4x4 < Error16x16)
         {
             rate += rate4x4;
-            x->e_mbd.mbmi.mode = B_PRED;
+            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
 
             // get back the intra block modes
             for (i = 0; i < 16; i++)
@@ -1085,7 +1081,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 
         for (mode = DC_PRED; mode <= TM_PRED; mode ++)
         {
-            x->e_mbd.mbmi.mode = mode;
+            x->e_mbd.mode_info_context->mbmi.mode = mode;
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
             rate2  = x->mbmode_cost[x->e_mbd.frame_type][mode];
@@ -1105,17 +1101,17 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
         else
             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         if (Error4x4 < Error16x16)
         {
-            x->e_mbd.mbmi.mode = B_PRED;
+            x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
             cpi->prediction_error += Error4x4;
         }
         else
         {
-            x->e_mbd.mbmi.mode = best_mode;
+            x->e_mbd.mode_info_context->mbmi.mode = best_mode;
             vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
             cpi->prediction_error += Error16x16;
         }
@@ -1149,7 +1145,7 @@ int vp8cx_encode_inter_macroblock
     x->skip = 0;
 
     if (xd->segmentation_enabled)
-        x->encode_breakout = cpi->segment_encode_breakout[xd->mbmi.segment_id];
+        x->encode_breakout = cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
     else
         x->encode_breakout = cpi->oxcf.encode_breakout;
 
@@ -1180,17 +1176,17 @@ int vp8cx_encode_inter_macroblock
         if (cpi->cyclic_refresh_mode_enabled)
         {
             // Clear segment_id back to 0 if not coded (last frame 0,0)
-            if ((xd->mbmi.segment_id == 1) &&
-                ((xd->mbmi.ref_frame != LAST_FRAME) || (xd->mbmi.mode != ZEROMV)))
+            if ((xd->mode_info_context->mbmi.segment_id == 1) &&
+                ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) || (xd->mode_info_context->mbmi.mode != ZEROMV)))
             {
-                xd->mbmi.segment_id = 0;
+                xd->mode_info_context->mbmi.segment_id = 0;
             }
         }
 
         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
         if (cpi->zbin_mode_boost_enabled)
         {
-            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame != LAST_FRAME))
+            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
                 cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
             else
                 cpi->zbin_mode_boost = 0;
@@ -1199,15 +1195,15 @@ int vp8cx_encode_inter_macroblock
         vp8cx_mb_init_quantizer(cpi,  x);
     }
 
-    cpi->count_mb_ref_frame_usage[xd->mbmi.ref_frame] ++;
+    cpi->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame] ++;
 
-    if (xd->mbmi.ref_frame == INTRA_FRAME)
+    if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
-        x->e_mbd.mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
 
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
 
-        if (xd->mbmi.mode == B_PRED)
+        if (xd->mode_info_context->mbmi.mode == B_PRED)
         {
             vp8_encode_intra4x4mby(IF_RTCD(&cpi->rtcd), x);
         }
@@ -1226,13 +1222,13 @@ int vp8cx_encode_inter_macroblock
         int ref_fb_idx;
 
         vp8_find_near_mvs(xd, xd->mode_info_context,
-                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+                          &nearest, &nearby, &best_ref_mv, mdcounts, xd->mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
         vp8_build_uvmvs(xd, cpi->common.full_pixel);
 
-        if (xd->mbmi.ref_frame == LAST_FRAME)
+        if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
             ref_fb_idx = cpi->common.lst_fb_idx;
-        else if (xd->mbmi.ref_frame == GOLDEN_FRAME)
+        else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
             ref_fb_idx = cpi->common.gld_fb_idx;
         else
             ref_fb_idx = cpi->common.alt_fb_idx;
@@ -1241,7 +1237,7 @@ int vp8cx_encode_inter_macroblock
         xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset;
         xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
-        if (xd->mbmi.mode == SPLITMV)
+        if (xd->mode_info_context->mbmi.mode == SPLITMV)
         {
             int i;
 
@@ -1254,19 +1250,19 @@ int vp8cx_encode_inter_macroblock
                 }
             }
         }
-        else if (xd->mbmi.mode == NEWMV)
+        else if (xd->mode_info_context->mbmi.mode == NEWMV)
         {
             cpi->MVcount[0][mv_max+((xd->block[0].bmi.mv.as_mv.row - best_ref_mv.row) >> 1)]++;
             cpi->MVcount[1][mv_max+((xd->block[0].bmi.mv.as_mv.col - best_ref_mv.col) >> 1)]++;
         }
 
-        if (!x->skip && !x->e_mbd.mbmi.force_no_skip)
+        if (!x->skip && !x->e_mbd.mode_info_context->mbmi.force_no_skip)
         {
             vp8_encode_inter16x16(IF_RTCD(&cpi->rtcd), x);
 
             // Clear mb_skip_coeff if mb_no_coeff_skip is not set
             if (!cpi->common.mb_no_coeff_skip)
-                xd->mbmi.mb_skip_coeff = 0;
+                xd->mode_info_context->mbmi.mb_skip_coeff = 0;
 
         }
         else
@@ -1279,19 +1275,19 @@ int vp8cx_encode_inter_macroblock
     {
         if (cpi->common.mb_no_coeff_skip)
         {
-            if (xd->mbmi.mode != B_PRED && xd->mbmi.mode != SPLITMV)
-                xd->mbmi.dc_diff = 0;
+            if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+                xd->mode_info_context->mbmi.dc_diff = 0;
             else
-                xd->mbmi.dc_diff = 1;
+                xd->mode_info_context->mbmi.dc_diff = 1;
 
-            xd->mbmi.mb_skip_coeff = 1;
+            xd->mode_info_context->mbmi.mb_skip_coeff = 1;
             cpi->skip_true_count ++;
             vp8_fix_contexts(cpi, xd);
         }
         else
         {
             vp8_stuff_mb(cpi, xd, t);
-            xd->mbmi.mb_skip_coeff = 0;
+            xd->mode_info_context->mbmi.mb_skip_coeff = 0;
             cpi->skip_false_count ++;
         }
     }
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 556940f0e..4ed6e8414 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -53,7 +53,7 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK
 
     x->quantize_b(be, b);
 
-    x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
 
     vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
 
@@ -70,7 +70,7 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL
 
     x->quantize_b(be, b);
 
-    x->e_mbd.mbmi.mb_skip_coeff &= (!b->eob);
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
 
     IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
 
@@ -124,7 +124,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     {
         BLOCKD *d = &x->e_mbd.block[b];
 
-        switch (x->e_mbd.mbmi.mode)
+        switch (x->e_mbd.mode_info_context->mbmi.mode)
         {
 
         case DC_PRED:
@@ -157,7 +157,7 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_intra_mby(x);
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
 
     vp8_quantize_mby(x);
 
@@ -170,7 +170,7 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     {
         BLOCKD *d = &x->e_mbd.block[b];
 
-        switch (x->e_mbd.mbmi.mode)
+        switch (x->e_mbd.mode_info_context->mbmi.mode)
         {
 
         case DC_PRED:
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 485001e51..52f0291f9 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -121,7 +121,7 @@ void vp8_transform_mbuv(MACROBLOCK *x)
 
     for (i = 16; i < 24; i += 2)
     {
-        x->vp8_short_fdct8x4(&x->block[i].src_diff[0], 
+        x->vp8_short_fdct8x4(&x->block[i].src_diff[0],
             &x->block[i].coeff[0], 16);
     }
 }
@@ -158,7 +158,7 @@ void vp8_transform_mb(MACROBLOCK *x)
     }
 
     // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
         vp8_build_dcblock(x);
 
     for (i = 16; i < 24; i += 2)
@@ -168,7 +168,7 @@ void vp8_transform_mb(MACROBLOCK *x)
     }
 
     // do 2nd order transform on the dc block
-    if (x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
         x->short_walsh4x4(&x->block[24].src_diff[0],
         &x->block[24].coeff[0], 8);
 
@@ -185,7 +185,7 @@ void vp8_transform_mby(MACROBLOCK *x)
     }
 
     // build dc block from 16 y dc values
-    if (x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
     {
         vp8_build_dcblock(x);
         x->short_walsh4x4(&x->block[24].src_diff[0],
@@ -494,8 +494,8 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 
     vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
         x->e_mbd.left_context[Y1CONTEXT], 4);
-    has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
-        && x->e_mbd.mbmi.mode != SPLITMV);
+    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
     type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
@@ -538,25 +538,25 @@ static void vp8_find_mb_skip_coef(MACROBLOCK *x)
 {
     int i;
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
 
-    if (x->e_mbd.mbmi.mode != B_PRED && x->e_mbd.mbmi.mode != SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
     {
         for (i = 0; i < 16; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
         }
 
         for (i = 16; i < 25; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
         }
     }
     else
     {
         for (i = 0; i < 24; i++)
         {
-            x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
         }
     }
 }
@@ -576,8 +576,8 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
         return;
     vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
         x->e_mbd.left_context[Y1CONTEXT], 4);
-    has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
-        && x->e_mbd.mbmi.mode != SPLITMV);
+    has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
     type = has_2nd_order ? 0 : 3;
 
     for (b = 0; b < 16; b++)
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 1877417bb..e87a06cb3 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -120,14 +120,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                         {
                             // Code to set segment id in xd->mbmi.segment_id for current MB (with range checking)
                             if (cpi->segmentation_map[seg_map_index+mb_col] <= 3)
-                                xd->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
+                                xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[seg_map_index+mb_col];
                             else
-                                xd->mbmi.segment_id = 0;
+                                xd->mode_info_context->mbmi.segment_id = 0;
 
                             vp8cx_mb_init_quantizer(cpi, x);
                         }
                         else
-                            xd->mbmi.segment_id = 0;         // Set to Segment 0 by default
+                            xd->mode_info_context->mbmi.segment_id = 0;         // Set to Segment 0 by default
 
 
                         if (cm->frame_type == KEY_FRAME)
@@ -157,7 +157,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 #endif
 
                             // Count of last ref frame 0,0 useage
-                            if ((xd->mbmi.mode == ZEROMV) && (xd->mbmi.ref_frame == LAST_FRAME))
+                            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME))
                                 cpi->inter_zz_count ++;
 
                         }
@@ -166,9 +166,6 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                         x->gf_active_ptr++;      // Increment pointer into gf useage flags structure for next mb
 
-                        // store macroblock mode info into context array
-                        vpx_memcpy(&xd->mode_info_context->mbmi, &xd->mbmi, sizeof(xd->mbmi));
-
                         for (i = 0; i < 16; i++)
                             vpx_memcpy(&xd->mode_info_context->bmi[i], &xd->block[i].bmi, sizeof(xd->block[i].bmi));
 
@@ -181,7 +178,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                         recon_uvoffset += 8;
 
                         // Keep track of segment useage
-                        segment_counts[xd->mbmi.segment_id] ++;
+                        segment_counts[xd->mode_info_context->mbmi.segment_id] ++;
 
                         // skip to next mb
                         xd->mode_info_context++;
@@ -405,9 +402,6 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         mb->rddiv = cpi->RDDIV;
         mb->rdmult = cpi->RDMULT;
 
-        mbd->mbmi.mode = DC_PRED;
-        mbd->mbmi.uv_mode = DC_PRED;
-
         mbd->left_context = cm->left_context;
         mb->mvc = cm->fc.mvc;
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index b838378bb..5cbd2a43e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -78,9 +78,9 @@ int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred)
 
     if (use_dc_pred)
     {
-        x->e_mbd.mbmi.mode = DC_PRED;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
-        x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+        x->e_mbd.mode_info_context->mbmi.mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
         vp8_encode_intra16x16mby(IF_RTCD(&cpi->rtcd), x);
     }
@@ -565,6 +565,8 @@ void vp8_first_pass(VP8_COMP *cpi)
     xd->pre = *lst_yv12;
     xd->dst = *new_yv12;
 
+    xd->mode_info_context = cm->mi;
+
     vp8_build_block_offsets(x);
 
     vp8_setup_block_dptrs(&x->e_mbd);
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 1947e8167..cd615dcd6 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -410,7 +410,7 @@ int vp8_pick_intra_mbuv_mode(MACROBLOCK *mb)
     }
 
 
-    mb->e_mbd.mbmi.uv_mode = best_mode;
+    mb->e_mbd.mode_info_context->mbmi.uv_mode = best_mode;
     return best_error;
 
 }
@@ -535,7 +535,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
     best_rd = INT_MAX;
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
 
     // if we encode a new mv this is important
     // find the best new motion vector
@@ -547,9 +547,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         if (best_rd <= cpi->rd_threshes[mode_index])
             continue;
 
-        x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
 
-        if (skip_mode[x->e_mbd.mbmi.ref_frame])
+        if (skip_mode[x->e_mbd.mode_info_context->mbmi.ref_frame])
             continue;
 
         // Check to see if the testing frequency for this mode is at its max
@@ -582,29 +582,29 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         // Experimental debug code.
         //all_rds[mode_index] = -1;
 
-        x->e_mbd.mbmi.mode = this_mode;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
 
         // Work out the cost assosciated with selecting the reference frame
-        frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+        frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
         rate2 += frame_cost;
 
         // everything but intra
-        if (x->e_mbd.mbmi.ref_frame)
+        if (x->e_mbd.mode_info_context->mbmi.ref_frame)
         {
-            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mbmi.ref_frame];
-            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mbmi.ref_frame];
-            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mbmi.ref_frame];
-            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mbmi.ref_frame];
-            mode_mv[NEARMV] = near_mv[x->e_mbd.mbmi.ref_frame];
-            best_ref_mv1 = best_ref_mv[x->e_mbd.mbmi.ref_frame];
-            memcpy(mdcounts, MDCounts[x->e_mbd.mbmi.ref_frame], sizeof(mdcounts));
+            x->e_mbd.pre.y_buffer = y_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            x->e_mbd.pre.u_buffer = u_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            x->e_mbd.pre.v_buffer = v_buffer[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            mode_mv[NEARESTMV] = nearest_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            mode_mv[NEARMV] = near_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            best_ref_mv1 = best_ref_mv[x->e_mbd.mode_info_context->mbmi.ref_frame];
+            memcpy(mdcounts, MDCounts[x->e_mbd.mode_info_context->mbmi.ref_frame], sizeof(mdcounts));
         }
 
         //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
         if (cpi->is_src_frame_alt_ref)
         {
-            if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
                 continue;
         }
 
@@ -644,7 +644,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         case TM_PRED:
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             distortion2 = VARIANCE_INVOKE(&cpi->rtcd.variance, get16x16prederror)(x->src.y_buffer, x->src.y_stride, x->e_mbd.predictor, 16, 0x7fffffff);
-            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+            rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
             if (this_rd < best_intra_rd)
@@ -782,10 +782,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                 continue;
 
             rate2 += vp8_cost_mv_ref(this_mode, mdcounts);
-            x->e_mbd.mbmi.mode = this_mode;
-            x->e_mbd.mbmi.mv.as_mv = mode_mv[this_mode];
+            x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+            x->e_mbd.mode_info_context->mbmi.mv.as_mv = mode_mv[this_mode];
             x->e_mbd.block[0].bmi.mode = this_mode;
-            x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mbmi.mv.as_int;
+            x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
 
             distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
 
@@ -824,7 +824,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
             *returnrate = rate2;
             *returndistortion = distortion2;
             best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
 
             if (this_mode == B_PRED || this_mode == SPLITMV)
                 for (i = 0; i < 16; i++)
@@ -870,9 +870,9 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
     if (best_mbmode.mode <= B_PRED)
     {
-        x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
         vp8_pick_intra_mbuv_mode(x);
-        best_mbmode.uv_mode = x->e_mbd.mbmi.uv_mode;
+        best_mbmode.uv_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
     }
 
 
@@ -898,23 +898,23 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         best_mbmode.partitioning = 0;
         best_mbmode.dc_diff = 0;
 
-        vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
         for (i = 0; i < 16; i++)
         {
             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
         }
 
-        x->e_mbd.mbmi.mv.as_int = 0;
+        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
 
         return best_rd;
     }
 
 
     // macroblock modes
-    vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
-    if (x->e_mbd.mbmi.mode == B_PRED || x->e_mbd.mbmi.mode == SPLITMV)
+    if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
         for (i = 0; i < 16; i++)
         {
             vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
@@ -922,10 +922,10 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         }
     else
     {
-        vp8_set_mbmode_and_mvs(x, x->e_mbd.mbmi.mode, &best_bmodes[0].mv.as_mv);
+        vp8_set_mbmode_and_mvs(x, x->e_mbd.mode_info_context->mbmi.mode, &best_bmodes[0].mv.as_mv);
     }
 
-    x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
 
     return best_rd;
 }
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 353217c93..2ea16d8f0 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -277,34 +277,34 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;
-    int has_2nd_order = (x->e_mbd.mbmi.mode != B_PRED
-        && x->e_mbd.mbmi.mode != SPLITMV);
+    int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
     for (i = 0; i < 16; i++)
     {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &=
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
             (x->e_mbd.block[i].eob <= has_2nd_order);
     }
 
     if(has_2nd_order)
     {
         x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
     }
 }
 
 void vp8_quantize_mb(MACROBLOCK *x)
 {
     int i;
-    int has_2nd_order=(x->e_mbd.mbmi.mode != B_PRED
-        && x->e_mbd.mbmi.mode != SPLITMV);
+    int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
+        && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
-    x->e_mbd.mbmi.mb_skip_coeff = 1;
+    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
     for (i = 0; i < 24+has_2nd_order; i++)
     {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &=
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
             (x->e_mbd.block[i].eob <= (has_2nd_order && i<16));
     }
 }
@@ -317,6 +317,6 @@ void vp8_quantize_mbuv(MACROBLOCK *x)
     for (i = 16; i < 24; i++)
     {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
+        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
     }
 }
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 3c6f0b324..c2a3ce1f7 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -486,7 +486,7 @@ static int macro_block_max_error(MACROBLOCK *mb)
     int i, j;
     int berror;
 
-    dc = !(mb->e_mbd.mbmi.mode == B_PRED || mb->e_mbd.mbmi.mode == SPLITMV);
+    dc = !(mb->e_mbd.mode_info_context->mbmi.mode == B_PRED || mb->e_mbd.mode_info_context->mbmi.mode == SPLITMV);
 
     for (i = 0; i < 16; i++)
     {
@@ -622,14 +622,14 @@ int vp8_rdcost_mby(MACROBLOCK *mb)
     vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
     vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1);
 
-    if (x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
         type = 3;
 
     for (b = 0; b < 16; b++)
         cost += cost_coeffs(mb, x->block + b, type,
                             t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
 
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
         cost += cost_coeffs(mb, x->block + 24, 1,
                             t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]);
 
@@ -761,9 +761,9 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
         int dummy;
         rate = 0;
 
-        x->e_mbd.mbmi.mode = mode;
+        x->e_mbd.mode_info_context->mbmi.mode = mode;
 
-        rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+        rate += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
 
         vp8_encode_intra16x16mbyrd(IF_RTCD(&cpi->rtcd), x);
 
@@ -785,7 +785,7 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
         }
     }
 
-    x->e_mbd.mbmi.mode = mode_selected;
+    x->e_mbd.mode_info_context->mbmi.mode = mode_selected;
     return best_rd;
 }
 
@@ -847,11 +847,11 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
         int distortion;
         int this_rd;
 
-        x->e_mbd.mbmi.uv_mode = mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = mode;
         vp8_encode_intra16x16mbuvrd(IF_RTCD(&cpi->rtcd), x);
 
         rate_to = rd_cost_mbuv(x);
-        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.uv_mode];
+        rate = rate_to + x->intra_uv_mode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.uv_mode];
 
         distortion = vp8_get_mbuvrecon_error(IF_RTCD(&cpi->rtcd.variance), x);
 
@@ -870,7 +870,7 @@ int vp8_rd_pick_intra_mbuv_mode(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *ra
     *rate = r;
     *distortion = d;
 
-    x->e_mbd.mbmi.uv_mode = mode_selected;
+    x->e_mbd.mode_info_context->mbmi.uv_mode = mode_selected;
     return best_rd;
 }
 #endif
@@ -888,9 +888,9 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
 {
     int i;
 
-    x->e_mbd.mbmi.mode = mb;
-    x->e_mbd.mbmi.mv.as_mv.row = mv->row;
-    x->e_mbd.mbmi.mv.as_mv.col = mv->col;
+    x->e_mbd.mode_info_context->mbmi.mode = mb;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv.row = mv->row;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv.col = mv->col;
 
     for (i = 0; i < 16; i++)
     {
@@ -1060,7 +1060,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     }
 
     // 2nd order fdct
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
         mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8);
     }
@@ -1072,7 +1072,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     }
 
     // DC predication and Quantization of 2nd Order block
-    if (x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != SPLITMV)
     {
 
         {
@@ -1081,7 +1081,7 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     }
 
     // Distortion
-    if (x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == SPLITMV)
         d = ENCODEMB_INVOKE(rtcd, mberr)(mb, 0) << 2;
     else
     {
@@ -1401,10 +1401,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
 
     // save partitions
     labels = vp8_mbsplits[best_seg];
-    x->e_mbd.mbmi.partitioning = best_seg;
-    x->e_mbd.mbmi.partition_count = vp8_count_labels(labels);
+    x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
+    x->e_mbd.mode_info_context->mbmi.partition_count = vp8_count_labels(labels);
 
-    for (i = 0; i < x->e_mbd.mbmi.partition_count; i++)
+    for (i = 0; i < x->e_mbd.mode_info_context->mbmi.partition_count; i++)
     {
         int j;
 
@@ -1414,8 +1414,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                 break;
         }
 
-        x->e_mbd.mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
-        x->e_mbd.mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
+        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
+        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
     }
 
     return best_segment_rd;
@@ -1515,9 +1515,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     vpx_memset(mode_mv, 0, sizeof(mode_mv));
 
-    x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+    x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
     vp8_rd_pick_intra_mbuv_mode(cpi, x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion);
-    uv_intra_mode = x->e_mbd.mbmi.uv_mode;
+    uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode;
     {
         uvintra_eob = 0;
 
@@ -1561,18 +1561,18 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
         this_mode = vp8_mode_order[mode_index];
 
-        x->e_mbd.mbmi.mode = this_mode;
-        x->e_mbd.mbmi.uv_mode = DC_PRED;
-        x->e_mbd.mbmi.ref_frame = vp8_ref_frame_order[mode_index];
+        x->e_mbd.mode_info_context->mbmi.mode = this_mode;
+        x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED;
+        x->e_mbd.mode_info_context->mbmi.ref_frame = vp8_ref_frame_order[mode_index];
 
         //Only consider ZEROMV/ALTREF_FRAME for alt ref frame.
         if (cpi->is_src_frame_alt_ref)
         {
-            if (this_mode != ZEROMV || x->e_mbd.mbmi.ref_frame != ALTREF_FRAME)
+            if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME)
                 continue;
         }
 
-        if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+        if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
         {
             YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx];
 
@@ -1586,7 +1586,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             x->e_mbd.pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset;
             x->e_mbd.pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset;
         }
-        else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+        else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
         {
             YV12_BUFFER_CONFIG *gld_yv12 = &cpi->common.yv12_fb[cpi->common.gld_fb_idx];
 
@@ -1601,7 +1601,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             x->e_mbd.pre.u_buffer = gld_yv12->u_buffer + recon_uvoffset;
             x->e_mbd.pre.v_buffer = gld_yv12->v_buffer + recon_uvoffset;
         }
-        else if (x->e_mbd.mbmi.ref_frame == ALTREF_FRAME)
+        else if (x->e_mbd.mode_info_context->mbmi.ref_frame == ALTREF_FRAME)
         {
             YV12_BUFFER_CONFIG *alt_yv12 = &cpi->common.yv12_fb[cpi->common.alt_fb_idx];
 
@@ -1623,11 +1623,11 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         vp8_find_near_mvs(&x->e_mbd,
                           x->e_mbd.mode_info_context,
                           &mode_mv[NEARESTMV], &mode_mv[NEARMV], &best_ref_mv,
-                          mdcounts, x->e_mbd.mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
+                          mdcounts, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias);
 
 
         // Estimate the reference frame signaling cost and add it to the rolling cost variable.
-        frame_cost = ref_frame_cost[x->e_mbd.mbmi.ref_frame];
+        frame_cost = ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame];
         rate2 += frame_cost;
 
         if (this_mode <= B_PRED)
@@ -1694,9 +1694,9 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             int breakout_rd = best_rd - frame_cost_rd;
             int tmp_rd;
 
-            if (x->e_mbd.mbmi.ref_frame == LAST_FRAME)
+            if (x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME)
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWMV], cpi->common.full_pixel) ;
-            else if (x->e_mbd.mbmi.ref_frame == GOLDEN_FRAME)
+            else if (x->e_mbd.mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWG], cpi->common.full_pixel) ;
             else
                 tmp_rd = vp8_rd_pick_best_mbsegmentation(cpi, x, &best_ref_mv, breakout_rd, mdcounts, &rate, &rate_y, &distortion, cpi->compressor_speed, x->mvcost, cpi->rd_threshes[THR_NEWA], cpi->common.full_pixel) ;
@@ -1753,16 +1753,16 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             if (0)//x->e_mbd.mbmi.partition_count == 4)
             {
 
-                if (x->e_mbd.mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mbmi.partition_bmi[1].mv.as_int
-                    && x->e_mbd.mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mbmi.partition_bmi[3].mv.as_int)
+                if (x->e_mbd.mode_info_context->mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[1].mv.as_int
+                    && x->e_mbd.mode_info_context->mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[3].mv.as_int)
                 {
                     const int *labels = vp8_mbsplits[2];
-                    x->e_mbd.mbmi.partitioning = 0;
+                    x->e_mbd.mode_info_context->mbmi.partitioning = 0;
                     rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
                     rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
                     //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[1]];
                     //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[3]];
-                    x->e_mbd.mbmi.partition_bmi[1] = x->e_mbd.mbmi.partition_bmi[2];
+                    x->e_mbd.mode_info_context->mbmi.partition_bmi[1] = x->e_mbd.mode_info_context->mbmi.partition_bmi[2];
                 }
             }
 
@@ -1772,14 +1772,14 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         case V_PRED:
         case H_PRED:
         case TM_PRED:
-            x->e_mbd.mbmi.ref_frame = INTRA_FRAME;
+            x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME;
             vp8_build_intra_predictors_mby_ptr(&x->e_mbd);
             {
                 macro_block_yrd(x, &rate, &distortion, IF_RTCD(&cpi->rtcd.encodemb)) ;
                 rate2 += rate;
                 rate_y = rate;
                 distortion2 += distortion;
-                rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mbmi.mode];
+                rate2 += x->mbmode_cost[x->e_mbd.frame_type][x->e_mbd.mode_info_context->mbmi.mode];
                 rate2 += uv_intra_rate;
                 rate_uv = uv_intra_rate_tokenonly;
                 distortion2 += uv_intra_distortion;
@@ -2085,7 +2085,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         //all_rates[mode_index] = rate2;
         //all_dist[mode_index] = distortion2;
 
-        if ((x->e_mbd.mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
+        if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME)  && (this_rd < *returnintra))
         {
             *returnintra = this_rd ;
         }
@@ -2095,17 +2095,17 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         {
             // Note index of best mode so far
             best_mode_index = mode_index;
-            x->e_mbd.mbmi.force_no_skip = force_no_skip;
+            x->e_mbd.mode_info_context->mbmi.force_no_skip = force_no_skip;
 
             if (this_mode <= B_PRED)
             {
-                x->e_mbd.mbmi.uv_mode = uv_intra_mode;
+                x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode;
             }
 
             *returnrate = rate2;
             *returndistortion = distortion2;
             best_rd = this_rd;
-            vpx_memcpy(&best_mbmode, &x->e_mbd.mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
 
             for (i = 0; i < 16; i++)
             {
@@ -2186,28 +2186,28 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         best_mbmode.partitioning = 0;
         best_mbmode.dc_diff = 0;
 
-        vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
         for (i = 0; i < 16; i++)
         {
             vpx_memset(&x->e_mbd.block[i].bmi, 0, sizeof(B_MODE_INFO));
         }
 
-        x->e_mbd.mbmi.mv.as_int = 0;
+        x->e_mbd.mode_info_context->mbmi.mv.as_int = 0;
 
         return best_rd;
     }
 
 
     // macroblock modes
-    vpx_memcpy(&x->e_mbd.mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
 
     for (i = 0; i < 16; i++)
     {
         vpx_memcpy(&x->e_mbd.block[i].bmi, &best_bmodes[i], sizeof(B_MODE_INFO));
     }
 
-    x->e_mbd.mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
+    x->e_mbd.mode_info_context->mbmi.mv.as_mv = x->e_mbd.block[15].bmi.mv.as_mv;
 
     return best_rd;
 }
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index da44f6960..83365d38a 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -276,7 +276,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     TOKENEXTRA *start = *t;
     TOKENEXTRA *tp = *t;
 
-    x->mbmi.dc_diff = 1;
+    x->mode_info_context->mbmi.dc_diff = 1;
 
 #if 0
 
@@ -291,7 +291,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 
 #if 1
 
-    if (x->mbmi.mb_skip_coeff)
+    if (x->mode_info_context->mbmi.mb_skip_coeff)
     {
 
         cpi->skip_true_count++;
@@ -303,10 +303,10 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
             vp8_fix_contexts(cpi, x);
         }
 
-        if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-            x->mbmi.dc_diff = 0;
+        if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+            x->mode_info_context->mbmi.dc_diff = 0;
         else
-            x->mbmi.dc_diff = 1;
+            x->mode_info_context->mbmi.dc_diff = 1;
 
 
         return;
@@ -347,7 +347,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
 #endif
 
-    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
+    if (x->mode_info_context->mbmi.mode == B_PRED || x->mode_info_context->mbmi.mode == SPLITMV)
     {
         plane_type = 3;
     }
@@ -592,10 +592,10 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     plane_type = 0;
 
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
-        x->mbmi.dc_diff = 0;
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
+        x->mode_info_context->mbmi.dc_diff = 0;
     else
-        x->mbmi.dc_diff = 1;
+        x->mode_info_context->mbmi.dc_diff = 1;
 
 
     for (b = 0; b < 16; b++)
@@ -629,7 +629,7 @@ void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x)
     x->above_context[UCONTEXT][1]  = 0;
     x->above_context[VCONTEXT][1]  = 0;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         x->left_context[Y2CONTEXT][0] = 0;
         x->above_context[Y2CONTEXT][0] = 0;

From 1ec7981c34ba3b9cc304f7b5c487a28d47425b52 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 12 Aug 2010 13:06:47 -0400
Subject: [PATCH 118/307] remove unused definition

asm_offsets contains some definitions which are no longer used. this
was one of them. v6 build works now

Change-Id: If370cfa8acd145de4fead2d9a11b048fccc090df
---
 vp8/common/arm/vpx_asm_offsets.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index c342f8fdc..0604ae3d1 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -51,7 +51,6 @@ DEFINE(mb_dst_y_stride,                         offsetof(MACROBLOCKD, dst.y_stri
 DEFINE(mb_dst_y_buffer,                         offsetof(MACROBLOCKD, dst.y_buffer));
 DEFINE(mb_dst_u_buffer,                         offsetof(MACROBLOCKD, dst.u_buffer));
 DEFINE(mb_dst_v_buffer,                         offsetof(MACROBLOCKD, dst.v_buffer));
-DEFINE(mb_mbmi_mode,                            offsetof(MACROBLOCKD, mbmi.mode));
 DEFINE(mb_up_available,                         offsetof(MACROBLOCKD, up_available));
 DEFINE(mb_left_available,                       offsetof(MACROBLOCKD, left_available));
 

From 633646b73b1294b02f8c6073864079118494b334 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 12 Aug 2010 13:27:07 -0400
Subject: [PATCH 119/307] update structure

mode_info_context->mbmi no longer gets copied up a level

Change-Id: Icd2d27d381909721326c34594a1ccdc26d48a995
---
 vp8/common/arm/reconintra_arm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index 3b4fe1a5b..e72261c03 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -29,7 +29,7 @@ void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x)
     unsigned char *y_buffer = x->dst.y_buffer;
     unsigned char *ypred_ptr = x->predictor;
     int y_stride = x->dst.y_stride;
-    int mode = x->mbmi.mode;
+    int mode = x->mode_info_context->mbmi.mode;
     int Up = x->up_available;
     int Left = x->left_available;
 
@@ -52,7 +52,7 @@ void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x)
     unsigned char *y_buffer = x->dst.y_buffer;
     unsigned char *ypred_ptr = x->predictor;
     int y_stride = x->dst.y_stride;
-    int mode = x->mbmi.mode;
+    int mode = x->mode_info_context->mbmi.mode;
     int Up = x->up_available;
     int Left = x->left_available;
 

From 9602799cd9ecd0529e291f8d1af951bf2fde787b Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 12 Aug 2010 09:05:37 -0400
Subject: [PATCH 120/307] framework for assembly version of the detokenizer

adds a compile time option: --enable-arm-asm-detok which pulls in
vp8/decoder/arm/detokenize.asm

currently about break even speed wise, but changes are pending to
the fill code (branch and load 3 bytes versus conditionally always
load one) and the error handling. Currently it doesn't handle zero
runs or overrunning the buffer.

this is really just so i don't have to rebase my changes all the
time to run benchmarks - now just need to replace one file!

Change-Id: I56d0e2354dc0ca3811bffd0e88fe1f952fa6c797
---
 configure                        |   3 +
 vp8/decoder/arm/detokenize.asm   | 333 +++++++++++++++++++++++++++++++
 vp8/decoder/arm/detokenize_arm.h |  22 ++
 vp8/decoder/detokenize.c         |  59 ++++++
 vp8/decoder/detokenize.h         |  10 +-
 vp8/decoder/onyxd_if.c           |   7 +-
 vp8/vp8dx_arm.mk                 |   9 +-
 7 files changed, 429 insertions(+), 14 deletions(-)
 create mode 100644 vp8/decoder/arm/detokenize.asm
 create mode 100644 vp8/decoder/arm/detokenize_arm.h

diff --git a/configure b/configure
index 5c908d4b3..ac3d1621d 100755
--- a/configure
+++ b/configure
@@ -38,6 +38,7 @@ Advanced options:
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_runtime_cpu_detect}    runtime cpu detection
   ${toggle_shared}                shared library support
+  ${toggle_arm_asm_detok}         assembly version of the detokenizer (ARM platforms only)
 
 Codecs:
   Codecs can be selectively enabled or disabled individually, or by family:
@@ -242,6 +243,7 @@ CONFIG_LIST="
     spatial_resampling
     realtime_only
     shared
+    arm_asm_detok
 "
 CMDLINE_SELECT="
     extra_warnings
@@ -278,6 +280,7 @@ CMDLINE_SELECT="
     spatial_resampling
     realtime_only
     shared
+    arm_asm_detok
 "
 
 process_cmdline() {
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
new file mode 100644
index 000000000..bafacb957
--- /dev/null
+++ b/vp8/decoder/arm/detokenize.asm
@@ -0,0 +1,333 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_decode_mb_tokens_v6|
+
+    AREA    |.text|, CODE, READONLY  ; name this block of code
+
+    INCLUDE vpx_asm_offsets.asm
+
+l_qcoeff    EQU     0
+l_i         EQU     4
+l_type      EQU     8
+l_stop      EQU     12
+l_c         EQU     16
+l_l_ptr     EQU     20
+l_a_ptr     EQU     24
+l_bc        EQU     28
+l_coef_ptr  EQU     32
+l_stacksize EQU     64
+
+
+;; constant offsets -- these should be created at build time
+c_onyxblock2left_offset      EQU 25
+c_onyxblock2above_offset     EQU 50
+c_entropy_nodes              EQU 11
+c_dct_eob_token              EQU 11
+
+|vp8_decode_mb_tokens_v6| PROC
+    stmdb       sp!, {r4 - r11, lr}
+    sub         sp, sp, #l_stacksize
+    mov         r7, r1                      ; type
+    mov         r9, r0                      ; detoken
+
+    ldr         r1, [r9, #detok_current_bc]
+    ldr         r0, [r9, #detok_qcoeff_start_ptr]
+    mov         r11, #0                     ; i
+    mov         r3, #0x10                   ; stop
+
+    cmp         r7, #1                      ; type ?= 1
+    addeq       r11, r11, #24               ; i = 24
+    addeq       r3, r3, #8                  ; stop = 24
+    addeq       r0, r0, #3, 24              ; qcoefptr += 24*16 ?CHECKME
+
+    str         r0, [sp, #l_qcoeff]
+    str         r11, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+    str         r1, [sp, #l_bc]
+
+    add         lr, r9, r7, lsl #2          ; detoken + type*4
+
+    ldr         r8, [r1, #bool_decoder_user_buffer]
+
+    ldr         r10, [lr, #detok_coef_probs] ; coef_probs[type]
+    ldr         r5, [r1, #bool_decoder_count]
+    ldr         r6, [r1, #bool_decoder_range]
+    ldr         r4, [r1, #bool_decoder_value]
+
+    str         r10, [sp, #l_coef_ptr]
+
+    ;align 4
+BLOCK_LOOP
+    ldr         r3, [r9, #detok_ptr_onyxblock2context_leftabove]
+    ldr         r2, [r9, #detok_A]
+    ldr         r1, [r9, #detok_L]
+    ldrb        r12, [r3, r11]!             ; onyxblock2context[i]
+
+    cmp         r7, #0                      ; c = !type
+    moveq       r7, #1
+    movne       r7, #0
+
+    ldr         r0, [r2, r12, lsl #2]       ; A[onyxblock2context[i]]
+    add         r1, r1, r12, lsl #4         ; L + onyxblock2context[i] << 4
+      ; A is ptr to ptr (**)
+      ; L is ptr to data (*[4])
+
+    ldrb        r2, [r3, #c_onyxblock2above_offset] ; + above offset
+    ldrb        r3, [r3, #c_onyxblock2left_offset] ; + left offset
+    mov         lr, #c_entropy_nodes        ; ENTROPY_NODES = 11
+;;  ;++
+
+    ldr         r2, [r0, r2, lsl #2]!       ; A + above offset
+    ldr         r3, [r1, r3, lsl #2]!       ; L + left offset
+; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
+    cmp         r2, #0                      ; *a ?= 0
+    movne       r2, #1                      ; haha if a == 0 no need to set up another var to state that pretty sweet :)
+    cmp         r3, #0                      ; *l ?= 0
+    addne       r2, r2, #1                  ; t
+
+    str         r1, [sp, #l_l_ptr]          ; save &l
+    str         r0, [sp, #l_a_ptr]          ; save &a
+    smlabb      r0, r2, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
+    mov         r1, #0                      ; t = 0
+    str         r7, [sp, #l_c]
+
+    ;align 4
+COEFF_LOOP
+    ldr         r3, [r9, #detok_ptr_onyx_coef_bands_x]
+    ldr         lr, [r9, #detok_onyx_coef_tree_ptr]
+
+      ; onyx_coef_bands_x is UINT16
+    add         r3, r3, r7, lsl #1            ; coef_bands_x[c]
+    ldrh        r3, [r3]                      ; UINT16
+
+    ;++
+    add         r0, r0, r3                  ; Prob += coef_bands_x[c]
+
+    ;align 4
+get_token_loop
+    ldrb        r2, [r0, +r1, asr #1]       ; Prob[t >> 1]
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; (range << 8) - (1 << 8)
+    mov         r10, #1                     ; 1
+
+    smlawb      r2, r3, r2, r10             ; split = 1 + (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; load cx data byte in stall slot : r8 = bufptr
+    ;++
+
+    subs        r3, r4, r2, lsl #24         ; value-(split<<24): used later to calculate shift for NORMALIZE
+    addhs       r1, r1, #1                  ; t += 1
+    movhs       r4, r3                      ; value -= bigsplit (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+ ;   movlo       r6, r2                      ; range = split
+
+    ldrsb     r1, [lr, r1]                  ; t = onyx_coef_tree_ptr[t]
+
+; NORMALIZE
+    clz         r3, r2                      ; vp8dx_bitreader_norm[range] + 24
+    sub         r3, r3, #24                 ; vp8dx_bitreader_norm[range]
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+; if count <= 0, += BR_COUNT; value |= *bufptr++ << (BR_COUNT-count); BR_COUNT = 8, but need to upshift values by +16
+    addle         r5, r5, #8                ; count += 8
+    rsble         r3, r5, #24               ; 24 - count
+    addle         r8, r8, #1                ; bufptr++
+    orrle         r4, r4, r12, lsl r3       ; value |= *bufptr << shift + 16
+
+    cmp         r1, #0                      ; t ?= 0
+    bgt         get_token_loop              ; while (t > 0)
+
+    cmn         r1, #c_dct_eob_token        ; if(t == -DCT_EOB_TOKEN)
+    beq         END_OF_BLOCK                ; break
+
+    rsb         lr, r1, #0                  ; v = -t;
+
+    cmp         lr, #4                      ; if(v > FOUR_TOKEN)
+    ble         SKIP_EXTRABITS
+
+    ldr         r3, [r9, #detok_teb_base_ptr]
+    mov         r11, #1                     ; 1 in split = 1 + ... nope, v+= 1 << bits_count
+    add         r7, r3, lr, lsl #4          ; detok_teb_base_ptr + (v << 4)
+
+    ldrsh       lr, [r7, #tokenextrabits_min_val] ; v = teb_ptr->min_val
+    ldrsh       r0, [r7, #tokenextrabits_length] ; bits_count = teb_ptr->Length
+
+extrabits_loop
+    add         r3, r0, r7                  ; &teb_ptr->Probs[bits_count]
+
+    ldrb        r2, [r3, #4]                ; probability. why +4?
+    mov         r3, r6, lsl #8              ; range << 8
+    sub         r3, r3, #256                ; range << 8 + 1 << 8
+
+    smlawb      r2, r3, r2, r11             ; split = 1 +  (((range-1) * probability) >> 8)
+
+    ldrb        r12, [r8]                   ; *bufptr
+    ;++
+
+    subs        r10, r4, r2, lsl #24        ; value - (split<<24)
+    movhs       r4, r10                     ; value = value - (split << 24)
+    subhs       r2, r6, r2                  ; range = range - split
+    addhs       lr, lr, r11, lsl r0         ; v += ((UINT16)1<<bits_count)
+
+; NORMALIZE
+    clz         r3, r2                      ; shift - leading zeros in split
+    sub         r3, r3, #24                 ; don't count first 3 bytes
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range = range << shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+
+    addle       r5, r5, #8                  ; count += BR_COUNT
+    addle       r8, r8, #1                  ; bufptr++
+    rsble       r3, r5, #24                 ; BR_COUNT - count
+    orrle       r4, r4, r12, lsl r3         ; value |= *bufptr << (BR_COUNT - count)
+
+    subs        r0, r0, #1                  ; bits_count --
+    bpl         extrabits_loop
+
+
+SKIP_EXTRABITS
+    ldr         r11, [sp, #l_qcoeff]
+    ldr         r0, [sp, #l_coef_ptr]       ; Prob = coef_probs
+
+    cmp         r1, #0                      ; check for nonzero token - if (t)
+    beq         SKIP_EOB_CHECK              ; if t is zero, we will skip the eob table chec
+
+    add         r3, r6, #1                  ; range + 1
+    mov         r2, r3, lsr #1              ; split = (range + 1) >> 1
+
+    subs        r3, r4, r2, lsl #24         ; value - (split<<24)
+    movhs       r4, r3                      ; value -= (split << 24)
+    subhs       r2, r6, r2                  ; range -= split
+    mvnhs       r3, lr                      ; -v
+    addhs       lr, r3, #1                  ; v = (v ^ -1) + 1
+
+; NORMALIZE
+    clz         r3, r2                      ; leading 0s in split
+    sub         r3, r3, #24                 ; shift
+    subs        r5, r5, r3                  ; count -= shift
+    mov         r6, r2, lsl r3              ; range <<= shift
+    mov         r4, r4, lsl r3              ; value <<= shift
+    ldrleb      r2, [r8], #1                ; *(bufptr++)
+    addle       r5, r5, #8                  ; count += 8
+    rsble       r3, r5, #24                  ; BR_COUNT - count
+    orrle       r4, r4, r2, lsl r3          ; value |= *bufptr << (BR_COUNT - count)
+
+    add         r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+
+    cmn         r1, #1                      ; t < -ONE_TOKEN
+
+    addlt       r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+
+    mvn         r1, #1                      ; t = -1 ???? C is -2
+
+SKIP_EOB_CHECK
+    ldr         r7, [sp, #l_c]              ; c
+    ldr         r3, [r9, #detok_scan]
+    add         r1, r1, #2                  ; t+= 2
+    cmp         r7, #(0x10 - 1)             ; c should will be one higher
+
+    ldr         r3, [r3, +r7, lsl #2]       ; scan[c] this needs pre-inc c value
+    add         r7, r7, #1                  ; c++
+    add         r3, r11, r3, lsl #1         ; qcoeff + scan[c]
+
+    str         r7, [sp, #l_c]              ; store c
+    strh        lr, [r3]                    ; qcoef_ptr[scan[c]] = v
+
+    blt         COEFF_LOOP
+
+    sub         r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c ; never stored! no condition!
+
+END_OF_BLOCK
+    ldr         r3, [sp, #l_type]           ; type
+    ldr         r10, [sp, #l_coef_ptr]      ; coef_ptr
+    ldr         r0, [sp, #l_qcoeff]         ; qcoeff
+    ldr         r11, [sp, #l_i]             ; i
+    ldr         r12, [sp, #l_stop]          ; stop
+
+    cmp         r3, #0                      ; type ?= 0
+    moveq       r1, #1
+    movne       r1, #0
+    add         r3, r11, r9                 ; detok + i
+
+    cmp         r7, r1                      ; c ?= !type
+    strb        r7, [r3, #detok_eob]        ; eob[i] = c
+
+    ldr         r7, [sp, #l_l_ptr]          ; l
+    ldr         r2, [sp, #l_a_ptr]          ; a
+    movne       r3, #1                      ; t
+    moveq       r3, #0
+
+    add         r0, r0, #0x20               ; qcoeff += 32 (16 * 2?)
+    add         r11, r11, #1                ; i++
+    str         r3, [r7]                    ; *l = t
+    str         r3, [r2]                    ; *a = t
+    str         r0, [sp, #l_qcoeff]         ; qcoeff
+    str         r11, [sp, #l_i]             ; i
+
+    cmp         r11, r12                    ; i >= stop ? VERIFY should be strictly LT(<)?
+    ldr         r7, [sp, #l_type]           ; type
+    mov         lr, #0xB                    ; 11 (ENTORPY_NODES?)
+
+    blt         BLOCK_LOOP
+
+    cmp         r11, #0x19                  ; i ?= 25
+    bne         ln2_decode_mb_to
+
+    ldr         r12, [r9, #detok_qcoeff_start_ptr]
+    ldr         r10, [r9, #detok_coef_probs]
+    mov         r7, #0                      ; type/i = 0
+    mov         r3, #0x10                   ; stop = 0
+    str         r12, [sp, #l_qcoeff]        ; qcoeff_ptr = qcoeff_start_ptr
+    str         r7, [sp, #l_i]
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] (0)
+
+    b           BLOCK_LOOP
+
+ln2_decode_mb_to
+    cmp         r11, #0x10                  ; i ?= 16
+    bne         ln1_decode_mb_to
+
+    mov         r10, #detok_coef_probs
+    add         r10, r10, #2*4              ; coef_probs[type]
+    ldr         r10, [r9, r10]              ; detok + 48 - THIS IS PROBABLY THE ISSUE: NEW STRUCTURE
+
+    mov         r7, #2                      ; type = 2
+    mov         r3, #0x18                   ; stop = 24
+
+    str         r7, [sp, #l_type]
+    str         r3, [sp, #l_stop]
+
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] - didn't want to add 2 to coef_probs
+    b           BLOCK_LOOP
+
+ln1_decode_mb_to
+    ldr         r2, [sp, #l_bc]
+    mov         r0, #0
+    nop
+
+    str         r8, [r2, #bool_decoder_user_buffer]
+    str         r5, [r2, #bool_decoder_count]
+    str         r4, [r2, #bool_decoder_value]
+    str         r6, [r2, #bool_decoder_range]
+
+    add         sp, sp, #l_stacksize
+    ldmia       sp!, {r4 - r11, pc}
+
+    ENDP  ; |vp8_decode_mb_tokens_v6|
+
+    END
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
new file mode 100644
index 000000000..1c53f7b78
--- /dev/null
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -0,0 +1,22 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef DETOKENIZE_ARM_H
+#define DETOKENIZE_ARM_H
+
+#if HAVE_ARMV6
+#if CONFIG_ARM_ASM_DETOK
+void vp8_init_detokenizer(VP8D_COMP *dx);
+void vp8_decode_mb_tokens_v6(DETOK *detoken, int type);
+#endif
+#endif
+
+#endif
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 740741745..34faae3aa 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -14,6 +14,7 @@
 #include "onyxd_int.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
+#include "detokenize.h"
 
 #define BOOL_DATA UINT8
 
@@ -103,6 +104,34 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
         *l = 0;
     }
 }
+
+#if CONFIG_ARM_ASM_DETOK
+DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0  //end of vp8_block2above
+};
+
+void vp8_init_detokenizer(VP8D_COMP *dx)
+{
+    const VP8_COMMON *const oc = & dx->common;
+    MACROBLOCKD *x = & dx->mb;
+
+    dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
+    dx->detoken.ptr_onyxblock2context_leftabove = vp8_block2context_leftabove;
+    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
+    dx->detoken.scan = vp8_default_zig_zag1d;
+    dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
+    dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
+
+    dx->detoken.coef_probs[0] = (oc->fc.coef_probs [0] [ 0 ] [0]);
+    dx->detoken.coef_probs[1] = (oc->fc.coef_probs [1] [ 0 ] [0]);
+    dx->detoken.coef_probs[2] = (oc->fc.coef_probs [2] [ 0 ] [0]);
+    dx->detoken.coef_probs[3] = (oc->fc.coef_probs [3] [ 0 ] [0]);
+}
+#endif
+
 DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
 #define FILL \
     if(count < 0) \
@@ -200,6 +229,35 @@ DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
     }\
     NORMALIZE
 
+#if CONFIG_ARM_ASM_DETOK
+int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
+{
+    int eobtotal = 0;
+    int i, type;
+
+    dx->detoken.current_bc = x->current_bc;
+    dx->detoken.A = x->above_context;
+    dx->detoken.L = x->left_context;
+
+    type = 3;
+
+    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    {
+        type = 1;
+        eobtotal -= 16;
+    }
+
+    vp8_decode_mb_tokens_v6(&dx->detoken, type);
+
+    for (i = 0; i < 25; i++)
+    {
+        x->block[i].eob = dx->detoken.eob[i];
+        eobtotal += dx->detoken.eob[i];
+    }
+
+    return eobtotal;
+}
+#else
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 {
     ENTROPY_CONTEXT **const A = x->above_context;
@@ -395,3 +453,4 @@ BLOCK_FINISHED:
     return eobtotal;
 
 }
+#endif //!CONFIG_ASM_DETOK
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index 2f6b4a996..aa98deae7 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -9,12 +9,16 @@
  */
 
 
-#ifndef detokenize_h
-#define detokenize_h 1
+#ifndef DETOKENIZE_H
+#define DETOKENIZE_H
 
 #include "onyxd_int.h"
 
+#if ARCH_ARM
+#include "arm/detokenize_arm.h"
+#endif
+
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x);
 int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *);
 
-#endif /* detokenize_h */
+#endif /* DETOKENIZE_H */
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 728d5ca8c..5a88ba017 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -29,13 +29,11 @@
 #include "vpx_scale/vpxscale.h"
 #include "systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
-
+#include "detokenize.h"
 
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
-
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
 
-// DEBUG code
 #if CONFIG_DEBUG
 void vp8_recon_write_yuv_frame(unsigned char *name, YV12_BUFFER_CONFIG *s)
 {
@@ -129,6 +127,9 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
         cm->last_sharpness_level = cm->sharpness_level;
     }
 
+#if CONFIG_ARM_ASM_DETOK
+    vp8_init_detokenizer(pbi);
+#endif
     pbi->common.error.setjmp = 0;
     return (VP8D_PTR) pbi;
 }
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index e9674ca1c..d40f76e22 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,24 +11,17 @@
 
 #VP8_DX_SRCS list is modified according to different platforms.
 
-#File list for arm
-# decoder
-#VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/decodframe_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-
-#VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/decodframe.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
+VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
-# decoder
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 
 #File list for neon
-# decoder
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)

From 80d3923a78e0fa85194b58d327d619e63919fbd8 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 13 Aug 2010 14:50:51 -0400
Subject: [PATCH 121/307] move segmentation_common to encoder

vp8_update_gf_useage_maps() is only used by the encoder. This patch
fixes the ability to build in decode-only or encode-only
configurations.

Change-Id: I3a5211428e539886ba998e09e8abd747ac55c9aa
---
 vp8/encoder/encodeframe.c                                    | 2 +-
 vp8/encoder/onyx_if.c                                        | 2 +-
 vp8/{common/segmentation_common.c => encoder/segmentation.c} | 2 +-
 vp8/{common/segmentation_common.h => encoder/segmentation.h} | 0
 vp8/vp8_common.mk                                            | 2 --
 vp8/vp8cx.mk                                                 | 2 ++
 6 files changed, 5 insertions(+), 5 deletions(-)
 rename vp8/{common/segmentation_common.c => encoder/segmentation.c} (98%)
 rename vp8/{common/segmentation_common.h => encoder/segmentation.h} (100%)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index bddb55b49..0f9bf4a1d 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -17,7 +17,7 @@
 #include "extend.h"
 #include "entropymode.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
 #include "setupintrarecon.h"
 #include "encodeintra.h"
 #include "reconinter.h"
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index f768e60c7..17b33d79a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -21,7 +21,7 @@
 #include "extend.h"
 #include "ratectrl.h"
 #include "quant_common.h"
-#include "segmentation_common.h"
+#include "segmentation.h"
 #include "g_common.h"
 #include "vpx_scale/yv12extend.h"
 #include "postproc.h"
diff --git a/vp8/common/segmentation_common.c b/vp8/encoder/segmentation.c
similarity index 98%
rename from vp8/common/segmentation_common.c
rename to vp8/encoder/segmentation.c
index 16b96e9db..bb78614c6 100644
--- a/vp8/common/segmentation_common.c
+++ b/vp8/encoder/segmentation.c
@@ -9,7 +9,7 @@
  */
 
 
-#include "segmentation_common.h"
+#include "segmentation.h"
 #include "vpx_mem/vpx_mem.h"
 
 void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x)
diff --git a/vp8/common/segmentation_common.h b/vp8/encoder/segmentation.h
similarity index 100%
rename from vp8/common/segmentation_common.h
rename to vp8/encoder/segmentation.h
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index a8a252a17..dea237377 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -27,7 +27,6 @@ VP8_COMMON_SRCS-yes += common/onyxd.h
 
 CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)common
 
-VP8_COMMON_SRCS-yes += common/segmentation_common.c
 VP8_COMMON_SRCS-yes += common/alloccommon.c
 VP8_COMMON_SRCS-yes += common/blockd.c
 VP8_COMMON_SRCS-yes += common/coefupdateprobs.h
@@ -64,7 +63,6 @@ VP8_COMMON_SRCS-yes += common/recon.h
 VP8_COMMON_SRCS-yes += common/reconinter.h
 VP8_COMMON_SRCS-yes += common/reconintra.h
 VP8_COMMON_SRCS-yes += common/reconintra4x4.h
-VP8_COMMON_SRCS-yes += common/segmentation_common.h
 VP8_COMMON_SRCS-yes += common/setupintrarecon.h
 VP8_COMMON_SRCS-yes += common/subpixel.h
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.h
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index c0a58ae29..50eb29731 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -74,6 +74,8 @@ VP8_CX_SRCS-yes += encoder/quantize.c
 VP8_CX_SRCS-yes += encoder/ratectrl.c
 VP8_CX_SRCS-yes += encoder/rdopt.c
 VP8_CX_SRCS-yes += encoder/sad_c.c
+VP8_CX_SRCS-yes += encoder/segmentation.c
+VP8_CX_SRCS-yes += encoder/segmentation.h
 VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c
 VP8_CX_SRCS-yes += encoder/tokenize.c
 VP8_CX_SRCS-yes += encoder/treewriter.c

From 9aa498b82a7666491f6407b13087c7375834a490 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 16 Aug 2010 09:34:30 -0400
Subject: [PATCH 122/307] arm: fix missing dependency with --enable-shared

The C version of the dequant/idct/add function depends on the C
version of the IDCT, but this isn't compiled in on ARM. Since this
code has asm version, we can just remove this file to eliminate the
link error.

Change-Id: I21de74d89d3765a1db2da27292b20727c53178e9
---
 vp8/vp8dx_arm.mk | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index d40f76e22..61a1ce4e6 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -14,6 +14,7 @@
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
+VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
 VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6

From c75f3993c00772b6ec5ee00a1e6cbde91243c016 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Mon, 16 Aug 2010 10:32:15 -0400
Subject: [PATCH 123/307] store more vars than we removed

only saved r4-11+lr, but were storing r4-r12+lr

Change-Id: If77df1998af50e9badee7d99ef53543046434675
---
 vp8/common/arm/armv6/simpleloopfilter_v6.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 3ba6ccaca..2f090dbdd 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -309,7 +309,7 @@ pstep       RN  r1
 
     bne         simple_vnext8
 
-    ldmia       sp!, {r4 - r12, pc}
+    ldmia       sp!, {r4 - r11, pc}
     ENDP        ; |vp8_loop_filter_simple_vertical_edge_armv6|
 
 ; Constant Pool

From 6ea5bb85cd1547b846f4c794e8684de5abcf9f62 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Wed, 18 Aug 2010 15:29:38 -0400
Subject: [PATCH 124/307] Removed ssse3 sixtap code

Change-Id: I0f20fbb898ee31eb94a143471aa6f1ca17a229a4
---
 vp8/common/x86/subpixel_ssse3.asm    | 928 ---------------------------
 vp8/common/x86/subpixel_x86.h        |  32 -
 vp8/common/x86/vp8_asm_stubs.c       | 191 ------
 vp8/common/x86/x86_systemdependent.c |  11 -
 vp8/vp8_common.mk                    |   1 -
 5 files changed, 1163 deletions(-)
 delete mode 100644 vp8/common/x86/subpixel_ssse3.asm

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
deleted file mode 100644
index 6b3f9994f..000000000
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ /dev/null
@@ -1,928 +0,0 @@
-;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-%include "vpx_ports/x86_abi_support.asm"
-
-%define BLOCK_HEIGHT_WIDTH 4
-%define VP8_FILTER_WEIGHT 128
-%define VP8_FILTER_SHIFT  7
-
-
-;/************************************************************************************
-; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
-; input pixel array has output_height rows. This routine assumes that output_height is an
-; even number. This function handles 8 pixels in horizontal direction, calculating ONE
-; rows each iteration to take advantage of the 128 bits operations.
-;*************************************************************************************/
-;void vp8_filter_block1d8_h6_ssse3
-;(
-;    unsigned char  *src_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned char *output_ptr,
-;    unsigned int    output_pitch,
-;    unsigned int    output_height,
-;    unsigned int    vp8_filter_index
-;)
-global sym(vp8_filter_block1d8_h6_ssse3)
-sym(vp8_filter_block1d8_h6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4
-
-    movdqa      xmm7, [rd GLOBAL]
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-    mov         rdi, arg(2)             ;output_ptr
-
-    cmp         esi, DWORD PTR [rax]
-    je          vp8_filter_block1d8_h4_ssse3
-
-    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-    sub         rdi, rdx
-;xmm3 free
-filter_block1d8_h6_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
-
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pmaddubsw   xmm0, xmm4
-    pmaddubsw   xmm1, xmm5
-
-    pshufb      xmm2, [shuf3b GLOBAL]
-    add         rdi, rdx
-    pmaddubsw   xmm2, xmm6
-
-    lea         rsi,    [rsi + rax]
-    dec         rcx
-    paddsw      xmm0, xmm1
-    paddsw      xmm0, xmm7
-    paddsw      xmm0, xmm2
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-
-    movq        MMWORD Ptr [rdi], xmm0
-    jnz         filter_block1d8_h6_rowloop_ssse3
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d8_h4_ssse3:
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-
-    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
-    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-    sub         rdi, rdx
-;xmm3 free
-filter_block1d8_h4_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm2, xmm0
-    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
-    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]
-
-    pmaddubsw   xmm0, xmm5
-    add         rdi, rdx
-    pmaddubsw   xmm2, xmm6
-
-    lea         rsi,    [rsi + rax]
-    dec         rcx
-    paddsw      xmm0, xmm7
-    paddsw      xmm0, xmm2
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-
-    movq        MMWORD Ptr [rdi], xmm0
-
-    jnz         filter_block1d8_h4_rowloop_ssse3
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-;void vp8_filter_block1d16_h6_ssse3
-;(
-;    unsigned char  *src_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned char  *output_ptr,
-;    unsigned int    output_pitch,
-;    unsigned int    output_height,
-;    unsigned int    vp8_filter_index
-;)
-global sym(vp8_filter_block1d16_h6_ssse3)
-sym(vp8_filter_block1d16_h6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    SAVE_XMM
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4      ;
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-
-    mov         rdi, arg(2)             ;output_ptr
-    movdqa      xmm7, [rd GLOBAL]
-
-;;
-;;    cmp         esi, DWORD PTR [rax]
-;;    je          vp8_filter_block1d16_h4_ssse3
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-filter_block1d16_h6_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
-    movdqa      xmm2, xmm1
-    pmaddubsw   xmm0, xmm4
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-
-    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
-
-    pmaddubsw   xmm2, xmm6
-    paddsw      xmm0, xmm1
-    movdqa      xmm1, xmm3
-    pshufb      xmm3, [shuf1b GLOBAL]
-    paddsw      xmm0, xmm7
-    pmaddubsw   xmm3, xmm4
-    paddsw      xmm0, xmm2
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-    pmaddubsw   xmm2, xmm6
-
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-    lea         rsi,    [rsi + rax]
-    paddsw      xmm3, xmm1
-    paddsw      xmm3, xmm7
-    paddsw      xmm3, xmm2
-    psraw       xmm3, 7
-    packuswb    xmm3, xmm3
-
-    punpcklqdq  xmm0, xmm3
-
-    movdqa      XMMWORD Ptr [rdi], xmm0
-
-    add         rdi, rdx
-    dec         rcx
-    jnz         filter_block1d16_h6_rowloop_ssse3
-
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d16_h4_ssse3:
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-filter_block1d16_h4_rowloop_ssse3:
-    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-
-    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
-
-    pmaddubsw   xmm2, xmm6
-    movdqa      xmm0, xmm3
-    pshufb      xmm3, [shuf3b GLOBAL]
-    pshufb      xmm0, [shuf2b GLOBAL]
-
-    paddsw      xmm1, xmm7
-    paddsw      xmm1, xmm2
-
-    pmaddubsw   xmm0, xmm5
-    pmaddubsw   xmm3, xmm6
-
-    psraw       xmm1, 7
-    packuswb    xmm1, xmm1
-    lea         rsi,    [rsi + rax]
-    paddsw      xmm3, xmm0
-    paddsw      xmm3, xmm7
-    psraw       xmm3, 7
-    packuswb    xmm3, xmm3
-
-    punpcklqdq  xmm1, xmm3
-
-    movdqa      XMMWORD Ptr [rdi], xmm1
-
-    add         rdi, rdx
-    dec         rcx
-    jnz         filter_block1d16_h4_rowloop_ssse3
-
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-;void vp8_filter_block1d4_h6_ssse3
-;(
-;    unsigned char  *src_ptr,
-;    unsigned int    src_pixels_per_line,
-;    unsigned char  *output_ptr,
-;    unsigned int    output_pitch,
-;    unsigned int    output_height,
-;    unsigned int    vp8_filter_index
-;)
-global sym(vp8_filter_block1d4_h6_ssse3)
-sym(vp8_filter_block1d4_h6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4      ;
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-    movdqa      xmm7, [rd GLOBAL]
-
-    cmp         esi, DWORD PTR [rax]
-    je          vp8_filter_block1d4_h4_ssse3
-
-    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-    mov         rdi, arg(2)             ;output_ptr
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-;xmm3 free
-filter_block1d4_h6_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
-
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pmaddubsw   xmm0, xmm4
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-
-;--
-    pmaddubsw   xmm2, xmm6
-
-    lea         rsi,    [rsi + rax]
-;--
-    paddsw      xmm0, xmm1
-    paddsw      xmm0, xmm7
-    pxor        xmm1, xmm1
-    paddsw      xmm0, xmm2
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
-
-    movd        DWORD PTR [rdi], xmm0
-
-    add         rdi, rdx
-    dec         rcx
-    jnz         filter_block1d4_h6_rowloop_ssse3
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d4_h4_ssse3:
-    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
-    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]
-
-    mov         rsi, arg(0)             ;src_ptr
-    mov         rdi, arg(2)             ;output_ptr
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
-
-filter_block1d4_h4_rowloop_ssse3:
-    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
-
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
-    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-
-;--
-    pmaddubsw   xmm2, xmm6
-
-    lea         rsi,    [rsi + rax]
-;--
-    paddsw      xmm1, xmm7
-    paddsw      xmm1, xmm2
-    psraw       xmm1, 7
-    packuswb    xmm1, xmm1
-
-    movd        DWORD PTR [rdi], xmm1
-
-    add         rdi, rdx
-    dec         rcx
-    jnz         filter_block1d4_h4_rowloop_ssse3
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
-
-;void vp8_filter_block1d16_v6_ssse3
-;(
-;    unsigned char *src_ptr,
-;    unsigned int   src_pitch,
-;    unsigned char *output_ptr,
-;    unsigned int   out_pitch,
-;    unsigned int   output_height,
-;    unsigned int   vp8_filter_index
-;)
-global sym(vp8_filter_block1d16_v6_ssse3)
-sym(vp8_filter_block1d16_v6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4      ;
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-
-    cmp         esi, DWORD PTR [rax]
-    je          vp8_filter_block1d16_v4_ssse3
-
-    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
-    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
-    mov         rdi, arg(2)             ;output_ptr
-
-%if ABI_IS_32BIT=0
-    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
-%endif
-    mov         rax, rsi
-    movsxd      rcx, DWORD PTR arg(4)   ;output_height
-    add         rax, rdx
-
-
-vp8_filter_block1d16_v6_ssse3_loop:
-    movq        xmm1, MMWORD PTR [rsi]                  ;A
-    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
-    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   xmm2, xmm4                  ;B D
-    punpcklbw   xmm3, xmm0                  ;C E
-
-    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
-
-    pmaddubsw   xmm3, xmm6
-    punpcklbw   xmm1, xmm0                  ;A F
-    pmaddubsw   xmm2, xmm7
-    pmaddubsw   xmm1, xmm5
-
-    paddsw      xmm2, xmm3
-    paddsw      xmm2, xmm1
-    paddsw      xmm2, [rd GLOBAL]
-    psraw       xmm2, 7
-    packuswb    xmm2, xmm2
-
-    movq        MMWORD PTR [rdi], xmm2          ;store the results
-
-    movq        xmm1, MMWORD PTR [rsi + 8]                  ;A
-    movq        xmm2, MMWORD PTR [rsi + rdx + 8]            ;B
-    movq        xmm3, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
-
-    punpcklbw   xmm2, xmm4                  ;B D
-    punpcklbw   xmm3, xmm0                  ;C E
-
-    movq        xmm0, MMWORD PTR [rax + rdx * 4 + 8]        ;F
-    pmaddubsw   xmm3, xmm6
-    punpcklbw   xmm1, xmm0                  ;A F
-    pmaddubsw   xmm2, xmm7
-    pmaddubsw   xmm1, xmm5
-
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      xmm2, xmm3
-    paddsw      xmm2, xmm1
-    paddsw      xmm2, [rd GLOBAL]
-    psraw       xmm2, 7
-    packuswb    xmm2, xmm2
-
-    movq        MMWORD PTR [rdi+8], xmm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;out_pitch
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d16_v6_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d16_v4_ssse3:
-    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
-    mov         rdi, arg(2)             ;output_ptr
-
-%if ABI_IS_32BIT=0
-    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
-%endif
-    mov         rax, rsi
-    movsxd      rcx, DWORD PTR arg(4)   ;output_height
-    add         rax, rdx
-
-vp8_filter_block1d16_v4_ssse3_loop:
-    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
-    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   xmm2, xmm4                  ;B D
-    punpcklbw   xmm3, xmm0                  ;C E
-
-    pmaddubsw   xmm3, xmm6
-    pmaddubsw   xmm2, xmm7
-    movq        xmm5, MMWORD PTR [rsi + rdx + 8]            ;B
-    movq        xmm1, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
-
-    paddsw      xmm2, [rd GLOBAL]
-    paddsw      xmm2, xmm3
-    psraw       xmm2, 7
-    packuswb    xmm2, xmm2
-
-    punpcklbw   xmm5, xmm4                  ;B D
-    punpcklbw   xmm1, xmm0                  ;C E
-
-    pmaddubsw   xmm1, xmm6
-    pmaddubsw   xmm5, xmm7
-
-    movdqa      xmm4, [rd GLOBAL]
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      xmm5, xmm1
-    paddsw      xmm5, xmm4
-    psraw       xmm5, 7
-    packuswb    xmm5, xmm5
-
-    punpcklqdq  xmm2, xmm5
-
-    movdqa       XMMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;out_pitch
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d16_v4_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-;void vp8_filter_block1d8_v6_ssse3
-;(
-;    unsigned char *src_ptr,
-;    unsigned int   src_pitch,
-;    unsigned char *output_ptr,
-;    unsigned int   out_pitch,
-;    unsigned int   output_height,
-;    unsigned int   vp8_filter_index
-;)
-global sym(vp8_filter_block1d8_v6_ssse3)
-sym(vp8_filter_block1d8_v6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4      ;
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-
-    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
-    mov         rdi, arg(2)             ;output_ptr
-%if ABI_IS_32BIT=0
-    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
-%endif
-    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
-
-    cmp         esi, DWORD PTR [rax]
-    je          vp8_filter_block1d8_v4_ssse3
-
-    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
-    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    mov         rax, rsi
-    add         rax, rdx
-
-vp8_filter_block1d8_v6_ssse3_loop:
-    movq        xmm1, MMWORD PTR [rsi]                  ;A
-    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
-    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   xmm2, xmm4                  ;B D
-    punpcklbw   xmm3, xmm0                  ;C E
-
-    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
-    movdqa      xmm4, [rd GLOBAL]
-
-    pmaddubsw   xmm3, xmm6
-    punpcklbw   xmm1, xmm0                  ;A F
-    pmaddubsw   xmm2, xmm7
-    pmaddubsw   xmm1, xmm5
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      xmm2, xmm3
-    paddsw      xmm2, xmm1
-    paddsw      xmm2, xmm4
-    psraw       xmm2, 7
-    packuswb    xmm2, xmm2
-
-    movq        MMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d8_v6_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d8_v4_ssse3:
-    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
-    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
-    movdqa      xmm5, [rd GLOBAL]
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    mov         rax, rsi
-    add         rax, rdx
-
-vp8_filter_block1d8_v4_ssse3_loop:
-    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
-    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
-    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
-    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   xmm2, xmm4                  ;B D
-    punpcklbw   xmm3, xmm0                  ;C E
-
-    pmaddubsw   xmm3, xmm6
-    pmaddubsw   xmm2, xmm7
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      xmm2, xmm3
-    paddsw      xmm2, xmm5
-    psraw       xmm2, 7
-    packuswb    xmm2, xmm2
-
-    movq        MMWORD PTR [rdi], xmm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d8_v4_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-;void vp8_filter_block1d4_v6_ssse3
-;(
-;    unsigned char *src_ptr,
-;    unsigned int   src_pitch,
-;    unsigned char *output_ptr,
-;    unsigned int   out_pitch,
-;    unsigned int   output_height,
-;    unsigned int   vp8_filter_index
-;)
-global sym(vp8_filter_block1d4_v6_ssse3)
-sym(vp8_filter_block1d4_v6_ssse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 6
-    GET_GOT     rbx
-    push        rsi
-    push        rdi
-    ; end prolog
-
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
-    xor         rsi, rsi
-    shl         rdx, 4      ;
-
-    lea         rax, [k0_k5 GLOBAL]
-    add         rax, rdx
-
-    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
-    mov         rdi, arg(2)             ;output_ptr
-%if ABI_IS_32BIT=0
-    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
-%endif
-    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
-
-    cmp         esi, DWORD PTR [rax]
-    je          vp8_filter_block1d4_v4_ssse3
-
-    movq        mm5, MMWORD PTR [rax]         ;k0_k5
-    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
-    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    mov         rax, rsi
-    add         rax, rdx
-
-vp8_filter_block1d4_v6_ssse3_loop:
-    movd        mm1, DWORD PTR [rsi]                  ;A
-    movd        mm2, DWORD PTR [rsi + rdx]            ;B
-    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
-    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
-    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   mm2, mm4                  ;B D
-    punpcklbw   mm3, mm0                  ;C E
-
-    movd        mm0, DWORD PTR [rax + rdx * 4]        ;F
-
-    movq        mm4, [rd GLOBAL]
-
-    pmaddubsw   mm3, mm6
-    punpcklbw   mm1, mm0                  ;A F
-    pmaddubsw   mm2, mm7
-    pmaddubsw   mm1, mm5
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      mm2, mm3
-    paddsw      mm2, mm1
-    paddsw      mm2, mm4
-    psraw       mm2, 7
-    packuswb    mm2, mm2
-
-    movd        DWORD PTR [rdi], mm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d4_v6_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-vp8_filter_block1d4_v4_ssse3:
-    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
-    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
-    movq        mm5, MMWORD PTR [rd GLOBAL]
-
-    mov         rsi, arg(0)             ;src_ptr
-
-    mov         rax, rsi
-    add         rax, rdx
-
-vp8_filter_block1d4_v4_ssse3_loop:
-    movd        mm2, DWORD PTR [rsi + rdx]            ;B
-    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
-    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
-    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
-
-    punpcklbw   mm2, mm4                  ;B D
-    punpcklbw   mm3, mm0                  ;C E
-
-    pmaddubsw   mm3, mm6
-    pmaddubsw   mm2, mm7
-    add         rsi,  rdx
-    add         rax,  rdx
-;--
-;--
-    paddsw      mm2, mm3
-    paddsw      mm2, mm5
-    psraw       mm2, 7
-    packuswb    mm2, mm2
-
-    movd        DWORD PTR [rdi], mm2
-
-%if ABI_IS_32BIT
-    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
-%else
-    add         rdi,        r8
-%endif
-    dec         rcx
-    jnz         vp8_filter_block1d4_v4_ssse3_loop
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-SECTION_RODATA
-align 16
-shuf1b:
-    db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
-shuf2b:
-    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
-shuf3b:
-    db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
-
-align 16
-rd:
-    times 8 dw 0x40
-
-align 16
-k0_k5:
-    times 8 db 0, 0             ;placeholder
-    times 8 db 0, 0
-    times 8 db 2, 1
-    times 8 db 0, 0
-    times 8 db 3, 3
-    times 8 db 0, 0
-    times 8 db 1, 2
-    times 8 db 0, 0
-k1_k3:
-    times 8 db  0,    0         ;placeholder
-    times 8 db  -6,  12
-    times 8 db -11,  36
-    times 8 db  -9,  50
-    times 8 db -16,  77
-    times 8 db  -6,  93
-    times 8 db  -8, 108
-    times 8 db  -1, 123
-k2_k4:
-    times 8 db 128,    0        ;placeholder
-    times 8 db 123,   -1
-    times 8 db 108,   -8
-    times 8 db  93,   -6
-    times 8 db  77,  -16
-    times 8 db  50,   -9
-    times 8 db  36,  -11
-    times 8 db  12,   -6
-
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index b371892c9..9d6c8e53b 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -86,37 +86,5 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
 #endif
 #endif
 
-#if HAVE_SSSE3
-extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
-extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
-extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
-extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
-//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
-//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
-
-#if !CONFIG_RUNTIME_CPU_DETECT
-#undef  vp8_subpix_sixtap16x16
-#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
-
-#undef  vp8_subpix_sixtap8x8
-#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
-
-#undef  vp8_subpix_sixtap8x4
-#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
-
-#undef  vp8_subpix_sixtap4x4
-#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
-
-
-//#undef  vp8_subpix_bilinear16x16
-//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
-
-//#undef  vp8_subpix_bilinear8x8
-//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
-
-#endif
-#endif
-
-
 
 #endif
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 8b54b2327..0d7e095d7 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -360,194 +360,3 @@ void vp8_sixtap_predict8x4_sse2
 
 #endif
 
-#if HAVE_SSSE3
-
-extern void vp8_filter_block1d8_h6_ssse3
-(
-    unsigned char  *src_ptr,
-    unsigned int    src_pixels_per_line,
-    unsigned char  *output_ptr,
-    unsigned int    output_pitch,
-    unsigned int    output_height,
-    unsigned int    vp8_filter_index
-);
-
-extern void vp8_filter_block1d16_h6_ssse3
-(
-    unsigned char  *src_ptr,
-    unsigned int    src_pixels_per_line,
-    unsigned char  *output_ptr,
-    unsigned int    output_pitch,
-    unsigned int    output_height,
-    unsigned int    vp8_filter_index
-);
-
-extern void vp8_filter_block1d16_v6_ssse3
-(
-    unsigned char *src_ptr,
-    unsigned int   src_pitch,
-    unsigned char *output_ptr,
-    unsigned int   out_pitch,
-    unsigned int   output_height,
-    unsigned int   vp8_filter_index
-);
-
-extern void vp8_filter_block1d8_v6_ssse3
-(
-    unsigned char *src_ptr,
-    unsigned int   src_pitch,
-    unsigned char *output_ptr,
-    unsigned int   out_pitch,
-    unsigned int   output_height,
-    unsigned int   vp8_filter_index
-);
-
-extern void vp8_filter_block1d4_h6_ssse3
-(
-    unsigned char  *src_ptr,
-    unsigned int    src_pixels_per_line,
-    unsigned char  *output_ptr,
-    unsigned int    output_pitch,
-    unsigned int    output_height,
-    unsigned int    vp8_filter_index
-);
-
-extern void vp8_filter_block1d4_v6_ssse3
-(
-    unsigned char *src_ptr,
-    unsigned int   src_pitch,
-    unsigned char *output_ptr,
-    unsigned int   out_pitch,
-    unsigned int   output_height,
-    unsigned int   vp8_filter_index
-);
-
-void vp8_sixtap_predict16x16_ssse3
-(
-    unsigned char  *src_ptr,
-    int   src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pitch
-
-)
-{
-    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
-
-    if (xoffset)
-    {
-        if (yoffset)
-        {
-            vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
-            vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
-        }
-        else
-        {
-            // First-pass only
-            vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
-        }
-    }
-    else
-    {
-        // Second-pass only
-        vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
-    }
-}
-
-void vp8_sixtap_predict8x8_ssse3
-(
-    unsigned char  *src_ptr,
-    int   src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pitch
-)
-{
-    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
-
-    if (xoffset)
-    {
-        if (yoffset)
-        {
-            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
-            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
-        }
-        else
-        {
-            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
-        }
-    }
-    else
-    {
-        // Second-pass only
-        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
-    }
-}
-
-
-void vp8_sixtap_predict8x4_ssse3
-(
-    unsigned char  *src_ptr,
-    int   src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pitch
-)
-{
-    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
-
-    if (xoffset)
-    {
-        if (yoffset)
-        {
-            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
-            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
-        }
-        else
-        {
-            // First-pass only
-            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
-        }
-    }
-    else
-    {
-        // Second-pass only
-        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
-    }
-}
-
-void vp8_sixtap_predict4x4_ssse3
-(
-    unsigned char  *src_ptr,
-    int   src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pitch
-)
-{
-  DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
-
-  if (xoffset)
-  {
-      if (yoffset)
-      {
-          vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
-          vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
-      }
-      else
-      {
-          vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
-      }
-  }
-  else
-  {
-      vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
-  }
-
-}
-
-#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index ce487ff9f..021be92b6 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -116,16 +116,5 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
 #endif
 
-#if HAVE_SSSE3
-
-    if (SSSE3Enabled)
-    {
-        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
-        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
-        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
-        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
-    }
-#endif
-
 #endif
 }
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index dea237377..0ea906eb0 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -107,7 +107,6 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
-VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
 ifeq ($(CONFIG_POSTPROC),yes)
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm

From 467a0b99abb8bbf9c64cc517eff78b96aa81818c Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 19 Aug 2010 11:29:21 -0400
Subject: [PATCH 125/307] fix armv6 simpleloop filter

test cases were causing a crash because the count was being read
incorrectly. after fixing that, noticed that the output was not
matching. fixed that.

Change-Id: Idb0edb887736bd566a3cf6d4aa1a03ea8d20eb27
---
 vp8/common/arm/armv6/simpleloopfilter_v6.asm | 115 ++++++++-----------
 1 file changed, 47 insertions(+), 68 deletions(-)

diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 2f090dbdd..011808430 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -63,23 +63,22 @@ pstep       RN  r1
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
     stmdb       sp!, {r4 - r11, lr}
 
-    sub         src, src, pstep, lsl #1     ; move src pointer down by 2 lines
+    ldr         r12, [r3]                   ; limit
+    ldr         r3, [src, -pstep, lsl #1]   ; p1
 
-    ldr         r12, [r3], #4               ; limit
-    ldr         r3, [src], pstep            ; p1
+    ldr         r9, [sp, #40]               ; count for 8-in-parallel
+    ldr         r4, [src, -pstep]           ; p0
 
-    ldr         r9, [sp, #36]               ; count for 8-in-parallel
-    ldr         r4, [src], pstep            ; p0
-
-    ldr         r7, [r2], #4                ; flimit
-    ldr         r5, [src], pstep            ; q0
+    ldr         r7, [r2]                    ; flimit
+    ldr         r5, [src]                   ; q0
     ldr         r2, c0x80808080
 
-    ldr         r6, [src]                   ; q1
+    ldr         r6, [src, pstep]            ; q1
 
     uadd8       r7, r7, r7                  ; flimit * 2
-    mov         r9, r9, lsl #1              ; 4-in-parallel
+    mov         r9, r9, lsl #1              ; double the count. we're doing 4 at a time
     uadd8       r12, r7, r12                ; flimit * 2 + limit
+    mov         lr, #0
 
 |simple_hnext8|
     ; vp8_simple_filter_mask() function
@@ -89,22 +88,19 @@ pstep       RN  r1
     uqsub8      r10, r4, r5                 ; p0 - q0
     uqsub8      r11, r5, r4                 ; q0 - p0
     orr         r8, r8, r7                  ; abs(p1 - q1)
-    ldr         lr, c0x7F7F7F7F             ; 01111111 mask
     orr         r10, r10, r11               ; abs(p0 - q0)
-    and         r8, lr, r8, lsr #1          ; abs(p1 - q1) / 2
+    uhadd8      r8, r8, lr                  ; abs(p1 - q2) >> 1
     uqadd8      r10, r10, r10               ; abs(p0 - q0) * 2
-    mvn         lr, #0                      ; r10 == -1
+    ; STALL waiting on r10
     uqadd8      r10, r10, r8                ; abs(p0 - q0)*2 + abs(p1 - q1)/2
-    ; STALL waiting on r10 :(
-    uqsub8      r10, r10, r12               ; compare to flimit
-    mov         r8, #0
-
-    usub8       r10, r8, r10                ; use usub8 instead of ssub8
-    ; STALL (maybe?) when are flags set? :/
-    sel         r10, lr, r8                 ; filter mask: lr
-
+    ; STALL waiting on r10
+    mvn         r8, #0
+    uqsub8      r10, r10, r12               ; compare to flimit. need to do this twice because uqsub8 doesn't set GE flags
+                                            ; and usub8 doesn't saturate
+    usub8       r10, lr, r10                ; set GE flags for each byte
+    sel         r10, r8, lr                 ; filter mask: F or 0
     cmp         r10, #0
-    beq         simple_hskip_filter         ; skip filtering
+    beq         simple_hskip_filter         ; skip filtering if we're &ing with 0s. would just write out the same values
 
     ;vp8_simple_filter() function
 
@@ -113,55 +109,45 @@ pstep       RN  r1
     eor         r4, r4, r2                  ; p0 offset to convert to a signed value
     eor         r5, r5, r2                  ; q0 offset to convert to a signed value
 
-    qsub8       r3, r3, r6                  ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
-    qsub8       r6, r5, r4                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
+    qsub8       r3, r3, r6                  ; vp8_signed_char_clamp(p1-q1)
+    qsub8       r6, r5, r4                  ; vp8_signed_char_clamp(q0-p0)
+    qadd8       r3, r3, r6                  ; += q0-p0
+    qadd8       r3, r3, r6                  ; += q0-p0
+    qadd8       r3, r3, r6                  ; p1-q1 + 3*(q0-p0))
+    and         r3, r3, r10                 ; &= mask
 
-    qadd8       r3, r3, r6
-    ldr         r8, c0x03030303             ; r8 = 3
-
-    qadd8       r3, r3, r6
     ldr         r7, c0x04040404
-
-    qadd8       r3, r3, r6
-    and         r3, r3, lr                  ; vp8_filter &= mask;
+    ldr         r8, c0x03030303
 
     ;save bottom 3 bits so that we round one side +4 and the other +3
+    qadd8       r7 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
     qadd8       r8 , r3 , r8                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
-    qadd8       r3 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
 
-    mov         r7, #0
-    shadd8      r8 , r8 , r7                ; Filter2 >>= 3
-    shadd8      r3 , r3 , r7                ; Filter1 >>= 3
-    shadd8      r8 , r8 , r7
-    shadd8      r3 , r3 , r7
-    shadd8      r8 , r8 , r7                ; r8: Filter2
-    shadd8      r3 , r3 , r7                ; r7: filter1
+    mov         r3, #0
+    shadd8      r7 , r7 , r3
+    shadd8      r8 , r8 , r3
+    shadd8      r7 , r7 , r3
+    shadd8      r8 , r8 , r3
+    shadd8      r7 , r7 , r3                ; Filter1 >>= 3
+    shadd8      r8 , r8 , r3                ; Filter2 >>= 3
 
-    ;calculate output
-    sub         src, src, pstep, lsl #1
 
+    qsub8       r5 ,r5, r7                  ; u = vp8_signed_char_clamp(q0 - Filter1)
     qadd8       r4, r4, r8                  ; u = vp8_signed_char_clamp(p0 + Filter2)
-    qsub8       r5 ,r5, r3                  ; u = vp8_signed_char_clamp(q0 - Filter1)
-    eor         r4, r4, r2                  ; *op0 = u^0x80
-    str         r4, [src], pstep            ; store op0 result
     eor         r5, r5, r2                  ; *oq0 = u^0x80
-    str         r5, [src], pstep            ; store oq0 result
+    str         r5, [src]                   ; store oq0 result
+    eor         r4, r4, r2                  ; *op0 = u^0x80
+    str         r4, [src, -pstep]           ; store op0 result
 
 |simple_hskip_filter|
-    add         src, src, #4
-    sub         src, src, pstep
-    sub         src, src, pstep, lsl #1
 
     subs        r9, r9, #1
+    addne       src, src, #4                ; next row
 
-    ;pld            [src]
-    ;pld            [src, pstep]
-    ;pld            [src, pstep, lsl #1]
-
-    ldrne           r3, [src], pstep            ; p1
-    ldrne           r4, [src], pstep            ; p0
-    ldrne           r5, [src], pstep            ; q0
-    ldrne           r6, [src]                   ; q1
+    ldrne       r3, [src, -pstep, lsl #1]   ; p1
+    ldrne       r4, [src, -pstep]           ; p0
+    ldrne       r5, [src]                   ; q0
+    ldrne       r6, [src, pstep]            ; q1
 
     bne         simple_hnext8
 
@@ -174,9 +160,9 @@ pstep       RN  r1
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
     stmdb       sp!, {r4 - r11, lr}
 
-    ldr         r12, [r2], #4               ; r12: flimit
+    ldr         r12, [r2]                   ; r12: flimit
     ldr         r2, c0x80808080
-    ldr         r7, [r3], #4                ; limit
+    ldr         r7, [r3]                    ; limit
 
     ; load soure data to r7, r8, r9, r10
     ldrh        r3, [src, #-2]
@@ -213,16 +199,15 @@ pstep       RN  r1
     uqsub8      r10, r5, r4                 ; q0 - p0
     orr         r7, r7, r8                  ; abs(p1 - q1)
     orr         r9, r9, r10                 ; abs(p0 - q0)
-    ldr         lr, c0x7F7F7F7F             ; 0111 1111 mask
-    uqadd8      r9, r9, r9                  ; abs(p0 - q0) * 2
-    and         r7, lr, r7, lsr #1          ; abs(p1 - q1) / 2
     mov         r8, #0
+    uqadd8      r9, r9, r9                  ; abs(p0 - q0) * 2
+    uhadd8      r7, r7, r8                  ; abs(p1 - q1) / 2
     uqadd8      r7, r7, r9                  ; abs(p0 - q0)*2 + abs(p1 - q1)/2
     mvn         r10, #0                     ; r10 == -1
     uqsub8      r7, r7, r12                 ; compare to flimit
 
     usub8       r7, r8, r7
-    sel         r7, r10, r8                 ; filter mask: lr
+    sel         lr, r10, r8                 ; filter mask
 
     cmp         lr, #0
     beq         simple_vskip_filter         ; skip filtering
@@ -286,10 +271,6 @@ pstep       RN  r1
 |simple_vskip_filter|
     subs        r11, r11, #1
 
-    ;pld            [src]
-    ;pld            [src, pstep]
-    ;pld            [src, pstep, lsl #1]
-
     ; load soure data to r7, r8, r9, r10
     ldrneh      r3, [src, #-2]
     ldrneh      r4, [src], pstep
@@ -316,7 +297,5 @@ pstep       RN  r1
 c0x80808080 DCD     0x80808080
 c0x03030303 DCD     0x03030303
 c0x04040404 DCD     0x04040404
-c0x01010101 DCD     0x01010101
-c0x7F7F7F7F DCD     0x7F7F7F7F
 
     END

From 52852da7c9740c335e5295531a77363a413cc140 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 19 Aug 2010 13:37:40 -0400
Subject: [PATCH 126/307] cleanup simple loop filter

move some things around, reorder some instructions

constant 0 is used several times. load it once per call in horiz,
once per loop in vert.

separate saturating instructions to avoid stalls.

just use one usub8 call to set GE flags, rather than uqsub8 followed by
usub8 w/ 0

document some stalls for further consideration

Change-Id: Ic3877e0ddbe314bb8a17fd5db73501a7d64570ec
---
 vp8/common/arm/armv6/simpleloopfilter_v6.asm | 106 ++++++++-----------
 1 file changed, 46 insertions(+), 60 deletions(-)

diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 011808430..3a700cd59 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -55,8 +55,8 @@ pstep       RN  r1
 ;stack  const char *thresh,
 ;stack  int  count
 
-;Note: All 16 elements in flimit are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
+; All 16 elements in flimit are equal. So, in the code, only one load is needed
+; for flimit. Same applies to limit. thresh is not used in simple looopfilter
 
 ;-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 |vp8_loop_filter_simple_horizontal_edge_armv6| PROC
@@ -65,23 +65,19 @@ pstep       RN  r1
 
     ldr         r12, [r3]                   ; limit
     ldr         r3, [src, -pstep, lsl #1]   ; p1
-
-    ldr         r9, [sp, #40]               ; count for 8-in-parallel
     ldr         r4, [src, -pstep]           ; p0
-
-    ldr         r7, [r2]                    ; flimit
     ldr         r5, [src]                   ; q0
-    ldr         r2, c0x80808080
-
     ldr         r6, [src, pstep]            ; q1
-
+    ldr         r7, [r2]                    ; flimit
+    ldr         r2, c0x80808080
+    ldr         r9, [sp, #40]               ; count for 8-in-parallel
     uadd8       r7, r7, r7                  ; flimit * 2
     mov         r9, r9, lsl #1              ; double the count. we're doing 4 at a time
     uadd8       r12, r7, r12                ; flimit * 2 + limit
-    mov         lr, #0
+    mov         lr, #0                      ; need 0 in a couple places
 
 |simple_hnext8|
-    ; vp8_simple_filter_mask() function
+    ; vp8_simple_filter_mask()
 
     uqsub8      r7, r3, r6                  ; p1 - q1
     uqsub8      r8, r6, r3                  ; q1 - p1
@@ -89,58 +85,50 @@ pstep       RN  r1
     uqsub8      r11, r5, r4                 ; q0 - p0
     orr         r8, r8, r7                  ; abs(p1 - q1)
     orr         r10, r10, r11               ; abs(p0 - q0)
-    uhadd8      r8, r8, lr                  ; abs(p1 - q2) >> 1
     uqadd8      r10, r10, r10               ; abs(p0 - q0) * 2
-    ; STALL waiting on r10
+    uhadd8      r8, r8, lr                  ; abs(p1 - q2) >> 1
     uqadd8      r10, r10, r8                ; abs(p0 - q0)*2 + abs(p1 - q1)/2
-    ; STALL waiting on r10
     mvn         r8, #0
-    uqsub8      r10, r10, r12               ; compare to flimit. need to do this twice because uqsub8 doesn't set GE flags
-                                            ; and usub8 doesn't saturate
-    usub8       r10, lr, r10                ; set GE flags for each byte
+    usub8       r10, r12, r10               ; compare to flimit. usub8 sets GE flags
     sel         r10, r8, lr                 ; filter mask: F or 0
     cmp         r10, #0
-    beq         simple_hskip_filter         ; skip filtering if we're &ing with 0s. would just write out the same values
+    beq         simple_hskip_filter         ; skip filtering if all masks are 0x00
 
-    ;vp8_simple_filter() function
+    ;vp8_simple_filter()
 
     eor         r3, r3, r2                  ; p1 offset to convert to a signed value
     eor         r6, r6, r2                  ; q1 offset to convert to a signed value
     eor         r4, r4, r2                  ; p0 offset to convert to a signed value
     eor         r5, r5, r2                  ; q0 offset to convert to a signed value
 
-    qsub8       r3, r3, r6                  ; vp8_signed_char_clamp(p1-q1)
-    qsub8       r6, r5, r4                  ; vp8_signed_char_clamp(q0-p0)
-    qadd8       r3, r3, r6                  ; += q0-p0
-    qadd8       r3, r3, r6                  ; += q0-p0
-    qadd8       r3, r3, r6                  ; p1-q1 + 3*(q0-p0))
-    and         r3, r3, r10                 ; &= mask
-
+    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
+    qsub8       r6, r5, r4                  ; q0 - p0
+    qadd8       r3, r3, r6                  ; += q0 - p0
     ldr         r7, c0x04040404
+    qadd8       r3, r3, r6                  ; += q0 - p0
     ldr         r8, c0x03030303
+    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
+    ;STALL
+    and         r3, r3, r10                 ; vp8_filter &= mask
 
-    ;save bottom 3 bits so that we round one side +4 and the other +3
-    qadd8       r7 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
-    qadd8       r8 , r3 , r8                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
+    qadd8       r7 , r3 , r7                ; Filter1 = vp8_filter + 4
+    qadd8       r8 , r3 , r8                ; Filter2 = vp8_filter + 3
 
-    mov         r3, #0
-    shadd8      r7 , r7 , r3
-    shadd8      r8 , r8 , r3
-    shadd8      r7 , r7 , r3
-    shadd8      r8 , r8 , r3
-    shadd8      r7 , r7 , r3                ; Filter1 >>= 3
-    shadd8      r8 , r8 , r3                ; Filter2 >>= 3
+    shadd8      r7 , r7 , lr
+    shadd8      r8 , r8 , lr
+    shadd8      r7 , r7 , lr
+    shadd8      r8 , r8 , lr
+    shadd8      r7 , r7 , lr                ; Filter1 >>= 3
+    shadd8      r8 , r8 , lr                ; Filter2 >>= 3
 
-
-    qsub8       r5 ,r5, r7                  ; u = vp8_signed_char_clamp(q0 - Filter1)
-    qadd8       r4, r4, r8                  ; u = vp8_signed_char_clamp(p0 + Filter2)
+    qsub8       r5 ,r5, r7                  ; u = q0 - Filter1
+    qadd8       r4, r4, r8                  ; u = p0 + Filter2
     eor         r5, r5, r2                  ; *oq0 = u^0x80
     str         r5, [src]                   ; store oq0 result
     eor         r4, r4, r2                  ; *op0 = u^0x80
     str         r4, [src, -pstep]           ; store op0 result
 
 |simple_hskip_filter|
-
     subs        r9, r9, #1
     addne       src, src, #4                ; next row
 
@@ -204,9 +192,8 @@ pstep       RN  r1
     uhadd8      r7, r7, r8                  ; abs(p1 - q1) / 2
     uqadd8      r7, r7, r9                  ; abs(p0 - q0)*2 + abs(p1 - q1)/2
     mvn         r10, #0                     ; r10 == -1
-    uqsub8      r7, r7, r12                 ; compare to flimit
 
-    usub8       r7, r8, r7
+    usub8       r7, r12, r7                 ; compare to flimit
     sel         lr, r10, r8                 ; filter mask
 
     cmp         lr, #0
@@ -218,35 +205,34 @@ pstep       RN  r1
     eor         r4, r4, r2                  ; p0 offset to convert to a signed value
     eor         r5, r5, r2                  ; q0 offset to convert to a signed value
 
-    qsub8       r3, r3, r6                  ; vp8_filter (r3) = vp8_signed_char_clamp(p1-q1)
-    qsub8       r6, r5, r4                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( q0 - p0))
+    qsub8       r3, r3, r6                  ; vp8_filter = p1 - q1
+    qsub8       r6, r5, r4                  ; q0 - p0
 
-    qadd8       r3, r3, r6
-    ldr         r8, c0x03030303             ; r8 = 3
+    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
+    ldr         r9, c0x03030303             ; r9 = 3
 
-    qadd8       r3, r3, r6
+    qadd8       r3, r3, r6                  ; vp8_filter += q0 - p0
     ldr         r7, c0x04040404
 
-    qadd8       r3, r3, r6
+    qadd8       r3, r3, r6                  ; vp8_filter = p1-q1 + 3*(q0-p0))
+    ;STALL
     and         r3, r3, lr                  ; vp8_filter &= mask
 
-    ;save bottom 3 bits so that we round one side +4 and the other +3
-    qadd8       r8 , r3 , r8                ; Filter2 (r8) = vp8_signed_char_clamp(vp8_filter+3)
-    qadd8       r3 , r3 , r7                ; Filter1 (r3) = vp8_signed_char_clamp(vp8_filter+4)
+    qadd8       r9 , r3 , r9                ; Filter2 = vp8_filter + 3
+    qadd8       r3 , r3 , r7                ; Filter1 = vp8_filter + 4
 
-    mov         r7, #0
-    shadd8      r8 , r8 , r7                ; Filter2 >>= 3
-    shadd8      r3 , r3 , r7                ; Filter1 >>= 3
-    shadd8      r8 , r8 , r7
-    shadd8      r3 , r3 , r7
-    shadd8      r8 , r8 , r7                ; r8: filter2
-    shadd8      r3 , r3 , r7                ; r7: filter1
+    shadd8      r9 , r9 , r8
+    shadd8      r3 , r3 , r8
+    shadd8      r9 , r9 , r8
+    shadd8      r3 , r3 , r8
+    shadd8      r9 , r9 , r8                ; Filter2 >>= 3
+    shadd8      r3 , r3 , r8                ; Filter1 >>= 3
 
     ;calculate output
     sub         src, src, pstep, lsl #2
 
-    qadd8       r4, r4, r8                  ; u = vp8_signed_char_clamp(p0 + Filter2)
-    qsub8       r5, r5, r3                  ; u = vp8_signed_char_clamp(q0 - Filter1)
+    qadd8       r4, r4, r9                  ; u = p0 + Filter2
+    qsub8       r5, r5, r3                  ; u = q0 - Filter1
     eor         r4, r4, r2                  ; *op0 = u^0x80
     eor         r5, r5, r2                  ; *oq0 = u^0x80
 

From b0660457fe46a48246e42a8e5c0ce78c0e2e4164 Mon Sep 17 00:00:00 2001
From: Jim Bankoski <jimbankoski@google.com>
Date: Thu, 19 Aug 2010 15:50:29 -0400
Subject: [PATCH 127/307] Revert "Removed ssse3 sixtap code"

This reverts commit 6ea5bb85cd1547b846f4c794e8684de5abcf9f62.
---
 vp8/common/x86/subpixel_ssse3.asm    | 931 +++++++++++++++++++++++++++
 vp8/common/x86/subpixel_x86.h        |  32 +
 vp8/common/x86/vp8_asm_stubs.c       | 191 ++++++
 vp8/common/x86/x86_systemdependent.c |  11 +
 vp8/vp8_common.mk                    |   1 +
 5 files changed, 1166 insertions(+)
 create mode 100644 vp8/common/x86/subpixel_ssse3.asm

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
new file mode 100644
index 000000000..87173f2ca
--- /dev/null
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -0,0 +1,931 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%define BLOCK_HEIGHT_WIDTH 4
+%define VP8_FILTER_WEIGHT 128
+%define VP8_FILTER_SHIFT  7
+
+
+;/************************************************************************************
+; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The
+; input pixel array has output_height rows. This routine assumes that output_height is an
+; even number. This function handles 8 pixels in horizontal direction, calculating ONE
+; rows each iteration to take advantage of the 128 bits operations.
+;
+; This is an implementation of some of the SSE optimizations first seen in ffvp8
+;
+;*************************************************************************************/
+;void vp8_filter_block1d8_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_h6_ssse3)
+sym(vp8_filter_block1d8_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4
+
+    movdqa      xmm7, [rd GLOBAL]
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    mov         rdi, arg(2)             ;output_ptr
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_h4_ssse3
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pmaddubsw   xmm1, xmm5
+
+    pshufb      xmm2, [shuf3b GLOBAL]
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+    jnz         filter_block1d8_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+    sub         rdi, rdx
+;xmm3 free
+filter_block1d8_h4_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm0
+    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]
+
+    pmaddubsw   xmm0, xmm5
+    add         rdi, rdx
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+    dec         rcx
+    paddsw      xmm0, xmm7
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movq        MMWORD Ptr [rdi], xmm0
+
+    jnz         filter_block1d8_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d16_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_h6_ssse3)
+sym(vp8_filter_block1d16_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    mov         rdi, arg(2)             ;output_ptr
+    movdqa      xmm7, [rd GLOBAL]
+
+;;
+;;    cmp         esi, DWORD PTR [rax]
+;;    je          vp8_filter_block1d16_h4_ssse3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+    movdqa      xmm2, xmm1
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    paddsw      xmm0, xmm1
+    movdqa      xmm1, xmm3
+    pshufb      xmm3, [shuf1b GLOBAL]
+    paddsw      xmm0, xmm7
+    pmaddubsw   xmm3, xmm4
+    paddsw      xmm0, xmm2
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+    pmaddubsw   xmm2, xmm6
+
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm1
+    paddsw      xmm3, xmm7
+    paddsw      xmm3, xmm2
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm0, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm0
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h6_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d16_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+
+    pmaddubsw   xmm2, xmm6
+    movdqa      xmm0, xmm3
+    pshufb      xmm3, [shuf3b GLOBAL]
+    pshufb      xmm0, [shuf2b GLOBAL]
+
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+
+    pmaddubsw   xmm0, xmm5
+    pmaddubsw   xmm3, xmm6
+
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+    lea         rsi,    [rsi + rax]
+    paddsw      xmm3, xmm0
+    paddsw      xmm3, xmm7
+    psraw       xmm3, 7
+    packuswb    xmm3, xmm3
+
+    punpcklqdq  xmm1, xmm3
+
+    movdqa      XMMWORD Ptr [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d16_h4_rowloop_ssse3
+
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d4_h6_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    unsigned int    src_pixels_per_line,
+;    unsigned char  *output_ptr,
+;    unsigned int    output_pitch,
+;    unsigned int    output_height,
+;    unsigned int    vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_h6_ssse3)
+sym(vp8_filter_block1d4_h6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+    movdqa      xmm7, [rd GLOBAL]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_h4_ssse3
+
+    movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+;xmm3 free
+filter_block1d4_h6_rowloop_ssse3:
+    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm1, xmm0
+    pshufb      xmm0, [shuf1b GLOBAL]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, [shuf2b GLOBAL]
+    pmaddubsw   xmm0, xmm4
+    pshufb      xmm2, [shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm0, xmm1
+    paddsw      xmm0, xmm7
+    pxor        xmm1, xmm1
+    paddsw      xmm0, xmm2
+    psraw       xmm0, 7
+    packuswb    xmm0, xmm0
+
+    movd        DWORD PTR [rdi], xmm0
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h6_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d4_h4_ssse3:
+    movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
+    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+    mov         rdi, arg(2)             ;output_ptr
+    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)   ;output_height
+
+    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+
+filter_block1d4_h4_rowloop_ssse3:
+    movdqu      xmm1,   XMMWORD PTR [rsi - 2]
+
+    movdqa      xmm2, xmm1
+    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
+    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
+    pmaddubsw   xmm1, xmm5
+
+;--
+    pmaddubsw   xmm2, xmm6
+
+    lea         rsi,    [rsi + rax]
+;--
+    paddsw      xmm1, xmm7
+    paddsw      xmm1, xmm2
+    psraw       xmm1, 7
+    packuswb    xmm1, xmm1
+
+    movd        DWORD PTR [rdi], xmm1
+
+    add         rdi, rdx
+    dec         rcx
+    jnz         filter_block1d4_h4_rowloop_ssse3
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
+;void vp8_filter_block1d16_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d16_v6_ssse3)
+sym(vp8_filter_block1d16_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d16_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+
+vp8_filter_block1d16_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2          ;store the results
+
+    movq        xmm1, MMWORD PTR [rsi + 8]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4 + 8]        ;F
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, [rd GLOBAL]
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi+8], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d16_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ;out_pitch
+%endif
+    mov         rax, rsi
+    movsxd      rcx, DWORD PTR arg(4)   ;output_height
+    add         rax, rdx
+
+vp8_filter_block1d16_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    movq        xmm5, MMWORD PTR [rsi + rdx + 8]            ;B
+    movq        xmm1, MMWORD PTR [rsi + rdx * 2 + 8]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
+
+    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, xmm3
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    punpcklbw   xmm5, xmm4                  ;B D
+    punpcklbw   xmm1, xmm0                  ;C E
+
+    pmaddubsw   xmm1, xmm6
+    pmaddubsw   xmm5, xmm7
+
+    movdqa      xmm4, [rd GLOBAL]
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm5, xmm1
+    paddsw      xmm5, xmm4
+    psraw       xmm5, 7
+    packuswb    xmm5, xmm5
+
+    punpcklqdq  xmm2, xmm5
+
+    movdqa       XMMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;out_pitch
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d16_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_filter_block1d8_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d8_v6_ssse3)
+sym(vp8_filter_block1d8_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d8_v4_ssse3
+
+    movdqa      xmm5, XMMWORD PTR [rax]         ;k0_k5
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v6_ssse3_loop:
+    movq        xmm1, MMWORD PTR [rsi]                  ;A
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
+    movdqa      xmm4, [rd GLOBAL]
+
+    pmaddubsw   xmm3, xmm6
+    punpcklbw   xmm1, xmm0                  ;A F
+    pmaddubsw   xmm2, xmm7
+    pmaddubsw   xmm1, xmm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm1
+    paddsw      xmm2, xmm4
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d8_v4_ssse3:
+    movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
+    movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
+    movdqa      xmm5, [rd GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d8_v4_ssse3_loop:
+    movq        xmm2, MMWORD PTR [rsi + rdx]            ;B
+    movq        xmm3, MMWORD PTR [rsi + rdx * 2]        ;C
+    movq        xmm4, MMWORD PTR [rax + rdx * 2]        ;D
+    movq        xmm0, MMWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   xmm2, xmm4                  ;B D
+    punpcklbw   xmm3, xmm0                  ;C E
+
+    pmaddubsw   xmm3, xmm6
+    pmaddubsw   xmm2, xmm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      xmm2, xmm3
+    paddsw      xmm2, xmm5
+    psraw       xmm2, 7
+    packuswb    xmm2, xmm2
+
+    movq        MMWORD PTR [rdi], xmm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d8_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+;void vp8_filter_block1d4_v6_ssse3
+;(
+;    unsigned char *src_ptr,
+;    unsigned int   src_pitch,
+;    unsigned char *output_ptr,
+;    unsigned int   out_pitch,
+;    unsigned int   output_height,
+;    unsigned int   vp8_filter_index
+;)
+global sym(vp8_filter_block1d4_v6_ssse3)
+sym(vp8_filter_block1d4_v6_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    xor         rsi, rsi
+    shl         rdx, 4      ;
+
+    lea         rax, [k0_k5 GLOBAL]
+    add         rax, rdx
+
+    movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
+    mov         rdi, arg(2)             ;output_ptr
+%if ABI_IS_32BIT=0
+    movsxd      r8, DWORD PTR arg(3)    ; out_pitch
+%endif
+    movsxd      rcx, DWORD PTR arg(4)   ;[output_height]
+
+    cmp         esi, DWORD PTR [rax]
+    je          vp8_filter_block1d4_v4_ssse3
+
+    movq        mm5, MMWORD PTR [rax]         ;k0_k5
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d4_v6_ssse3_loop:
+    movd        mm1, DWORD PTR [rsi]                  ;A
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
+
+    movd        mm0, DWORD PTR [rax + rdx * 4]        ;F
+
+    movq        mm4, [rd GLOBAL]
+
+    pmaddubsw   mm3, mm6
+    punpcklbw   mm1, mm0                  ;A F
+    pmaddubsw   mm2, mm7
+    pmaddubsw   mm1, mm5
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm1
+    paddsw      mm2, mm4
+    psraw       mm2, 7
+    packuswb    mm2, mm2
+
+    movd        DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d4_v6_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+vp8_filter_block1d4_v4_ssse3:
+    movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
+    movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
+    movq        mm5, MMWORD PTR [rd GLOBAL]
+
+    mov         rsi, arg(0)             ;src_ptr
+
+    mov         rax, rsi
+    add         rax, rdx
+
+vp8_filter_block1d4_v4_ssse3_loop:
+    movd        mm2, DWORD PTR [rsi + rdx]            ;B
+    movd        mm3, DWORD PTR [rsi + rdx * 2]        ;C
+    movd        mm4, DWORD PTR [rax + rdx * 2]        ;D
+    movd        mm0, DWORD PTR [rsi + rdx * 4]        ;E
+
+    punpcklbw   mm2, mm4                  ;B D
+    punpcklbw   mm3, mm0                  ;C E
+
+    pmaddubsw   mm3, mm6
+    pmaddubsw   mm2, mm7
+    add         rsi,  rdx
+    add         rax,  rdx
+;--
+;--
+    paddsw      mm2, mm3
+    paddsw      mm2, mm5
+    psraw       mm2, 7
+    packuswb    mm2, mm2
+
+    movd        DWORD PTR [rdi], mm2
+
+%if ABI_IS_32BIT
+    add         rdi,        DWORD PTR arg(3) ;[out_pitch]
+%else
+    add         rdi,        r8
+%endif
+    dec         rcx
+    jnz         vp8_filter_block1d4_v4_ssse3_loop
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+shuf1b:
+    db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
+shuf2b:
+    db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11
+shuf3b:
+    db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
+
+align 16
+rd:
+    times 8 dw 0x40
+
+align 16
+k0_k5:
+    times 8 db 0, 0             ;placeholder
+    times 8 db 0, 0
+    times 8 db 2, 1
+    times 8 db 0, 0
+    times 8 db 3, 3
+    times 8 db 0, 0
+    times 8 db 1, 2
+    times 8 db 0, 0
+k1_k3:
+    times 8 db  0,    0         ;placeholder
+    times 8 db  -6,  12
+    times 8 db -11,  36
+    times 8 db  -9,  50
+    times 8 db -16,  77
+    times 8 db  -6,  93
+    times 8 db  -8, 108
+    times 8 db  -1, 123
+k2_k4:
+    times 8 db 128,    0        ;placeholder
+    times 8 db 123,   -1
+    times 8 db 108,   -8
+    times 8 db  93,   -6
+    times 8 db  77,  -16
+    times 8 db  50,   -9
+    times 8 db  36,  -11
+    times 8 db  12,   -6
+
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index 9d6c8e53b..b371892c9 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -86,5 +86,37 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
 #endif
 #endif
 
+#if HAVE_SSSE3
+extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
+extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
+//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
+//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef  vp8_subpix_sixtap16x16
+#define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_ssse3
+
+#undef  vp8_subpix_sixtap8x8
+#define vp8_subpix_sixtap8x8 vp8_sixtap_predict8x8_ssse3
+
+#undef  vp8_subpix_sixtap8x4
+#define vp8_subpix_sixtap8x4 vp8_sixtap_predict8x4_ssse3
+
+#undef  vp8_subpix_sixtap4x4
+#define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
+
+
+//#undef  vp8_subpix_bilinear16x16
+//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
+
+//#undef  vp8_subpix_bilinear8x8
+//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
+
+#endif
+#endif
+
+
 
 #endif
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 0d7e095d7..8b54b2327 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -360,3 +360,194 @@ void vp8_sixtap_predict8x4_sse2
 
 #endif
 
+#if HAVE_SSSE3
+
+extern void vp8_filter_block1d8_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d16_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+extern void vp8_filter_block1d8_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_h6_ssse3
+(
+    unsigned char  *src_ptr,
+    unsigned int    src_pixels_per_line,
+    unsigned char  *output_ptr,
+    unsigned int    output_pitch,
+    unsigned int    output_height,
+    unsigned int    vp8_filter_index
+);
+
+extern void vp8_filter_block1d4_v6_ssse3
+(
+    unsigned char *src_ptr,
+    unsigned int   src_pitch,
+    unsigned char *output_ptr,
+    unsigned int   out_pitch,
+    unsigned int   output_height,
+    unsigned int   vp8_filter_index
+);
+
+void vp8_sixtap_predict16x16_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 24*24);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset);
+            vp8_filter_block1d16_v6_ssse3(FData2 , 16, dst_ptr, dst_pitch, 16, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
+    }
+}
+
+void vp8_sixtap_predict8x8_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset);
+        }
+        else
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
+    }
+}
+
+
+void vp8_sixtap_predict8x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+    DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 256);
+
+    if (xoffset)
+    {
+        if (yoffset)
+        {
+            vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset);
+            vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset);
+        }
+        else
+        {
+            // First-pass only
+            vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+        }
+    }
+    else
+    {
+        // Second-pass only
+        vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+    }
+}
+
+void vp8_sixtap_predict4x4_ssse3
+(
+    unsigned char  *src_ptr,
+    int   src_pixels_per_line,
+    int  xoffset,
+    int  yoffset,
+    unsigned char *dst_ptr,
+    int dst_pitch
+)
+{
+  DECLARE_ALIGNED_ARRAY(16, unsigned char, FData2, 4*9);
+
+  if (xoffset)
+  {
+      if (yoffset)
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset);
+          vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset);
+      }
+      else
+      {
+          vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
+      }
+  }
+  else
+  {
+      vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
+  }
+
+}
+
+#endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index 021be92b6..ce487ff9f 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -116,5 +116,16 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
 #endif
 
+#if HAVE_SSSE3
+
+    if (SSSE3Enabled)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_ssse3;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+    }
+#endif
+
 #endif
 }
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 0ea906eb0..dea237377 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -107,6 +107,7 @@ VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm
+VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm
 ifeq ($(CONFIG_POSTPROC),yes)
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm

From 8e7ebacb19be1a44a9b724ead70f7ae40a6827c3 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 20 Aug 2010 11:04:10 -0400
Subject: [PATCH 128/307] increase rate control buffer level precision

The external API exposes the RC initial/optimal/full buffer level in
milliseconds, but this value was truncated internally to seconds. This
patch allows the use of the full precision during the conversion from
time to bits.

Change-Id: If8dd2a87614c05747f81432cbe75dd9e6ed2f04e
---
 vp8/encoder/onyx_if.c | 54 +++++++++++++++++++++++++++++++------------
 vp8/vp8_cx_iface.c    |  6 ++---
 2 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 17b33d79a..99c434f52 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1326,6 +1326,18 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
     }
 }
 
+
+static int
+rescale(int val, int num, int denom)
+{
+    int64_t llnum = num;
+    int64_t llden = denom;
+    int64_t llval = val;
+
+    return llval * llnum / llden;
+}
+
+
 void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 {
     VP8_COMP *cpi = (VP8_COMP *)(ptr);
@@ -1353,9 +1365,9 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
         cpi->oxcf.worst_allowed_q           = MAXQ;
 
         cpi->oxcf.end_usage                = USAGE_STREAM_FROM_SERVER;
-        cpi->oxcf.starting_buffer_level     =   4;
-        cpi->oxcf.optimal_buffer_level      =   5;
-        cpi->oxcf.maximum_buffer_size       =   6;
+        cpi->oxcf.starting_buffer_level     =   4000;
+        cpi->oxcf.optimal_buffer_level      =   5000;
+        cpi->oxcf.maximum_buffer_size       =   6000;
         cpi->oxcf.under_shoot_pct           =  90;
         cpi->oxcf.allow_df                 =   0;
         cpi->oxcf.drop_frames_water_mark     =  20;
@@ -1504,26 +1516,32 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     // local file playback mode == really big buffer
     if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
     {
-        cpi->oxcf.starting_buffer_level   = 60;
-        cpi->oxcf.optimal_buffer_level    = 60;
-        cpi->oxcf.maximum_buffer_size     = 240;
+        cpi->oxcf.starting_buffer_level   = 60000;
+        cpi->oxcf.optimal_buffer_level    = 60000;
+        cpi->oxcf.maximum_buffer_size     = 240000;
 
     }
 
 
     // Convert target bandwidth from Kbit/s to Bit/s
     cpi->oxcf.target_bandwidth       *= 1000;
-    cpi->oxcf.starting_buffer_level   *= cpi->oxcf.target_bandwidth;
+    cpi->oxcf.starting_buffer_level =
+        rescale(cpi->oxcf.starting_buffer_level,
+                cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.optimal_buffer_level == 0)
         cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.optimal_buffer_level =
+            rescale(cpi->oxcf.optimal_buffer_level,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.maximum_buffer_size == 0)
         cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.maximum_buffer_size     *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.maximum_buffer_size =
+            rescale(cpi->oxcf.maximum_buffer_size,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     cpi->buffer_level                = cpi->oxcf.starting_buffer_level;
     cpi->bits_off_target              = cpi->oxcf.starting_buffer_level;
@@ -1783,26 +1801,32 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     // local file playback mode == really big buffer
     if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK)
     {
-        cpi->oxcf.starting_buffer_level   = 60;
-        cpi->oxcf.optimal_buffer_level    = 60;
-        cpi->oxcf.maximum_buffer_size     = 240;
+        cpi->oxcf.starting_buffer_level   = 60000;
+        cpi->oxcf.optimal_buffer_level    = 60000;
+        cpi->oxcf.maximum_buffer_size     = 240000;
 
     }
 
     // Convert target bandwidth from Kbit/s to Bit/s
     cpi->oxcf.target_bandwidth       *= 1000;
 
-    cpi->oxcf.starting_buffer_level   *= cpi->oxcf.target_bandwidth;
+    cpi->oxcf.starting_buffer_level =
+        rescale(cpi->oxcf.starting_buffer_level,
+                cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.optimal_buffer_level == 0)
         cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.optimal_buffer_level *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.optimal_buffer_level =
+            rescale(cpi->oxcf.optimal_buffer_level,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     if (cpi->oxcf.maximum_buffer_size == 0)
         cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8;
     else
-        cpi->oxcf.maximum_buffer_size     *= cpi->oxcf.target_bandwidth;
+        cpi->oxcf.maximum_buffer_size =
+            rescale(cpi->oxcf.maximum_buffer_size,
+                    cpi->oxcf.target_bandwidth, 1000);
 
     cpi->buffer_level                = cpi->oxcf.starting_buffer_level;
     cpi->bits_off_target              = cpi->oxcf.starting_buffer_level;
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index d0c47b35a..40cb73776 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -296,9 +296,9 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
     oxcf->under_shoot_pct         = cfg.rc_undershoot_pct;
     //oxcf->over_shoot_pct        = cfg.rc_overshoot_pct;
 
-    oxcf->maximum_buffer_size     = cfg.rc_buf_sz / 1000;
-    oxcf->starting_buffer_level   = cfg.rc_buf_initial_sz / 1000;
-    oxcf->optimal_buffer_level    = cfg.rc_buf_optimal_sz / 1000;
+    oxcf->maximum_buffer_size     = cfg.rc_buf_sz;
+    oxcf->starting_buffer_level   = cfg.rc_buf_initial_sz;
+    oxcf->optimal_buffer_level    = cfg.rc_buf_optimal_sz;
 
     oxcf->two_pass_vbrbias        = cfg.rc_2pass_vbr_bias_pct;
     oxcf->two_pass_vbrmin_section  = cfg.rc_2pass_vbr_minsection_pct;

From 93c32a55c2444b8245e8cba9187e1ec654d1fbc6 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Fri, 20 Aug 2010 10:58:19 -0700
Subject: [PATCH 129/307] Rework idct calling structure.

Moving the eob structure allows for a non-struct based
function to handle decoding an entire mb of
idct/dequant/recon data.  This allows for SIMD functions
to idct/dequant/recon multiple blocks at once.

SSE2 implementation gives 3% gain on Atom.

Change-Id: I8a8f3efd546ea4e0535f517d94f347cfb737c9c2
---
 vp8/common/blockd.h                    |   1 +
 vp8/common/x86/idctllm_sse2.asm        | 708 +++++++++++++++++++++++++
 vp8/decoder/arm/armv6/idct_blk_v6.c    | 151 ++++++
 vp8/decoder/arm/dequantize_arm.h       |  24 +
 vp8/decoder/arm/neon/idct_blk_neon.c   | 151 ++++++
 vp8/decoder/decodframe.c               |  71 +--
 vp8/decoder/dequantize.h               |  47 +-
 vp8/decoder/detokenize.c               |   6 +-
 vp8/decoder/generic/dsystemdependent.c |  15 +-
 vp8/decoder/idct_blk.c                 | 116 ++++
 vp8/decoder/x86/dequantize_x86.h       |  31 +-
 vp8/decoder/x86/idct_blk_mmx.c         | 151 ++++++
 vp8/decoder/x86/idct_blk_sse2.c        | 114 ++++
 vp8/decoder/x86/x86_dsystemdependent.c |  20 +-
 vp8/vp8_common.mk                      |   1 +
 vp8/vp8dx.mk                           |   3 +
 vp8/vp8dx_arm.mk                       |   3 +
 17 files changed, 1545 insertions(+), 68 deletions(-)
 create mode 100644 vp8/common/x86/idctllm_sse2.asm
 create mode 100644 vp8/decoder/arm/armv6/idct_blk_v6.c
 create mode 100644 vp8/decoder/arm/neon/idct_blk_neon.c
 create mode 100644 vp8/decoder/idct_blk.c
 create mode 100644 vp8/decoder/x86/idct_blk_mmx.c
 create mode 100644 vp8/decoder/x86/idct_blk_sse2.c

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index de308ffc3..cb07e9ec7 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -218,6 +218,7 @@ typedef struct
 //not used    DECLARE_ALIGNED(16, short, reference[384]);
     DECLARE_ALIGNED(16, short, qcoeff[400]);
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
+    DECLARE_ALIGNED(16, char,  eobs[25]);
 
     // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
     BLOCKD block[25];
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
new file mode 100644
index 000000000..058ed8a8c
--- /dev/null
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -0,0 +1,708 @@
+;
+;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void idct_dequant_0_2x_sse2
+; (
+;   short *qcoeff       - 0
+;   short *dequant      - 1
+;   unsigned char *pre  - 2
+;   unsigned char *dst  - 3
+;   int dst_stride      - 4
+;   int blk_stride      - 5
+; )
+
+global sym(idct_dequant_0_2x_sse2)
+sym(idct_dequant_0_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    ; end prolog
+
+        mov         rdx,            arg(1) ; dequant
+        mov         rax,            arg(0) ; qcoeff
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        movd        xmm4,           [rax]
+        movd        xmm5,           [rdx]
+
+        pinsrw      xmm4,           [rax+32],   4
+        pinsrw      xmm5,           [rdx],      4
+
+        pmullw      xmm4,           xmm5
+
+    ; clear coeffs
+        movd        [rax],          xmm7
+        movd        [rax+32],       xmm7
+;pshufb
+        pshuflw     xmm4,           xmm4,       00000000b
+        pshufhw     xmm4,           xmm4,       00000000b
+
+        mov         rax,            arg(2) ; pre
+        paddw       xmm4,           [fours GLOBAL]
+
+        movsxd      rcx,            dword ptr arg(5) ; blk_stride
+        psraw       xmm4,           3
+
+        movq        xmm0,           [rax]
+        movq        xmm1,           [rax+rcx]
+        movq        xmm2,           [rax+2*rcx]
+        lea         rcx,            [3*rcx]
+        movq        xmm3,           [rax+rcx]
+
+        punpcklbw   xmm0,           xmm7
+        punpcklbw   xmm1,           xmm7
+        punpcklbw   xmm2,           xmm7
+        punpcklbw   xmm3,           xmm7
+
+        mov         rax,            arg(3) ; dst
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; Add to predict buffer
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm4
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm4
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; store blocks back out
+        movq        [rax],          xmm0
+        movq        [rax + rdx],    xmm1
+
+        lea         rax,            [rax + 2*rdx]
+
+        movq        [rax],          xmm2
+        movq        [rax + rdx],    xmm3
+
+    ; begin epilog
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(idct_dequant_full_2x_sse2)
+sym(idct_dequant_full_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+        movsxd      rcx,            dword ptr arg(5) ; blk_stride
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        mov         rdx,            arg(1)  ; dequant
+
+    ; note the transpose of xmm1 and xmm2, necessary for shuffle
+    ;   to spit out sensicle data
+        movdqa      xmm0,           [rax]
+        movdqa      xmm2,           [rax+16]
+        movdqa      xmm1,           [rax+32]
+        movdqa      xmm3,           [rax+48]
+
+    ; Clear out coeffs
+        movdqa      [rax],          xmm7
+        movdqa      [rax+16],       xmm7
+        movdqa      [rax+32],       xmm7
+        movdqa      [rax+48],       xmm7
+
+    ; dequantize qcoeff buffer
+        pmullw      xmm0,           [rdx]
+        pmullw      xmm2,           [rdx+16]
+        pmullw      xmm1,           [rdx]
+        pmullw      xmm3,           [rdx+16]
+
+    ; repack so block 0 row x and block 1 row x are together
+        movdqa      xmm4,           xmm0
+        punpckldq   xmm0,           xmm1
+        punpckhdq   xmm4,           xmm1
+
+        pshufd      xmm0,           xmm0,       11011000b
+        pshufd      xmm1,           xmm4,       11011000b
+
+        movdqa      xmm4,           xmm2
+        punpckldq   xmm2,           xmm3
+        punpckhdq   xmm4,           xmm3
+
+        pshufd      xmm2,           xmm2,       11011000b
+        pshufd      xmm3,           xmm4,       11011000b
+
+    ; first pass
+        psubw       xmm0,           xmm2        ; b1 = 0-2
+        paddw       xmm2,           xmm2        ;
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0        ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5        ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5        ; d1
+        movdqa      xmm6,           xmm2        ; a1
+
+        movdqa      xmm4,           xmm0        ; b1
+        paddw       xmm2,           xmm3        ;0
+
+        paddw       xmm4,           xmm7        ;1
+        psubw       xmm0,           xmm7        ;2
+
+        psubw       xmm6,           xmm3        ;3
+
+    ; transpose for the second pass
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+    ; second pass
+        psubw       xmm0,           xmm2            ; b1 = 0-2
+        paddw       xmm2,           xmm2
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0            ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5            ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5            ; d1
+        paddw       xmm0,           [fours GLOBAL]
+
+        paddw       xmm2,           [fours GLOBAL]
+        movdqa      xmm6,           xmm2            ; a1
+
+        movdqa      xmm4,           xmm0            ; b1
+        paddw       xmm2,           xmm3            ;0
+
+        paddw       xmm4,           xmm7            ;1
+        psubw       xmm0,           xmm7            ;2
+
+        psubw       xmm6,           xmm3            ;3
+        psraw       xmm2,           3
+
+        psraw       xmm0,           3
+        psraw       xmm4,           3
+
+        psraw       xmm6,           3
+
+    ; transpose to save
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+        pxor        xmm7,           xmm7
+
+    ; Load up predict blocks
+        movq        xmm4,           [rsi]
+        movq        xmm5,           [rsi+rcx]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm5
+
+        movq        xmm4,           [rsi+2*rcx]
+        lea         rcx,            [3*rcx]
+        movq        xmm5,           [rsi+rcx]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm5
+
+.finish:
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void idct_dequant_dc_0_2x_sse2
+; (
+;   short *qcoeff       - 0
+;   short *dequant      - 1
+;   unsigned char *pre  - 2
+;   unsigned char *dst  - 3
+;   int dst_stride      - 4
+;   short *dc           - 5
+; )
+global sym(idct_dequant_dc_0_2x_sse2)
+sym(idct_dequant_dc_0_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+        mov         rdx,            arg(5) ; dc
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+    ; load up 2 dc words here == 2*16 = doubleword
+        movd        xmm4,           [rdx]
+
+    ; Load up predict blocks
+        movq        xmm0,           [rsi]
+        movq        xmm1,           [rsi+16]
+        movq        xmm2,           [rsi+32]
+        movq        xmm3,           [rsi+48]
+
+    ; Duplicate and expand dc across
+        punpcklwd   xmm4,           xmm4
+        punpckldq   xmm4,           xmm4
+
+    ; Rounding to dequant and downshift
+        paddw       xmm4,           [fours GLOBAL]
+        psraw       xmm4,           3
+
+    ; Predict buffer needs to be expanded from bytes to words
+        punpcklbw   xmm0,           xmm7
+        punpcklbw   xmm1,           xmm7
+        punpcklbw   xmm2,           xmm7
+        punpcklbw   xmm3,           xmm7
+
+    ; Add to predict buffer
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm4
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm4
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+global sym(idct_dequant_dc_full_2x_sse2)
+sym(idct_dequant_dc_full_2x_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ; special case when 2 blocks have 0 or 1 coeffs
+    ; dc is set as first coeff, so no need to load qcoeff
+        mov         rax,            arg(0) ; qcoeff
+        mov         rsi,            arg(2) ; pre
+        mov         rdi,            arg(3) ; dst
+
+    ; Zero out xmm7, for use unpacking
+        pxor        xmm7,           xmm7
+
+        mov         rdx,            arg(1)  ; dequant
+
+    ; note the transpose of xmm1 and xmm2, necessary for shuffle
+    ;   to spit out sensicle data
+        movdqa      xmm0,           [rax]
+        movdqa      xmm2,           [rax+16]
+        movdqa      xmm1,           [rax+32]
+        movdqa      xmm3,           [rax+48]
+
+    ; Clear out coeffs
+        movdqa      [rax],          xmm7
+        movdqa      [rax+16],       xmm7
+        movdqa      [rax+32],       xmm7
+        movdqa      [rax+48],       xmm7
+
+    ; dequantize qcoeff buffer
+        pmullw      xmm0,           [rdx]
+        pmullw      xmm2,           [rdx+16]
+        pmullw      xmm1,           [rdx]
+        pmullw      xmm3,           [rdx+16]
+
+    ; DC component
+        mov         rdx,            arg(5)
+
+    ; repack so block 0 row x and block 1 row x are together
+        movdqa      xmm4,           xmm0
+        punpckldq   xmm0,           xmm1
+        punpckhdq   xmm4,           xmm1
+
+        pshufd      xmm0,           xmm0,       11011000b
+        pshufd      xmm1,           xmm4,       11011000b
+
+        movdqa      xmm4,           xmm2
+        punpckldq   xmm2,           xmm3
+        punpckhdq   xmm4,           xmm3
+
+        pshufd      xmm2,           xmm2,       11011000b
+        pshufd      xmm3,           xmm4,       11011000b
+
+    ; insert DC component
+        pinsrw      xmm0,           [rdx],      0
+        pinsrw      xmm0,           [rdx+2],    4
+
+    ; first pass
+        psubw       xmm0,           xmm2        ; b1 = 0-2
+        paddw       xmm2,           xmm2        ;
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0        ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5        ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5        ; d1
+        movdqa      xmm6,           xmm2        ; a1
+
+        movdqa      xmm4,           xmm0        ; b1
+        paddw       xmm2,           xmm3        ;0
+
+        paddw       xmm4,           xmm7        ;1
+        psubw       xmm0,           xmm7        ;2
+
+        psubw       xmm6,           xmm3        ;3
+
+    ; transpose for the second pass
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+    ; second pass
+        psubw       xmm0,           xmm2            ; b1 = 0-2
+        paddw       xmm2,           xmm2
+
+        movdqa      xmm5,           xmm1
+        paddw       xmm2,           xmm0            ; a1 = 0+2
+
+        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
+
+        movdqa      xmm7,           xmm3
+        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+
+        paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
+        psubw       xmm7,           xmm5            ; c1
+
+        movdqa      xmm5,           xmm1
+        movdqa      xmm4,           xmm3
+
+        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        paddw       xmm5,           xmm1
+
+        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        paddw       xmm3,           xmm4
+
+        paddw       xmm3,           xmm5            ; d1
+        paddw       xmm0,           [fours GLOBAL]
+
+        paddw       xmm2,           [fours GLOBAL]
+        movdqa      xmm6,           xmm2            ; a1
+
+        movdqa      xmm4,           xmm0            ; b1
+        paddw       xmm2,           xmm3            ;0
+
+        paddw       xmm4,           xmm7            ;1
+        psubw       xmm0,           xmm7            ;2
+
+        psubw       xmm6,           xmm3            ;3
+        psraw       xmm2,           3
+
+        psraw       xmm0,           3
+        psraw       xmm4,           3
+
+        psraw       xmm6,           3
+
+    ; transpose to save
+        movdqa      xmm7,           xmm2        ; 103 102 101 100 003 002 001 000
+        punpcklwd   xmm2,           xmm0        ; 007 003 006 002 005 001 004 000
+        punpckhwd   xmm7,           xmm0        ; 107 103 106 102 105 101 104 100
+
+        movdqa      xmm5,           xmm4        ; 111 110 109 108 011 010 009 008
+        punpcklwd   xmm4,           xmm6        ; 015 011 014 010 013 009 012 008
+        punpckhwd   xmm5,           xmm6        ; 115 111 114 110 113 109 112 108
+
+
+        movdqa      xmm1,           xmm2        ; 007 003 006 002 005 001 004 000
+        punpckldq   xmm2,           xmm4        ; 013 009 005 001 012 008 004 000
+        punpckhdq   xmm1,           xmm4        ; 015 011 007 003 014 010 006 002
+
+        movdqa      xmm6,           xmm7        ; 107 103 106 102 105 101 104 100
+        punpckldq   xmm7,           xmm5        ; 113 109 105 101 112 108 104 100
+        punpckhdq   xmm6,           xmm5        ; 115 111 107 103 114 110 106 102
+
+
+        movdqa      xmm5,           xmm2        ; 013 009 005 001 012 008 004 000
+        punpckldq   xmm2,           xmm7        ; 112 108 012 008 104 100 004 000
+        punpckhdq   xmm5,           xmm7        ; 113 109 013 009 105 101 005 001
+
+        movdqa      xmm7,           xmm1        ; 015 011 007 003 014 010 006 002
+        punpckldq   xmm1,           xmm6        ; 114 110 014 010 106 102 006 002
+        punpckhdq   xmm7,           xmm6        ; 115 111 015 011 107 103 007 003
+
+        pshufd      xmm0,           xmm2,       11011000b
+        pshufd      xmm2,           xmm1,       11011000b
+
+        pshufd      xmm1,           xmm5,       11011000b
+        pshufd      xmm3,           xmm7,       11011000b
+
+        pxor        xmm7,           xmm7
+
+    ; Load up predict blocks
+        movq        xmm4,           [rsi]
+        movq        xmm5,           [rsi+16]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm0,           xmm4
+        paddw       xmm1,           xmm5
+
+        movq        xmm4,           [rsi+32]
+        movq        xmm5,           [rsi+48]
+
+        punpcklbw   xmm4,           xmm7
+        punpcklbw   xmm5,           xmm7
+
+        paddw       xmm2,           xmm4
+        paddw       xmm3,           xmm5
+
+.finish:
+
+    ; pack up before storing
+        packuswb    xmm0,           xmm7
+        packuswb    xmm1,           xmm7
+        packuswb    xmm2,           xmm7
+        packuswb    xmm3,           xmm7
+
+    ; Load destination stride before writing out,
+    ;   doesn't need to persist
+        movsxd      rdx,            dword ptr arg(4) ; dst_stride
+
+    ; store blocks back out
+        movq        [rdi],          xmm0
+        movq        [rdi + rdx],    xmm1
+
+        lea         rdi,            [rdi + 2*rdx]
+
+        movq        [rdi],          xmm2
+        movq        [rdi + rdx],    xmm3
+
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+fours:
+    times 8 dw 0x0004
+align 16
+x_s1sqr2:
+    times 8 dw 0x8A8C
+align 16
+x_c1sqr2less1:
+    times 8 dw 0x4E7B
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
new file mode 100644
index 000000000..96aca2b81
--- /dev/null
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_v6 (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_v6 (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_v6 (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_v6 (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_v6
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_v6 (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_v6 (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_v6 (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 3a044f847..12e836a6f 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -16,6 +16,9 @@
 extern prototype_dequant_block(vp8_dequantize_b_v6);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_v6);
 extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
@@ -25,12 +28,24 @@ extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_v6);
 
 #undef vp8_dequant_dc_idct_add
 #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_v6
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_v6
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
 
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_neon);
 extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
@@ -40,6 +55,15 @@ extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_neon);
 
 #undef vp8_dequant_dc_idct_add
 #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_neon
+
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_neon
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
 #endif
 
 #endif
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
new file mode 100644
index 000000000..e190bc023
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_neon
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 9942e0b57..45d7ec342 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -237,7 +237,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
         // do 2nd order transform on the dc block
-        if (b->eob > 1)
+        if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
             ((int *)b->qcoeff)[0] = 0;
@@ -255,24 +255,10 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
             ((int *)b->qcoeff)[0] = 0;
         }
 
-
-        for (i = 0; i < 16; i++)
-        {
-
-            b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, dc_idct_add)
-                    (b->qcoeff, &b->dequant[0][0], b->predictor,
-                     *(b->base_dst) + b->dst, 16, b->dst_stride,
-                     xd->block[24].diff[i]);
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(xd->block[24].diff[i], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
-            }
-        }
+        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
     }
     else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
     {
@@ -282,13 +268,17 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
             BLOCKD *b = &xd->block[i];
             vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
 
-            if (b->eob > 1)
+            if (xd->eobs[i] > 1)
             {
-                DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+                DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
             }
             else
             {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
                 ((int *)b->qcoeff)[0] = 0;
             }
         }
@@ -296,37 +286,16 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     }
     else
     {
-        for (i = 0; i < 16; i++)
-        {
-            BLOCKD *b = &xd->block[i];
-
-            if (b->eob > 1)
-            {
-                DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
-            }
-            else
-            {
-                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 16, b->dst_stride);
-                ((int *)b->qcoeff)[0] = 0;
-            }
-        }
+        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs);
     }
 
-    for (i = 16; i < 24; i++)
-    {
-
-        BLOCKD *b = &xd->block[i];
-
-        if (b->eob > 1)
-        {
-            DEQUANT_INVOKE(&pbi->dequant, idct_add)(b->qcoeff, &b->dequant[0][0],  b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride);
-        }
-        else
-        {
-            IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)(b->qcoeff[0] * b->dequant[0][0], b->predictor, *(b->base_dst) + b->dst, 8, b->dst_stride);
-            ((int *)b->qcoeff)[0] = 0;
-        }
-    }
+    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+                     xd->dst.uv_stride, xd->eobs+16);
 }
 
 static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index fbca3919c..125d35b05 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -27,6 +27,21 @@
              int pitch, int stride, \
              int dc)
 
+#define prototype_dequant_dc_idct_add_y_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs, short *dc)
+
+#define prototype_dequant_idct_add_y_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst, \
+             int stride, char *eobs)
+
+#define prototype_dequant_idct_add_uv_block(sym) \
+    void sym(short *q, short *dq, \
+             unsigned char *pre, unsigned char *dst_u, \
+             unsigned char *dst_v, int stride, char *eobs)
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/dequantize_x86.h"
 #endif
@@ -50,16 +65,42 @@ extern prototype_dequant_idct_add(vp8_dequant_idct_add);
 #endif
 extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add);
 
+#ifndef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_c
+#endif
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_c
+#endif
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block);
+
+#ifndef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_c
+#endif
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block);
+
+
 typedef prototype_dequant_block((*vp8_dequant_block_fn_t));
 
 typedef prototype_dequant_idct_add((*vp8_dequant_idct_add_fn_t));
+
 typedef prototype_dequant_dc_idct_add((*vp8_dequant_dc_idct_add_fn_t));
 
+typedef prototype_dequant_dc_idct_add_y_block((*vp8_dequant_dc_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_y_block((*vp8_dequant_idct_add_y_block_fn_t));
+
+typedef prototype_dequant_idct_add_uv_block((*vp8_dequant_idct_add_uv_block_fn_t));
+
 typedef struct
 {
-    vp8_dequant_block_fn_t        block;
-    vp8_dequant_idct_add_fn_t     idct_add;
-    vp8_dequant_dc_idct_add_fn_t  dc_idct_add;
+    vp8_dequant_block_fn_t               block;
+    vp8_dequant_idct_add_fn_t            idct_add;
+    vp8_dequant_dc_idct_add_fn_t         dc_idct_add;
+    vp8_dequant_dc_idct_add_y_block_fn_t dc_idct_add_y_block;
+    vp8_dequant_idct_add_y_block_fn_t    idct_add_y_block;
+    vp8_dequant_idct_add_uv_block_fn_t   idct_add_uv_block;
 } vp8_dequant_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 34faae3aa..9cbea23ea 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -266,6 +266,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 
     BOOL_DECODER *bc = x->current_bc;
 
+    char *eobs = x->eobs;
+
     ENTROPY_CONTEXT *a;
     ENTROPY_CONTEXT *l;
     int i;
@@ -416,8 +418,8 @@ ONE_CONTEXT_NODE_0_:
 
     qcoeff_ptr [ scan[15] ] = (INT16) v;
 BLOCK_FINISHED:
-    t = ((x->block[i].eob = c) != !type);   // any nonzero data?
-    eobtotal += x->block[i].eob;
+    t = ((eobs[i] = c) != !type);   // any nonzero data?
+    eobtotal += c;
     *a = *l = t;
     qcoeff_ptr += 16;
 
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index ab085e230..e8104dc42 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -19,12 +19,15 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 {
     // Pure C:
 #if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-    pbi->dequant.block   = vp8_dequantize_b_c;
-    pbi->dequant.idct_add    = vp8_dequant_idct_add_c;
-    pbi->dequant.dc_idct_add    = vp8_dequant_dc_idct_add_c;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
+    pbi->mb.rtcd                     = &pbi->common.rtcd;
+    pbi->dequant.block               = vp8_dequantize_b_c;
+    pbi->dequant.idct_add            = vp8_dequant_idct_add_c;
+    pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_c;
+    pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_c;
+    pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_c;
+    pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
+    pbi->dboolhuff.start             = vp8dx_start_decode_c;
+    pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
 #if 0 //For use with RTCD, when implemented
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
new file mode 100644
index 000000000..b18984bc3
--- /dev/null
+++ b/vp8/decoder/idct_blk.c
@@ -0,0 +1,116 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_dc_idct_add_c (q, dq, pre, dst, 16, stride, dc[0]);
+            else
+                vp8_dc_only_idct_add_c (dc[0], pre, dst, 16, stride);
+
+            q   += 16;
+            pre += 4;
+            dst += 4;
+            dc  ++;
+        }
+
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dst, 16, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dst, 16, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q   += 16;
+            pre += 4;
+            dst += 4;
+        }
+
+        pre += 64 - 16;
+        dst += 4*stride - 16;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_c
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i, j;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dstu, 8, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstu, 8, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstu += 4;
+        }
+
+        pre  += 32 - 8;
+        dstu += 4*stride - 8;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            if (*eobs++ > 1)
+                vp8_dequant_idct_add_c (q, dq, pre, dstv, 8, stride);
+            else
+            {
+                vp8_dc_only_idct_add_c (q[0]*dq[0], pre, dstv, 8, stride);
+                ((int *)q)[0] = 0;
+            }
+
+            q    += 16;
+            pre  += 4;
+            dstv += 4;
+        }
+
+        pre  += 32 - 8;
+        dstv += 4*stride - 8;
+    }
+}
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 44926761e..201479cfa 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -23,7 +23,9 @@
 extern prototype_dequant_block(vp8_dequantize_b_mmx);
 extern prototype_dequant_idct_add(vp8_dequant_idct_add_mmx);
 extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
-
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_mmx);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
@@ -35,6 +37,33 @@ extern prototype_dequant_dc_idct_add(vp8_dequant_dc_idct_add_mmx);
 #undef  vp8_dequant_dc_idct_add
 #define vp8_dequant_dc_idct_add vp8_dequant_dc_idct_add_mmx
 
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_mmx
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_mmx
+
+#endif
+#endif
+
+#if HAVE_SSE2
+extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_sse2);
+extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_sse2);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef vp8_dequant_dc_idct_add_y_block
+#define vp8_dequant_dc_idct_add_y_block vp8_dequant_dc_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_y_block
+#define vp8_dequant_idct_add_y_block vp8_dequant_idct_add_y_block_sse2
+
+#undef vp8_dequant_idct_add_uv_block
+#define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_sse2
+
 #endif
 #endif
 
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
new file mode 100644
index 000000000..1522a8022
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void vp8_dequant_dc_idct_add_y_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_dc_idct_add_mmx (q, dq, pre, dst, 16, stride, dc[0]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[0], pre, dst, 16, stride);
+
+        if (eobs[1] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[1], pre+4, dst+4, 16, stride);
+
+        if (eobs[2] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[2], pre+8, dst+8, 16, stride);
+
+        if (eobs[3] > 1)
+            vp8_dequant_dc_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
+        else
+            vp8_dc_only_idct_add_mmx (dc[3], pre+12, dst+12, 16, stride);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dst, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dst, 16, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dst+4, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dst+4, 16, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        if (eobs[2] > 1)
+            vp8_dequant_idct_add_mmx (q+32, dq, pre+8, dst+8, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[32]*dq[0], pre+8, dst+8, 16, stride);
+            ((int *)(q+32))[0] = 0;
+        }
+
+        if (eobs[3] > 1)
+            vp8_dequant_idct_add_mmx (q+48, dq, pre+12, dst+12, 16, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[48]*dq[0], pre+12, dst+12, 16, stride);
+            ((int *)(q+48))[0] = 0;
+        }
+
+        q    += 64;
+        pre  += 64;
+        dst  += 4*stride;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_mmx
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dstu, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstu, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstu+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstu+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstu += 4*stride;
+        eobs += 2;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        if (eobs[0] > 1)
+            vp8_dequant_idct_add_mmx (q, dq, pre, dstv, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[0]*dq[0], pre, dstv, 8, stride);
+            ((int *)q)[0] = 0;
+        }
+
+        if (eobs[1] > 1)
+            vp8_dequant_idct_add_mmx (q+16, dq, pre+4, dstv+4, 8, stride);
+        else
+        {
+            vp8_dc_only_idct_add_mmx (q[16]*dq[0], pre+4, dstv+4, 8, stride);
+            ((int *)(q+16))[0] = 0;
+        }
+
+        q    += 32;
+        pre  += 32;
+        dstv += 4*stride;
+        eobs += 2;
+    }
+}
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
new file mode 100644
index 000000000..c5e4ad3a6
--- /dev/null
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -0,0 +1,114 @@
+/*
+ *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "vpx_ports/config.h"
+#include "idct.h"
+#include "dequantize.h"
+
+void idct_dequant_dc_0_2x_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int dst_stride, short *dc);
+void idct_dequant_dc_full_2x_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int dst_stride, short *dc);
+
+void idct_dequant_0_2x_sse2
+            (short *q, short *dq ,unsigned char *pre,
+             unsigned char *dst, int dst_stride, int blk_stride);
+void idct_dequant_full_2x_sse2
+            (short *q, short *dq ,unsigned char *pre,
+             unsigned char *dst, int dst_stride, int blk_stride);
+
+void vp8_dequant_dc_idct_add_y_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs, short *dc)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (((short *)(eobs))[0] & 0xfefe)
+            idct_dequant_dc_full_2x_sse2 (q, dq, pre, dst, stride, dc);
+        else
+            idct_dequant_dc_0_2x_sse2 (q, dq, pre, dst, stride, dc);
+
+        if (((short *)(eobs))[1] & 0xfefe)
+            idct_dequant_dc_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+        else
+            idct_dequant_dc_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, dc+2);
+
+        q    += 64;
+        dc   += 4;
+        pre  += 64;
+        dst  += stride*4;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_y_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dst, int stride, char *eobs)
+{
+    int i;
+
+    for (i = 0; i < 4; i++)
+    {
+        if (((short *)(eobs))[0] & 0xfefe)
+            idct_dequant_full_2x_sse2 (q, dq, pre, dst, stride, 16);
+        else
+            idct_dequant_0_2x_sse2 (q, dq, pre, dst, stride, 16);
+
+        if (((short *)(eobs))[1] & 0xfefe)
+            idct_dequant_full_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+        else
+            idct_dequant_0_2x_sse2 (q+32, dq, pre+8, dst+8, stride, 16);
+
+        q    += 64;
+        pre  += 64;
+        dst  += stride*4;
+        eobs += 4;
+    }
+}
+
+void vp8_dequant_idct_add_uv_block_sse2
+            (short *q, short *dq, unsigned char *pre,
+             unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
+{
+    if (((short *)(eobs))[0] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+    dstu += stride*4;
+
+    if (((short *)(eobs))[1] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstu, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstu, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+
+    if (((short *)(eobs))[2] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+
+    q    += 32;
+    pre  += 32;
+    dstv += stride*4;
+
+    if (((short *)(eobs))[3] & 0xfefe)
+        idct_dequant_full_2x_sse2 (q, dq, pre, dstv, stride, 8);
+    else
+        idct_dequant_0_2x_sse2 (q, dq, pre, dstv, stride, 8);
+}
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index 789105141..eb8198f96 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -39,14 +39,24 @@ void vp8_arch_x86_decode_init(VP8D_COMP *pbi)
 #if CONFIG_RUNTIME_CPU_DETECT
     /* Override default functions with fastest ones for this CPU. */
 #if HAVE_MMX
-
     if (flags & HAS_MMX)
     {
-        pbi->dequant.block   = vp8_dequantize_b_mmx;
-        pbi->dequant.idct_add    = vp8_dequant_idct_add_mmx;
-        pbi->dequant.dc_idct_add = vp8_dequant_dc_idct_add_mmx;
+        pbi->dequant.block               = vp8_dequantize_b_mmx;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_mmx;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_mmx;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_mmx;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_mmx;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_mmx;
     }
+#endif
+#if HAVE_SSE2
+    if (flags & HAS_SSE2)
+    {
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_sse2;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_sse2;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_sse2;
+    }
+#endif
 
-#endif
 #endif
 }
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index dea237377..3aad7b7be 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -103,6 +103,7 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/loopfilter_mmx.asm
+VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 8ab94259c..f6b7d94b2 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -68,9 +68,12 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h
 VP8_DX_SRCS-yes += decoder/treereader.h
 VP8_DX_SRCS-yes += decoder/onyxd_if.c
 VP8_DX_SRCS-yes += decoder/threading.c
+VP8_DX_SRCS-yes += decoder/idct_blk.c
 
 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
 
 VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/dequantize_x86.h
 VP8_DX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += decoder/x86/x86_dsystemdependent.c
 VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/dequantize_mmx.asm
+VP8_DX_SRCS-$(HAVE_MMX) += decoder/x86/idct_blk_mmx.c
+VP8_DX_SRCS-$(HAVE_SSE2) += decoder/x86/idct_blk_sse2.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 61a1ce4e6..c4e79af32 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -15,14 +15,17 @@ VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
 VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
+VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
 VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_dc_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequant_idct_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/idct_blk_v6.c
 
 #File list for neon
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_blk_neon.c

From d73217ab17ed6623fd836a574c504adcb1c5517a Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Mon, 23 Aug 2010 13:35:26 -0400
Subject: [PATCH 130/307] update structures

mbmi and eob moved in previous commits

Change-Id: I30a2eba36addf89ee50b406ad4afdd059a832711
---
 vp8/decoder/detokenize.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 9cbea23ea..0a6235de9 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -241,7 +241,7 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 
     type = 3;
 
-    if (x->mbmi.mode != B_PRED && x->mbmi.mode != SPLITMV)
+    if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         type = 1;
         eobtotal -= 16;
@@ -251,7 +251,7 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 
     for (i = 0; i < 25; i++)
     {
-        x->block[i].eob = dx->detoken.eob[i];
+        x->eobs[i] = dx->detoken.eob[i];
         eobtotal += dx->detoken.eob[i];
     }
 

From 5c244398e18ed6c9aaef5adcccdb7ec4a90d49b2 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 24 Aug 2010 18:23:16 -0400
Subject: [PATCH 131/307] clean up compiler warnings

did a test compile with clang and got rid of some warnings that have
been annoying me for a while:
vp8/decoder/detokenize.c: In function 'vp8_init_detokenizer':
vp8/decoder/detokenize.c:121: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:122: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:123: warning: assignment from incompatible pointer type
vp8/decoder/detokenize.c:124: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:125: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:128: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:129: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:130: warning: assignment discards qualifiers from pointer target type
vp8/decoder/detokenize.c:131: warning: assignment discards qualifiers from pointer target type

Change-Id: I78ddab176fe47cbeed30379709dc7bab01c0c2e4
---
 vp8/decoder/onyxd_int.h | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index c08e0fb75..2a7a03640 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -48,21 +48,20 @@ typedef struct
 
 typedef struct
 {
-    int *scan;
-    UINT8 *ptr_onyxblock2context_leftabove;
-    vp8_tree_index *vp8_coef_tree_ptr;  //onyx_coef_tree_ptr; ???
-    TOKENEXTRABITS *teb_base_ptr;
+    int const *scan;
+    UINT8 const *ptr_onyxblock2context_leftabove;
+    vp8_tree_index const *vp8_coef_tree_ptr;
+    TOKENEXTRABITS const *teb_base_ptr;
     unsigned char *norm_ptr;
-//  UINT16 *ptr_onyx_coef_bands_x;
-    UINT8 *ptr_onyx_coef_bands_x;
+    UINT16 *ptr_onyx_coef_bands_x;
 
-    ENTROPY_CONTEXT   **A;
-    ENTROPY_CONTEXT(*L)[4];
+    ENTROPY_CONTEXT **A;
+    ENTROPY_CONTEXT (*L)[4];
 
     INT16 *qcoeff_start_ptr;
     BOOL_DECODER *current_bc;
 
-    UINT8 *coef_probs[4];
+    vp8_prob const *coef_probs[4];
 
     UINT8 eob[25];
 

From a790906c3b93bca3beedfeb7a63fd7ce501b4901 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 24 Aug 2010 16:27:49 -0700
Subject: [PATCH 132/307] Allow --cpu= to work for x86.

--cpu was already implemented for most of our embedded
platforms, this just extends it to x86.  Corner case for
Atom processor as it doesn't respond to the --march=
option under icc.

Change-Id: I2d57a7a6e9d0b55c0059e9bc46cfc9bf9468c185
---
 build/make/configure.sh | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 3b6c919fd..cebdd57ba 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -797,11 +797,21 @@ process_common_toolchain() {
                 add_ldflags -i-static
                 enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE3 -axSSE3
                 enabled x86_64 && AR=xiar
+                case ${tune_cpu} in
+                    atom*)
+                        tune_cflags="-x"
+                        tune_cpu="SSE3_ATOM"
+                    ;;
+                    *)
+                        tune_cflags="-march="
+                    ;;
+                esac
                 ;;
             gcc*)
                 add_cflags  -m${bits}
                 add_ldflags -m${bits}
                 link_with_cc=gcc
+                tune_cflags="-march="
             setup_gnu_toolchain
                 ;;
         esac

From e105e245ef02e7ce3c78950af01cfc78ce2a7459 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Fri, 27 Aug 2010 15:21:22 -0700
Subject: [PATCH 133/307] Fix two-pass framrate for Y4M input.

The timebase was being set to the value in the Y4M file on each
 pass, but only doubled to account for the altref placement on
 the first past.
This avoids reseting it on the second pass.

Change-Id: Ie342639bad1ffe9c2214fbbaaded72cfed835b42
---
 ivfenc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ivfenc.c b/ivfenc.c
index 3487b3594..9afcf7c69 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -890,6 +890,8 @@ int main(int argc, const char **argv_)
                 {
                     cfg.g_timebase.num = y4m.fps_d;
                     cfg.g_timebase.den = y4m.fps_n;
+                    /* And don't reset it in the second pass.*/
+                    arg_have_timebase = 1;
                 }
                 arg_use_i420 = 0;
             }

From 7a8e0a29359f561e7e2f22ab553f5136c361239e Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 5 May 2010 19:14:36 -0400
Subject: [PATCH 134/307] Fix harmless off-by-1 error.

The memory being zeroed in vp8_update_mode_info_border() was just
 allocated with calloc, and so the entire function is actually
 redundant, but it should be made correct in case someone expects
 it to actually work in the future.

Change-Id: If7a84e489157ab34ab77ec6e2fe034fb71cf8c79
---
 vp8/common/alloccommon.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 369d48101..3fcc3088c 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -24,7 +24,7 @@ extern  void vp8_init_scan_order_mask();
 void vp8_update_mode_info_border(MODE_INFO *mi, int rows, int cols)
 {
     int i;
-    vpx_memset(mi - cols - 1, 0, sizeof(MODE_INFO) * cols + 1);
+    vpx_memset(mi - cols - 2, 0, sizeof(MODE_INFO) * (cols + 1));
 
     for (i = 0; i < rows; i++)
     {

From e85e631504ef12a4079f06fbf915ce0e5902b24b Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Tue, 31 Aug 2010 10:49:57 -0400
Subject: [PATCH 135/307] Changed above and left context data layout

The main reason for the change was to reduce cycles in the token
decoder. (~1.5% gain for 32 bit)  This layout should be more
cache friendly.

As a result of this change, the encoder had to be updated.

Change-Id: Id5e804169d8889da0378b3a519ac04dabd28c837
Note: dixie uses a similar layout
---
 vp8/common/alloccommon.c  |  38 ++---------
 vp8/common/blockd.c       |  18 ++---
 vp8/common/blockd.h       |  22 +++---
 vp8/common/onyxc_int.h    |   4 +-
 vp8/decoder/dboolhuff.c   |   2 +-
 vp8/decoder/dboolhuff.h   |   2 +-
 vp8/decoder/decodframe.c  |  20 ++----
 vp8/decoder/detokenize.c  |  82 +++++++----------------
 vp8/decoder/threading.c   |  28 +++-----
 vp8/encoder/encodeframe.c |  28 ++------
 vp8/encoder/encodemb.c    |  68 ++++++++++---------
 vp8/encoder/ethreading.c  |  21 ++----
 vp8/encoder/pickinter.c   |  15 +++--
 vp8/encoder/rdopt.c       |  91 +++++++++++++++++--------
 vp8/encoder/tokenize.c    | 136 ++++++++------------------------------
 15 files changed, 216 insertions(+), 359 deletions(-)

diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 3fcc3088c..1105a25f8 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -42,16 +42,10 @@ void vp8_de_alloc_frame_buffers(VP8_COMMON *oci)
     vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame);
     vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer);
 
-    vpx_free(oci->above_context[Y1CONTEXT]);
-    vpx_free(oci->above_context[UCONTEXT]);
-    vpx_free(oci->above_context[VCONTEXT]);
-    vpx_free(oci->above_context[Y2CONTEXT]);
+    vpx_free(oci->above_context);
     vpx_free(oci->mip);
 
-    oci->above_context[Y1CONTEXT] = 0;
-    oci->above_context[UCONTEXT]  = 0;
-    oci->above_context[VCONTEXT]  = 0;
-    oci->above_context[Y2CONTEXT] = 0;
+    oci->above_context = 0;
     oci->mip = 0;
 
 }
@@ -118,33 +112,9 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
     oci->mi = oci->mip + oci->mode_info_stride + 1;
 
 
-    oci->above_context[Y1CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 4 , 1);
+    oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1);
 
-    if (!oci->above_context[Y1CONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[UCONTEXT]  = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
-    if (!oci->above_context[UCONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[VCONTEXT]  = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols * 2 , 1);
-
-    if (!oci->above_context[VCONTEXT])
-    {
-        vp8_de_alloc_frame_buffers(oci);
-        return ALLOC_FAILURE;
-    }
-
-    oci->above_context[Y2CONTEXT] = vpx_calloc(sizeof(ENTROPY_CONTEXT) * oci->mb_cols     , 1);
-
-    if (!oci->above_context[Y2CONTEXT])
+    if (!oci->above_context)
     {
         vp8_de_alloc_frame_buffers(oci);
         return ALLOC_FAILURE;
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index 7bb127ae8..f39e5b2c0 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -12,13 +12,13 @@
 #include "blockd.h"
 #include "vpx_mem/vpx_mem.h"
 
-void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count)
-{
-    vpx_memcpy(t->l, l, sizeof(ENTROPY_CONTEXT) * count);
-    vpx_memcpy(t->a, a, sizeof(ENTROPY_CONTEXT) * count);
-}
-
-const int vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0};
-const int vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0};
 const int vp8_block2type[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 1};
-const int vp8_block2context[25] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3};
+
+const unsigned char vp8_block2left[25] =
+{
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8
+};
+const unsigned char vp8_block2above[25] =
+{
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
+};
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index cb07e9ec7..b286e2e1a 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -49,19 +49,19 @@ typedef struct
 } POS;
 
 
-typedef int ENTROPY_CONTEXT;
-
+typedef char ENTROPY_CONTEXT;
 typedef struct
 {
-    ENTROPY_CONTEXT l[4];
-    ENTROPY_CONTEXT a[4];
-} TEMP_CONTEXT;
+    ENTROPY_CONTEXT y1[4];
+    ENTROPY_CONTEXT u[2];
+    ENTROPY_CONTEXT v[2];
+    ENTROPY_CONTEXT y2;
+} ENTROPY_CONTEXT_PLANES;
 
-extern void vp8_setup_temp_context(TEMP_CONTEXT *t, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int count);
-extern const int vp8_block2left[25];
-extern const int vp8_block2above[25];
 extern const int vp8_block2type[25];
-extern const int vp8_block2context[25];
+
+extern const unsigned char vp8_block2left[25];
+extern const unsigned char vp8_block2above[25];
 
 #define VP8_COMBINEENTROPYCONTEXTS( Dest, A, B) \
     Dest = ((A)!=0) + ((B)!=0);
@@ -237,8 +237,8 @@ typedef struct
     int left_available;
 
     // Y,U,V,Y2
-    ENTROPY_CONTEXT *above_context[4];   // row of context for each plane
-    ENTROPY_CONTEXT(*left_context)[4];   // (up to) 4 contexts ""
+    ENTROPY_CONTEXT_PLANES *above_context;
+    ENTROPY_CONTEXT_PLANES *left_context;
 
     // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
     unsigned char segmentation_enabled;
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 8dce00824..7b44f2a6f 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -165,8 +165,8 @@ typedef struct VP8Common
     int ref_frame_sign_bias[MAX_REF_FRAMES];    // Two state 0, 1
 
     // Y,U,V,Y2
-    ENTROPY_CONTEXT *above_context[4];   // row of context for each plane
-    ENTROPY_CONTEXT left_context[4][4];  // (up to) 4 contexts ""
+    ENTROPY_CONTEXT_PLANES *above_context;   // row of context for each plane
+    ENTROPY_CONTEXT_PLANES left_context;  // (up to) 4 contexts ""
 
 
     // keyframe block modes are predicted by their above, left neighbors
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 2aba5583d..377c7b631 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -13,7 +13,7 @@
 #include "vpx_ports/mem.h"
 #include "vpx_mem/vpx_mem.h"
 
-DECLARE_ALIGNED(16, const unsigned int, vp8dx_bitreader_norm[256]) =
+DECLARE_ALIGNED(16, const unsigned char, vp8dx_bitreader_norm[256]) =
 {
     0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 6ddceee20..050bab1d5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -95,7 +95,7 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
 #define IF_RTCD(x) NULL
 //#endif
 
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
 
 /* wrapper functions to hide RTCD. static means inline means hopefully no
  * penalty
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 45d7ec342..fb1794126 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -338,15 +338,12 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-    vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+    vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
     recon_yoffset = mb_row * recon_y_stride * 16;
     recon_uvoffset = mb_row * recon_uv_stride * 8;
     // reset above block coeffs
 
-    xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-    xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-    xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-    xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+    xd->above_context = pc->above_context;
     xd->up_available = (mb_row != 0);
 
     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -403,10 +400,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
         ++xd->mode_info_context;  /* next mb */
 
-        xd->above_context[Y1CONTEXT] += 4;
-        xd->above_context[UCONTEXT ] += 2;
-        xd->above_context[VCONTEXT ] += 2;
-        xd->above_context[Y2CONTEXT] ++;
+        xd->above_context++;
 
         pbi->current_mb_col_main = mb_col;
     }
@@ -561,7 +555,7 @@ static void init_frame(VP8D_COMP *pbi)
         }
     }
 
-    xd->left_context = pc->left_context;
+    xd->left_context = &pc->left_context;
     xd->mode_info_context = pc->mi;
     xd->frame_type = pc->frame_type;
     xd->mode_info_context->mbmi.mode = DC_PRED;
@@ -849,11 +843,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     else
         vp8_decode_mode_mvs(pbi);
 
-    // reset since these guys are used as iterators
-    vpx_memset(pc->above_context[Y1CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 4);
-    vpx_memset(pc->above_context[UCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
-    vpx_memset(pc->above_context[VCONTEXT ], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols * 2);
-    vpx_memset(pc->above_context[Y2CONTEXT], 0, sizeof(ENTROPY_CONTEXT) * pc->mb_cols);
+    vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
 
     vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
 
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 0a6235de9..e4d0ac234 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -19,7 +19,7 @@
 #define BOOL_DATA UINT8
 
 #define OCB_X PREV_COEF_CONTEXTS * ENTROPY_NODES
-DECLARE_ALIGNED(16, UINT16, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
+DECLARE_ALIGNED(16, UINT8, vp8_coef_bands_x[16]) = { 0, 1 * OCB_X, 2 * OCB_X, 3 * OCB_X, 6 * OCB_X, 4 * OCB_X, 5 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 6 * OCB_X, 7 * OCB_X};
 #define EOB_CONTEXT_NODE            0
 #define ZERO_CONTEXT_NODE           1
 #define ONE_CONTEXT_NODE            2
@@ -61,47 +61,16 @@ DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTR
 
 void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 {
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-
-    ENTROPY_CONTEXT *a;
-    ENTROPY_CONTEXT *l;
-
-    /* Clear entropy contexts for Y blocks */
-    a = A[Y1CONTEXT];
-    l = L[Y1CONTEXT];
-    *a = 0;
-    *(a+1) = 0;
-    *(a+2) = 0;
-    *(a+3) = 0;
-    *l = 0;
-    *(l+1) = 0;
-    *(l+2) = 0;
-    *(l+3) = 0;
-
-    /* Clear entropy contexts for U blocks */
-    a = A[UCONTEXT];
-    l = L[UCONTEXT];
-    *a = 0;
-    *(a+1) = 0;
-    *l = 0;
-    *(l+1) = 0;
-
-    /* Clear entropy contexts for V blocks */
-    a = A[VCONTEXT];
-    l = L[VCONTEXT];
-    *a = 0;
-    *(a+1) = 0;
-    *l = 0;
-    *(l+1) = 0;
-
     /* Clear entropy contexts for Y2 blocks */
     if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        a = A[Y2CONTEXT];
-        l = L[Y2CONTEXT];
-        *a = 0;
-        *l = 0;
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+    }
+    else
+    {
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
     }
 }
 
@@ -132,7 +101,7 @@ void vp8_init_detokenizer(VP8D_COMP *dx)
 }
 #endif
 
-DECLARE_ALIGNED(16, extern const unsigned int, vp8dx_bitreader_norm[256]);
+DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
 #define FILL \
     if(count < 0) \
         VP8DX_BOOL_DECODER_FILL(count, value, bufptr, bufend);
@@ -260,8 +229,8 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 #else
 int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
 {
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)x->left_context;
     const VP8_COMMON *const oc = & dx->common;
 
     BOOL_DECODER *bc = x->current_bc;
@@ -291,29 +260,24 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     int stop;
     INT16 val, bits_count;
     INT16 c;
-    INT16 t;
     INT16 v;
     const vp8_prob *Prob;
 
-    //int *scan;
     type = 3;
     i = 0;
     stop = 16;
 
+    scan = vp8_default_zig_zag1d;
+    qcoeff_ptr = &x->qcoeff[0];
+
     if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
         i = 24;
         stop = 24;
         type = 1;
-        qcoeff_ptr = &x->qcoeff[24*16];
-        scan = vp8_default_zig_zag1d;
+        qcoeff_ptr += 24*16;
         eobtotal -= 16;
     }
-    else
-    {
-        scan = vp8_default_zig_zag1d;
-        qcoeff_ptr = &x->qcoeff[0];
-    }
 
     bufend  = bc->user_buffer_end;
     bufptr  = bc->user_buffer;
@@ -325,13 +289,15 @@ int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x)
     coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
 
 BLOCK_LOOP:
-    a = A[ vp8_block2context[i] ] + vp8_block2above[i];
-    l = L[ vp8_block2context[i] ] + vp8_block2left[i];
+    a = A + vp8_block2above[i];
+    l = L + vp8_block2left[i];
+
     c = (INT16)(!type);
 
-    VP8_COMBINEENTROPYCONTEXTS(t, *a, *l);
+//    Dest = ((A)!=0) + ((B)!=0);
+    VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
     Prob = coef_probs;
-    Prob += t * ENTROPY_NODES;
+    Prob += v * ENTROPY_NODES;
 
 DO_WHILE:
     Prob += vp8_coef_bands_x[c];
@@ -418,9 +384,8 @@ ONE_CONTEXT_NODE_0_:
 
     qcoeff_ptr [ scan[15] ] = (INT16) v;
 BLOCK_FINISHED:
-    t = ((eobs[i] = c) != !type);   // any nonzero data?
+    *a = *l = ((eobs[i] = c) != !type);   // any nonzero data?
     eobtotal += c;
-    *a = *l = t;
     qcoeff_ptr += 16;
 
     i++;
@@ -430,12 +395,11 @@ BLOCK_FINISHED:
 
     if (i == 25)
     {
-        scan = vp8_default_zig_zag1d;//x->scan_order1d;
         type = 0;
         i = 0;
         stop = 16;
         coef_probs = oc->fc.coef_probs [type] [ 0 ] [0];
-        qcoeff_ptr = &x->qcoeff[0];
+        qcoeff_ptr -= (24*16 + 16);
         goto BLOCK_LOOP;
     }
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index fd422eb50..ba8439508 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -157,7 +157,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
     int ithread = ((DECODETHREAD_DATA *)p_data)->ithread;
     VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1);
     MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2);
-    ENTROPY_CONTEXT mb_row_left_context[4][4];
+    ENTROPY_CONTEXT_PLANES mb_row_left_context;
 
     while (1)
     {
@@ -197,12 +197,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                     recon_uvoffset = mb_row * recon_uv_stride * 8;
                     // reset above block coeffs
 
-                    xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-                    xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-                    xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-                    xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
-                    xd->left_context = mb_row_left_context;
-                    vpx_memset(mb_row_left_context, 0, sizeof(mb_row_left_context));
+                    xd->above_context = pc->above_context;
+                    xd->left_context = &mb_row_left_context;
+                    vpx_memset(&mb_row_left_context, 0, sizeof(mb_row_left_context));
                     xd->up_available = (mb_row != 0);
 
                     xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -260,10 +257,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         ++xd->mode_info_context;  /* next mb */
 
-                        xd->above_context[Y1CONTEXT] += 4;
-                        xd->above_context[UCONTEXT ] += 2;
-                        xd->above_context[VCONTEXT ] += 2;
-                        xd->above_context[Y2CONTEXT] ++;
+                        xd->above_context++;
 
                         //pbi->mb_row_di[ithread].current_mb_col = mb_col;
                         pbi->current_mb_col[mb_row] = mb_col;
@@ -604,15 +598,12 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
             if (mb_row > 0)
                 last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
 
-            vpx_memset(pc->left_context, 0, sizeof(pc->left_context));
+            vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
             recon_yoffset = mb_row * recon_y_stride * 16;
             recon_uvoffset = mb_row * recon_uv_stride * 8;
             // reset above block coeffs
 
-            xd->above_context[Y1CONTEXT] = pc->above_context[Y1CONTEXT];
-            xd->above_context[UCONTEXT ] = pc->above_context[UCONTEXT];
-            xd->above_context[VCONTEXT ] = pc->above_context[VCONTEXT];
-            xd->above_context[Y2CONTEXT] = pc->above_context[Y2CONTEXT];
+            xd->above_context = pc->above_context;
             xd->up_available = (mb_row != 0);
 
             xd->mb_to_top_edge = -((mb_row * 16)) << 3;
@@ -672,10 +663,7 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
                 ++xd->mode_info_context;  /* next mb */
 
-                xd->above_context[Y1CONTEXT] += 4;
-                xd->above_context[UCONTEXT ] += 2;
-                xd->above_context[VCONTEXT ] += 2;
-                xd->above_context[Y2CONTEXT] ++;
+                xd->above_context++;
 
                 //pbi->current_mb_col_main = mb_col;
                 pbi->current_mb_col[mb_row] = mb_col;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 0f9bf4a1d..b28ba1a8b 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -348,10 +348,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
 
     // reset above block coeffs
-    xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
-    xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
-    xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
-    xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
+    xd->above_context = cm->above_context;
 
     xd->up_available = (mb_row != 0);
     recon_yoffset = (mb_row * recon_y_stride * 16);
@@ -472,10 +469,7 @@ void encode_mb_row(VP8_COMP *cpi,
         // skip to next mb
         xd->mode_info_context++;
 
-        xd->above_context[Y1CONTEXT] += 4;
-        xd->above_context[UCONTEXT ] += 2;
-        xd->above_context[VCONTEXT ] += 2;
-        xd->above_context[Y2CONTEXT] ++;
+        xd->above_context++;
         cpi->current_mb_col_main = mb_col;
     }
 
@@ -626,7 +620,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
     xd->mode_info_context->mbmi.mode = DC_PRED;
     xd->mode_info_context->mbmi.uv_mode = DC_PRED;
 
-    xd->left_context = cm->left_context;
+    xd->left_context = &cm->left_context;
 
     vp8_zero(cpi->count_mb_ref_frame_usage)
     vp8_zero(cpi->ymode_count)
@@ -634,17 +628,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 
     x->mvc = cm->fc.mvc;
 
-    // vp8_zero( entropy_stats)
-    {
-        ENTROPY_CONTEXT **p = cm->above_context;
-        const size_t L = cm->mb_cols;
-
-        vp8_zero_array(p [Y1CONTEXT], L * 4)
-        vp8_zero_array(p [ UCONTEXT], L * 2)
-        vp8_zero_array(p [ VCONTEXT], L * 2)
-        vp8_zero_array(p [Y2CONTEXT], L)
-    }
-
+    vpx_memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols);
 
     {
         struct vpx_usec_timer  emr_timer;
@@ -1128,7 +1112,7 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 extern int cnt_pm;
 #endif
 
-extern void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+extern void vp8_fix_contexts(MACROBLOCKD *x);
 
 int vp8cx_encode_inter_macroblock
 (
@@ -1282,7 +1266,7 @@ int vp8cx_encode_inter_macroblock
 
             xd->mode_info_context->mbmi.mb_skip_coeff = 1;
             cpi->skip_true_count ++;
-            vp8_fix_contexts(cpi, xd);
+            vp8_fix_contexts(xd);
         }
         else
         {
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 52f0291f9..0cc7880a9 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -488,12 +488,18 @@ void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
 void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
-    TEMP_CONTEXT t, t2;
     int type;
     int has_2nd_order;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
-        x->e_mbd.left_context[Y1CONTEXT], 4);
     has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
     type = has_2nd_order ? 0 : 3;
@@ -501,24 +507,19 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
     for (b = 0; b < 16; b++)
     {
         vp8_optimize_b(x, b, type,
-            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT],
-        x->e_mbd.left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT],
-        x->e_mbd.left_context[VCONTEXT], 2);
-
     for (b = 16; b < 20; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
     for (b = 20; b < 24; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-            t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
 
@@ -565,17 +566,25 @@ static void vp8_find_mb_skip_coef(MACROBLOCK *x)
 void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
-    TEMP_CONTEXT t;
     int type;
     int has_2nd_order;
 
-    if (!x->e_mbd.above_context[Y1CONTEXT])
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    if (!x->e_mbd.above_context)
         return;
 
-    if (!x->e_mbd.left_context[Y1CONTEXT])
+    if (!x->e_mbd.left_context)
         return;
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[Y1CONTEXT],
-        x->e_mbd.left_context[Y1CONTEXT], 4);
+
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
+
     has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED
         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
     type = has_2nd_order ? 0 : 3;
@@ -583,7 +592,7 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
     for (b = 0; b < 16; b++)
     {
         vp8_optimize_b(x, b, type,
-            t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
+        ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
     /*
@@ -599,33 +608,32 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
-    TEMP_CONTEXT t, t2;
-    if (!x->e_mbd.above_context[UCONTEXT])
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    if (!x->e_mbd.above_context)
         return;
 
-    if (!x->e_mbd.left_context[UCONTEXT])
+    if (!x->e_mbd.left_context)
         return;
 
-    if (!x->e_mbd.above_context[VCONTEXT])
-        return;
+    vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
 
-    if (!x->e_mbd.left_context[VCONTEXT])
-        return;
-
-    vp8_setup_temp_context(&t, x->e_mbd.above_context[UCONTEXT], x->e_mbd.left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->e_mbd.above_context[VCONTEXT], x->e_mbd.left_context[VCONTEXT], 2);
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     for (b = 16; b < 20; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-                       t.a + vp8_block2above[b], t.l + vp8_block2left[b], rtcd);
-
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
     for (b = 20; b < 24; b++)
     {
         vp8_optimize_b(x, b, vp8_block2type[b],
-                       t2.a + vp8_block2above[b], t2.l + vp8_block2left[b], rtcd);
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
 }
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index e87a06cb3..b677fde66 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -28,7 +28,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
     int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread;
     VP8_COMP *cpi   = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1);
     MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2);
-    ENTROPY_CONTEXT mb_row_left_context[4][4];
+    ENTROPY_CONTEXT_PLANES mb_row_left_context;
 
     //printf("Started thread %d\n", ithread);
 
@@ -68,11 +68,8 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                         last_row_current_mb_col = &cpi->current_mb_col_main;
 
                     // reset above block coeffs
-                    xd->above_context[Y1CONTEXT] = cm->above_context[Y1CONTEXT];
-                    xd->above_context[UCONTEXT ] = cm->above_context[UCONTEXT ];
-                    xd->above_context[VCONTEXT ] = cm->above_context[VCONTEXT ];
-                    xd->above_context[Y2CONTEXT] = cm->above_context[Y2CONTEXT];
-                    xd->left_context = mb_row_left_context;
+                    xd->above_context = cm->above_context;
+                    xd->left_context = &mb_row_left_context;
 
                     vp8_zero(mb_row_left_context);
 
@@ -183,10 +180,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
                         // skip to next mb
                         xd->mode_info_context++;
 
-                        xd->above_context[Y1CONTEXT] += 4;
-                        xd->above_context[UCONTEXT ] += 2;
-                        xd->above_context[VCONTEXT ] += 2;
-                        xd->above_context[Y2CONTEXT] ++;
+                        xd->above_context++;
 
                         cpi->mb_row_ei[ithread].current_mb_col = mb_col;
 
@@ -330,11 +324,6 @@ static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc)
         zd->mb_segement_abs_delta      = xd->mb_segement_abs_delta;
         vpx_memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        /*
-        memcpy(zd->above_context,        xd->above_context, sizeof(xd->above_context));
-        memcpy(zd->mb_segment_tree_probs,  xd->mb_segment_tree_probs, sizeof(xd->mb_segment_tree_probs));
-        memcpy(zd->segment_feature_data,  xd->segment_feature_data, sizeof(xd->segment_feature_data));
-        */
         for (i = 0; i < 25; i++)
         {
             zd->block[i].dequant = xd->block[i].dequant;
@@ -402,7 +391,7 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         mb->rddiv = cpi->RDDIV;
         mb->rdmult = cpi->RDMULT;
 
-        mbd->left_context = cm->left_context;
+        mbd->left_context = &cm->left_context;
         mb->mvc = cm->fc.mvc;
 
         setup_mbby_copy(&mbr_ei[i].mb, x);
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index cd615dcd6..9da715e99 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -220,13 +220,20 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
 {
     MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
-    TEMP_CONTEXT t;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int error = RD_ESTIMATE(mb->rdmult, mb->rddiv, cost, 0); // Rd estimate for the cost of the block prediction mode
     int distortion = 0;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     vp8_intra_prediction_down_copy(xd);
-    vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
 
     for (i = 0; i < 16; i++)
     {
@@ -239,8 +246,8 @@ int vp8_pick_intra4x4mby_modes(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *mb, int
 
         error += pick_intra4x4block(rtcd,
                                     mb, mb->block + i, xd->block + i, &best_mode, A, L,
-                                    t.a + vp8_block2above[i],
-                                    t.l + vp8_block2left[i], &r, &d);
+                                    ta + vp8_block2above[i],
+                                    tl + vp8_block2left[i], &r, &d);
 
         cost += r;
         distortion += d;
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index c2a3ce1f7..c5dd59a61 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -614,24 +614,28 @@ int vp8_rdcost_mby(MACROBLOCK *mb)
 {
     int cost = 0;
     int b;
-    TEMP_CONTEXT t, t2;
     int type = 0;
-
     MACROBLOCKD *x = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    vp8_setup_temp_context(&t, x->above_context[Y1CONTEXT], x->left_context[Y1CONTEXT], 4);
-    vp8_setup_temp_context(&t2, x->above_context[Y2CONTEXT], x->left_context[Y2CONTEXT], 1);
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     if (x->mode_info_context->mbmi.mode == SPLITMV)
         type = 3;
 
     for (b = 0; b < 16; b++)
         cost += cost_coeffs(mb, x->block + b, type,
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
     if (x->mode_info_context->mbmi.mode != SPLITMV)
         cost += cost_coeffs(mb, x->block + 24, 1,
-                            t2.a + vp8_block2above[24], t2.l + vp8_block2left[24]);
+                    ta + vp8_block2above[24], tl + vp8_block2left[24]);
 
     return cost;
 }
@@ -710,13 +714,20 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
 {
     MACROBLOCKD *const xd = &mb->e_mbd;
     int i;
-    TEMP_CONTEXT t;
     int cost = mb->mbmode_cost [xd->frame_type] [B_PRED];
     int distortion = 0;
     int tot_rate_y = 0;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
+
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     vp8_intra_prediction_down_copy(xd);
-    vp8_setup_temp_context(&t, xd->above_context[Y1CONTEXT], xd->left_context[Y1CONTEXT], 4);
 
     for (i = 0; i < 16; i++)
     {
@@ -729,8 +740,8 @@ int vp8_rd_pick_intra4x4mby_modes(VP8_COMP *cpi, MACROBLOCK *mb, int *Rate, int
 
         rd_pick_intra4x4block(
             cpi, mb, mb->block + i, xd->block + i, &best_mode, A, L,
-            t.a + vp8_block2above[i],
-            t.l + vp8_block2left[i], &r, &ry, &d);
+            ta + vp8_block2above[i],
+            tl + vp8_block2left[i], &r, &ry, &d);
 
         cost += r;
         distortion += d;
@@ -792,21 +803,26 @@ int vp8_rd_pick_intra16x16mby_mode(VP8_COMP *cpi, MACROBLOCK *x, int *Rate, int
 
 static int rd_cost_mbuv(MACROBLOCK *mb)
 {
-    TEMP_CONTEXT t, t2;
     int b;
     int cost = 0;
     MACROBLOCKD *x = &mb->e_mbd;
+    ENTROPY_CONTEXT_PLANES t_above, t_left;
+    ENTROPY_CONTEXT *ta;
+    ENTROPY_CONTEXT *tl;
 
-    vp8_setup_temp_context(&t, x->above_context[UCONTEXT], x->left_context[UCONTEXT], 2);
-    vp8_setup_temp_context(&t2, x->above_context[VCONTEXT], x->left_context[VCONTEXT], 2);
+    vpx_memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+    vpx_memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+    ta = (ENTROPY_CONTEXT *)&t_above;
+    tl = (ENTROPY_CONTEXT *)&t_left;
 
     for (b = 16; b < 20; b++)
         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
-                            t.a + vp8_block2above[b], t.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
     for (b = 20; b < 24; b++)
         cost += cost_coeffs(mb, x->block + b, vp8_block2type[b],
-                            t2.a + vp8_block2above[b], t2.l + vp8_block2left[b]);
+                    ta + vp8_block2above[b], tl + vp8_block2left[b]);
 
     return cost;
 }
@@ -995,18 +1011,19 @@ static int labels2mode(
     return cost;
 }
 
-static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, TEMP_CONTEXT *t)
+static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels,
+                              int which_label, ENTROPY_CONTEXT *ta,
+                              ENTROPY_CONTEXT *tl)
 {
     int cost = 0;
     int b;
     MACROBLOCKD *x = &mb->e_mbd;
 
-
     for (b = 0; b < 16; b++)
         if (labels[ b] == which_label)
             cost += cost_coeffs(mb, x->block + b, 3,
-                                t->a + vp8_block2above[b],
-                                t->l + vp8_block2left[b]);
+                                ta + vp8_block2above[b],
+                                tl + vp8_block2left[b]);
 
     return cost;
 
@@ -1139,9 +1156,20 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
 
         vp8_variance_fn_ptr_t v_fn_ptr;
 
-        TEMP_CONTEXT t;
-        TEMP_CONTEXT tb;
-        vp8_setup_temp_context(&t, xc->above_context[Y1CONTEXT], xc->left_context[Y1CONTEXT], 4);
+        ENTROPY_CONTEXT_PLANES t_above, t_left;
+        ENTROPY_CONTEXT *ta;
+        ENTROPY_CONTEXT *tl;
+        ENTROPY_CONTEXT_PLANES t_above_b, t_left_b;
+        ENTROPY_CONTEXT *ta_b;
+        ENTROPY_CONTEXT *tl_b;
+
+        vpx_memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));
+
+        ta = (ENTROPY_CONTEXT *)&t_above;
+        tl = (ENTROPY_CONTEXT *)&t_left;
+        ta_b = (ENTROPY_CONTEXT *)&t_above_b;
+        tl_b = (ENTROPY_CONTEXT *)&t_left_b;
 
         br = 0;
         bd = 0;
@@ -1226,9 +1254,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                 int this_rd;
                 int num00;
                 int labelyrate;
+                ENTROPY_CONTEXT_PLANES t_above_s, t_left_s;
+                ENTROPY_CONTEXT *ta_s;
+                ENTROPY_CONTEXT *tl_s;
 
-                TEMP_CONTEXT ts;
-                vp8_setup_temp_context(&ts, &t.a[0], &t.l[0], 4);
+                vpx_memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES));
+                vpx_memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES));
+
+                ta_s = (ENTROPY_CONTEXT *)&t_above_s;
+                tl_s = (ENTROPY_CONTEXT *)&t_left_s;
 
                 if (this_mode == NEW4X4)
                 {
@@ -1313,7 +1347,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
 
                 distortion = vp8_encode_inter_mb_segment(x, labels, i, IF_RTCD(&cpi->rtcd.encodemb)) / 4;
 
-                labelyrate = rdcost_mbsegment_y(x, labels, i, &ts);
+                labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s);
                 rate += labelyrate;
 
                 this_rd = RDFUNC(x->rdmult, x->rddiv, rate, distortion, cpi->target_bits_per_mb);
@@ -1325,12 +1359,15 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                     bestlabelyrate = labelyrate;
                     mode_selected = this_mode;
                     best_label_rd = this_rd;
-                    vp8_setup_temp_context(&tb, &ts.a[0], &ts.l[0], 4);
+
+                    vpx_memcpy(ta_b, ta_s, sizeof(ENTROPY_CONTEXT_PLANES));
+                    vpx_memcpy(tl_b, tl_s, sizeof(ENTROPY_CONTEXT_PLANES));
 
                 }
             }
 
-            vp8_setup_temp_context(&t, &tb.a[0], &tb.l[0], 4);
+            vpx_memcpy(ta, ta_b, sizeof(ENTROPY_CONTEXT_PLANES));
+            vpx_memcpy(tl, tl_b, sizeof(ENTROPY_CONTEXT_PLANES));
 
             labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], best_ref_mv, mvcost);
 
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 83365d38a..2d0a45c75 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -24,7 +24,7 @@
 _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens];
 #endif
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x);
+void vp8_fix_contexts(MACROBLOCKD *x);
 
 TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
 const TOKENEXTRA *vp8_dct_value_tokens_ptr;
@@ -197,79 +197,11 @@ static void tokenize1st_order_b
     *a = *l = pt;
 
 }
-#if 0
+
 void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 {
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
-    int plane_type;
-    int b;
-
-    TOKENEXTRA *start = *t;
-    TOKENEXTRA *tp = *t;
-
-    x->mbmi.dc_diff = 1;
-
-    vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
-
-    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
-    {
-        plane_type = 3;
-    }
-    else
-    {
-        tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                            A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
-        plane_type = 0;
-
-    }
-
-    for (b = 0; b < 16; b++)
-        tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
-    for (b = 16; b < 24; b++)
-        tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
-
-    if (cpi->common.mb_no_coeff_skip)
-    {
-        x->mbmi.mb_skip_coeff = 1;
-
-        while ((tp != *t) && x->mbmi.mb_skip_coeff)
-        {
-            x->mbmi.mb_skip_coeff = (x->mbmi.mb_skip_coeff && (tp->Token == DCT_EOB_TOKEN));
-            tp ++;
-        }
-
-        if (x->mbmi.mb_skip_coeff == 1)
-        {
-            x->mbmi.dc_diff = 0;
-            //redo the coutnts
-            vpx_memcpy(cpi->coef_counts, cpi->coef_counts_backup, sizeof(cpi->coef_counts));
-
-            *t = start;
-            cpi->skip_true_count++;
-
-            //skip_true_count++;
-        }
-        else
-        {
-
-            cpi->skip_false_count++;
-            //skip_false_count++;
-        }
-    }
-}
-#else
-void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
-{
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
     int plane_type;
     int b;
 
@@ -300,7 +232,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
             vp8_stuff_mb(cpi, x, t) ;
         else
         {
-            vp8_fix_contexts(cpi, x);
+            vp8_fix_contexts(x);
         }
 
         if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
@@ -354,20 +286,20 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     else
     {
         tokenize2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                            A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+                   A + vp8_block2above[24], L + vp8_block2left[24], cpi);
         plane_type = 0;
 
     }
 
     for (b = 0; b < 16; b++)
         tokenize1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                            A + vp8_block2above[b],
+                            L + vp8_block2left[b], cpi);
 
     for (b = 16; b < 24; b++)
         tokenize1st_order_b(x->block + b, t, 2, x->frame_type,
-                            A[vp8_block2context[b]] + vp8_block2above[b],
-                            L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                            A + vp8_block2above[b],
+                            L + vp8_block2left[b], cpi);
 
 #if 0
 
@@ -406,7 +338,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 
 #endif
 }
-#endif
+
 
 #ifdef ENTROPY_STATS
 
@@ -581,14 +513,13 @@ void stuff1st_order_buv
 
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 {
-    //int i;
-    ENTROPY_CONTEXT **const A = x->above_context;
-    ENTROPY_CONTEXT(* const L)[4] = x->left_context;
+    ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
+    ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context;
     int plane_type;
     int b;
 
     stuff2nd_order_b(x->block + 24, t, 1, x->frame_type,
-                     A[Y2CONTEXT] + vp8_block2above[24], L[Y2CONTEXT] + vp8_block2left[24], cpi);
+                     A + vp8_block2above[24], L + vp8_block2left[24], cpi);
     plane_type = 0;
 
 
@@ -600,38 +531,27 @@ void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 
     for (b = 0; b < 16; b++)
         stuff1st_order_b(x->block + b, t, plane_type, x->frame_type,
-                         A[vp8_block2context[b]] + vp8_block2above[b],
-                         L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                         A + vp8_block2above[b],
+                         L + vp8_block2left[b], cpi);
 
     for (b = 16; b < 24; b++)
         stuff1st_order_buv(x->block + b, t, 2, x->frame_type,
-                           A[vp8_block2context[b]] + vp8_block2above[b],
-                           L[vp8_block2context[b]] + vp8_block2left[b], cpi);
+                           A + vp8_block2above[b],
+                           L + vp8_block2left[b], cpi);
 
 }
-void vp8_fix_contexts(VP8_COMP *cpi, MACROBLOCKD *x)
+void vp8_fix_contexts(MACROBLOCKD *x)
 {
-    x->left_context[Y1CONTEXT][0] = 0;
-    x->left_context[Y1CONTEXT][1] = 0;
-    x->left_context[Y1CONTEXT][2] = 0;
-    x->left_context[Y1CONTEXT][3] = 0;
-    x->left_context[UCONTEXT][0]  = 0;
-    x->left_context[VCONTEXT][0]  = 0;
-    x->left_context[UCONTEXT][1]  = 0;
-    x->left_context[VCONTEXT][1]  = 0;
-
-    x->above_context[Y1CONTEXT][0] = 0;
-    x->above_context[Y1CONTEXT][1] = 0;
-    x->above_context[Y1CONTEXT][2] = 0;
-    x->above_context[Y1CONTEXT][3] = 0;
-    x->above_context[UCONTEXT][0]  = 0;
-    x->above_context[VCONTEXT][0]  = 0;
-    x->above_context[UCONTEXT][1]  = 0;
-    x->above_context[VCONTEXT][1]  = 0;
-
+    /* Clear entropy contexts for Y2 blocks */
     if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        x->left_context[Y2CONTEXT][0] = 0;
-        x->above_context[Y2CONTEXT][0] = 0;
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES));
     }
+    else
+    {
+        vpx_memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+        vpx_memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)-1);
+    }
+
 }

From 0b94f5d6e827c0b4d6b2590592be4285f60c8477 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 26 Aug 2010 16:11:30 -0400
Subject: [PATCH 136/307] followup arm patch

make the arm asm detokenizer work with the new structures

Change-Id: I7cd92c2a018ec24032bb1cfd1bb9739bc84b444a
---
 vp8/common/arm/vpx_asm_offsets.c |  6 +--
 vp8/decoder/arm/detokenize.asm   | 89 ++++++++++++++------------------
 vp8/decoder/detokenize.c         | 15 +++---
 vp8/decoder/onyxd_int.h          |  8 +--
 4 files changed, 54 insertions(+), 64 deletions(-)

diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 0604ae3d1..8009a683c 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -55,11 +55,11 @@ DEFINE(mb_up_available,                         offsetof(MACROBLOCKD, up_availab
 DEFINE(mb_left_available,                       offsetof(MACROBLOCKD, left_available));
 
 DEFINE(detok_scan,                              offsetof(DETOK, scan));
-DEFINE(detok_ptr_onyxblock2context_leftabove,   offsetof(DETOK, ptr_onyxblock2context_leftabove));
-DEFINE(detok_onyx_coef_tree_ptr,                offsetof(DETOK, vp8_coef_tree_ptr));
+DEFINE(detok_ptr_block2leftabove,               offsetof(DETOK, ptr_block2leftabove));
+DEFINE(detok_coef_tree_ptr,                     offsetof(DETOK, vp8_coef_tree_ptr));
 DEFINE(detok_teb_base_ptr,                      offsetof(DETOK, teb_base_ptr));
 DEFINE(detok_norm_ptr,                          offsetof(DETOK, norm_ptr));
-DEFINE(detok_ptr_onyx_coef_bands_x,             offsetof(DETOK, ptr_onyx_coef_bands_x));
+DEFINE(detok_ptr_coef_bands_x,                  offsetof(DETOK, ptr_coef_bands_x));
 
 DEFINE(detok_A,                                 offsetof(DETOK, A));
 DEFINE(detok_L,                                 offsetof(DETOK, L));
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
index bafacb957..a4f0cb0a5 100644
--- a/vp8/decoder/arm/detokenize.asm
+++ b/vp8/decoder/arm/detokenize.asm
@@ -28,8 +28,7 @@ l_stacksize EQU     64
 
 
 ;; constant offsets -- these should be created at build time
-c_onyxblock2left_offset      EQU 25
-c_onyxblock2above_offset     EQU 50
+c_block2above_offset         EQU 25
 c_entropy_nodes              EQU 11
 c_dct_eob_token              EQU 11
 
@@ -42,12 +41,12 @@ c_dct_eob_token              EQU 11
     ldr         r1, [r9, #detok_current_bc]
     ldr         r0, [r9, #detok_qcoeff_start_ptr]
     mov         r11, #0                     ; i
-    mov         r3, #0x10                   ; stop
+    mov         r3, #16                     ; stop
 
     cmp         r7, #1                      ; type ?= 1
     addeq       r11, r11, #24               ; i = 24
     addeq       r3, r3, #8                  ; stop = 24
-    addeq       r0, r0, #3, 24              ; qcoefptr += 24*16 ?CHECKME
+    addeq       r0, r0, #3, 24              ; qcoefptr += 24*16
 
     str         r0, [sp, #l_qcoeff]
     str         r11, [sp, #l_i]
@@ -59,61 +58,50 @@ c_dct_eob_token              EQU 11
 
     ldr         r8, [r1, #bool_decoder_user_buffer]
 
-    ldr         r10, [lr, #detok_coef_probs] ; coef_probs[type]
+    ldr         r10, [lr, #detok_coef_probs]
     ldr         r5, [r1, #bool_decoder_count]
     ldr         r6, [r1, #bool_decoder_range]
     ldr         r4, [r1, #bool_decoder_value]
 
     str         r10, [sp, #l_coef_ptr]
 
-    ;align 4
 BLOCK_LOOP
-    ldr         r3, [r9, #detok_ptr_onyxblock2context_leftabove]
-    ldr         r2, [r9, #detok_A]
+    ldr         r3, [r9, #detok_ptr_block2leftabove]
     ldr         r1, [r9, #detok_L]
-    ldrb        r12, [r3, r11]!             ; onyxblock2context[i]
+    ldr         r2, [r9, #detok_A]
+    ldrb        r12, [r3, r11]!             ; block2left[i]
+    ldrb        r3, [r3, #c_block2above_offset]; block2above[i]
 
     cmp         r7, #0                      ; c = !type
     moveq       r7, #1
     movne       r7, #0
 
-    ldr         r0, [r2, r12, lsl #2]       ; A[onyxblock2context[i]]
-    add         r1, r1, r12, lsl #4         ; L + onyxblock2context[i] << 4
-      ; A is ptr to ptr (**)
-      ; L is ptr to data (*[4])
-
-    ldrb        r2, [r3, #c_onyxblock2above_offset] ; + above offset
-    ldrb        r3, [r3, #c_onyxblock2left_offset] ; + left offset
+    ldrb        r0, [r1, r12]!              ; *(L += block2left[i])
+    ldrb        r3, [r2, r3]!               ; *(A += block2above[i])
     mov         lr, #c_entropy_nodes        ; ENTROPY_NODES = 11
-;;  ;++
 
-    ldr         r2, [r0, r2, lsl #2]!       ; A + above offset
-    ldr         r3, [r1, r3, lsl #2]!       ; L + left offset
 ; VP8_COMBINEENTROPYCONTETEXTS(t, *a, *l) => t = ((*a) != 0) + ((*l) !=0)
-    cmp         r2, #0                      ; *a ?= 0
-    movne       r2, #1                      ; haha if a == 0 no need to set up another var to state that pretty sweet :)
-    cmp         r3, #0                      ; *l ?= 0
-    addne       r2, r2, #1                  ; t
+    cmp         r0, #0                      ; *l ?= 0
+    movne       r0, #1
+    cmp         r3, #0                      ; *a ?= 0
+    addne       r0, r0, #1                  ; t
 
     str         r1, [sp, #l_l_ptr]          ; save &l
-    str         r0, [sp, #l_a_ptr]          ; save &a
-    smlabb      r0, r2, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
+    str         r2, [sp, #l_a_ptr]          ; save &a
+    smlabb      r0, r0, lr, r10             ; Prob = coef_probs + (t * ENTROPY_NODES)
     mov         r1, #0                      ; t = 0
     str         r7, [sp, #l_c]
 
     ;align 4
 COEFF_LOOP
-    ldr         r3, [r9, #detok_ptr_onyx_coef_bands_x]
-    ldr         lr, [r9, #detok_onyx_coef_tree_ptr]
-
-      ; onyx_coef_bands_x is UINT16
-    add         r3, r3, r7, lsl #1            ; coef_bands_x[c]
-    ldrh        r3, [r3]                      ; UINT16
-
-    ;++
+    ldr         r3, [r9, #detok_ptr_coef_bands_x]
+    ldr         lr, [r9, #detok_coef_tree_ptr]
+    ;STALL
+    ldrb        r3, [r3, r7]                ; coef_bands_x[c]
+    ;STALL
+    ;STALL
     add         r0, r0, r3                  ; Prob += coef_bands_x[c]
 
-    ;align 4
 get_token_loop
     ldrb        r2, [r0, +r1, asr #1]       ; Prob[t >> 1]
     mov         r3, r6, lsl #8              ; range << 8
@@ -221,14 +209,14 @@ SKIP_EXTRABITS
     mov         r4, r4, lsl r3              ; value <<= shift
     ldrleb      r2, [r8], #1                ; *(bufptr++)
     addle       r5, r5, #8                  ; count += 8
-    rsble       r3, r5, #24                  ; BR_COUNT - count
+    rsble       r3, r5, #24                 ; BR_COUNT - count
     orrle       r4, r4, r2, lsl r3          ; value |= *bufptr << (BR_COUNT - count)
 
-    add         r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+    add         r0, r0, #11                 ; Prob += ENTROPY_NODES (11)
 
     cmn         r1, #1                      ; t < -ONE_TOKEN
 
-    addlt       r0, r0, #0xB                ; Prob += ENTROPY_NODES (11)
+    addlt       r0, r0, #11                 ; Prob += ENTROPY_NODES (11)
 
     mvn         r1, #1                      ; t = -1 ???? C is -2
 
@@ -236,7 +224,7 @@ SKIP_EOB_CHECK
     ldr         r7, [sp, #l_c]              ; c
     ldr         r3, [r9, #detok_scan]
     add         r1, r1, #2                  ; t+= 2
-    cmp         r7, #(0x10 - 1)             ; c should will be one higher
+    cmp         r7, #15                     ; c should will be one higher
 
     ldr         r3, [r3, +r7, lsl #2]       ; scan[c] this needs pre-inc c value
     add         r7, r7, #1                  ; c++
@@ -247,7 +235,7 @@ SKIP_EOB_CHECK
 
     blt         COEFF_LOOP
 
-    sub         r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c ; never stored! no condition!
+    sub         r7, r7, #1                  ; if(t != -DCT_EOB_TOKEN) --c
 
 END_OF_BLOCK
     ldr         r3, [sp, #l_type]           ; type
@@ -269,50 +257,49 @@ END_OF_BLOCK
     movne       r3, #1                      ; t
     moveq       r3, #0
 
-    add         r0, r0, #0x20               ; qcoeff += 32 (16 * 2?)
+    add         r0, r0, #32                 ; qcoeff += 32 (16 * 2?)
     add         r11, r11, #1                ; i++
-    str         r3, [r7]                    ; *l = t
-    str         r3, [r2]                    ; *a = t
+    strb        r3, [r7]                    ; *l = t
+    strb        r3, [r2]                    ; *a = t
     str         r0, [sp, #l_qcoeff]         ; qcoeff
     str         r11, [sp, #l_i]             ; i
 
-    cmp         r11, r12                    ; i >= stop ? VERIFY should be strictly LT(<)?
+    cmp         r11, r12                    ; i < stop
     ldr         r7, [sp, #l_type]           ; type
-    mov         lr, #0xB                    ; 11 (ENTORPY_NODES?)
 
     blt         BLOCK_LOOP
 
-    cmp         r11, #0x19                  ; i ?= 25
+    cmp         r11, #25                    ; i ?= 25
     bne         ln2_decode_mb_to
 
     ldr         r12, [r9, #detok_qcoeff_start_ptr]
     ldr         r10, [r9, #detok_coef_probs]
     mov         r7, #0                      ; type/i = 0
-    mov         r3, #0x10                   ; stop = 0
+    mov         r3, #16                     ; stop = 16
     str         r12, [sp, #l_qcoeff]        ; qcoeff_ptr = qcoeff_start_ptr
     str         r7, [sp, #l_i]
     str         r7, [sp, #l_type]
     str         r3, [sp, #l_stop]
 
-    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] (0)
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type=0]
 
     b           BLOCK_LOOP
 
 ln2_decode_mb_to
-    cmp         r11, #0x10                  ; i ?= 16
+    cmp         r11, #16                    ; i ?= 16
     bne         ln1_decode_mb_to
 
     mov         r10, #detok_coef_probs
     add         r10, r10, #2*4              ; coef_probs[type]
-    ldr         r10, [r9, r10]              ; detok + 48 - THIS IS PROBABLY THE ISSUE: NEW STRUCTURE
+    ldr         r10, [r9, r10]              ; detok + detok_coef_probs[type]
 
     mov         r7, #2                      ; type = 2
-    mov         r3, #0x18                   ; stop = 24
+    mov         r3, #24                     ; stop = 24
 
     str         r7, [sp, #l_type]
     str         r3, [sp, #l_stop]
 
-    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type] - didn't want to add 2 to coef_probs
+    str         r10, [sp, #l_coef_ptr]      ; coef_probs = coef_probs[type]
     b           BLOCK_LOOP
 
 ln1_decode_mb_to
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index e4d0ac234..75b739251 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -75,11 +75,14 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 }
 
 #if CONFIG_ARM_ASM_DETOK
-DECLARE_ALIGNED(16, const UINT8, vp8_block2context_leftabove[25*3]) =
+// mashup of vp8_block2left and vp8_block2above so we only need one pointer
+// for the assembly version.
+DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
 {
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, //end of vp8_block2context
-    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 0, 0, 1, 1, 0, 0, 1, 1, 0, //end of vp8_block2left
-    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 0, 1, 0, 1, 0, 1, 0  //end of vp8_block2above
+    //vp8_block2left
+    0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
+    //vp8_block2above
+    0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
 };
 
 void vp8_init_detokenizer(VP8D_COMP *dx)
@@ -88,8 +91,8 @@ void vp8_init_detokenizer(VP8D_COMP *dx)
     MACROBLOCKD *x = & dx->mb;
 
     dx->detoken.vp8_coef_tree_ptr = vp8_coef_tree;
-    dx->detoken.ptr_onyxblock2context_leftabove = vp8_block2context_leftabove;
-    dx->detoken.ptr_onyx_coef_bands_x = vp8_coef_bands_x;
+    dx->detoken.ptr_block2leftabove = vp8_block2leftabove;
+    dx->detoken.ptr_coef_bands_x = vp8_coef_bands_x;
     dx->detoken.scan = vp8_default_zig_zag1d;
     dx->detoken.teb_base_ptr = vp8d_token_extra_bits2;
     dx->detoken.qcoeff_start_ptr = &x->qcoeff[0];
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 2a7a03640..af1182fb4 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -49,14 +49,14 @@ typedef struct
 typedef struct
 {
     int const *scan;
-    UINT8 const *ptr_onyxblock2context_leftabove;
+    UINT8 const *ptr_block2leftabove;
     vp8_tree_index const *vp8_coef_tree_ptr;
     TOKENEXTRABITS const *teb_base_ptr;
     unsigned char *norm_ptr;
-    UINT16 *ptr_onyx_coef_bands_x;
+    UINT8 *ptr_coef_bands_x;
 
-    ENTROPY_CONTEXT **A;
-    ENTROPY_CONTEXT (*L)[4];
+    ENTROPY_CONTEXT_PLANES *A;
+    ENTROPY_CONTEXT_PLANES *L;
 
     INT16 *qcoeff_start_ptr;
     BOOL_DECODER *current_bc;

From c239a1b67c9c5ff25a04ba89eca45245b1e615a2 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 20 Aug 2010 12:27:26 +0100
Subject: [PATCH 137/307] Improved Force Key Frame Behaviour

These changes improve the behaviour of the code with
forced key frames sent in by a calling application.

The sizing of the frames is still suboptimal for two pass in
particular but the behaviour is much better than it was.

Change-Id: I35fae610c67688ccc69d11f385e87dfc884e65a1
---
 vp8/encoder/firstpass.c | 54 +++++++++++++++++++++++++----------------
 vp8/encoder/onyx_if.c   | 22 ++++++-----------
 vp8/encoder/ratectrl.c  |  7 ++++--
 3 files changed, 46 insertions(+), 37 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 5cbd2a43e..a58b9b75d 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -2095,33 +2095,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         kf_group_intra_err += this_frame->intra_error;
         kf_group_coded_err += this_frame->coded_error;
 
+        // load a the next frame's stats
         vpx_memcpy(&last_frame, this_frame, sizeof(*this_frame));
+        vp8_input_stats(cpi, this_frame);
 
         // Provided that we are not at the end of the file...
-        if (EOF != vp8_input_stats(cpi, this_frame))
+        if (cpi->oxcf.auto_key
+            && lookup_next_frame_stats(cpi, &next_frame) != EOF)
         {
-            if (lookup_next_frame_stats(cpi, &next_frame) != EOF)
-            {
-                if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
-                    break;
-            }
-        }
+            if (test_candidate_kf(cpi, &last_frame, this_frame, &next_frame))
+                break;
 
-        // Step on to the next frame
-        cpi->frames_to_key ++;
-
-        // If we don't have a real key frame within the next two
-        // forcekeyframeevery intervals then break out of the loop.
-        if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
-            break;
+            // Step on to the next frame
+            cpi->frames_to_key ++;
 
+            // If we don't have a real key frame within the next two
+            // forcekeyframeevery intervals then break out of the loop.
+            if (cpi->frames_to_key >= 2 *(int)cpi->key_frame_frequency)
+                break;
+        } else
+            cpi->frames_to_key ++;
     }
 
     // If there is a max kf interval set by the user we must obey it.
     // We already breakout of the loop above at 2x max.
     // This code centers the extra kf if the actual natural
     // interval is between 1x and 2x
-    if ( cpi->frames_to_key > (int)cpi->key_frame_frequency )
+    if (cpi->oxcf.auto_key
+        && cpi->frames_to_key > (int)cpi->key_frame_frequency )
     {
         cpi->frames_to_key /= 2;
 
@@ -2388,23 +2389,34 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 cpi->kf_bits = (3 * cpi->buffer_level) >> 2;
         }
 
-        // If the key frame is actually easier than the average for the kf group (which does sometimes happen... eg a blank intro frame)
-        // Then use an alternate calculation based on the kf error score which should give a smaller key frame.
+        // If the key frame is actually easier than the average for the
+        // kf group (which does sometimes happen... eg a blank intro frame)
+        // Then use an alternate calculation based on the kf error score
+        // which should give a smaller key frame.
         if (kf_mod_err < kf_group_err / cpi->frames_to_key)
         {
-            double  alt_kf_grp_bits = ((double)cpi->bits_left * (kf_mod_err * (double)cpi->frames_to_key) / cpi->modified_total_error_left) ;
+            double  alt_kf_grp_bits =
+                        ((double)cpi->bits_left *
+                         (kf_mod_err * (double)cpi->frames_to_key) /
+                         DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left));
 
-            alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks));
+            alt_kf_bits = (int)((double)kf_boost *
+                                (alt_kf_grp_bits / (double)allocation_chunks));
 
             if (cpi->kf_bits > alt_kf_bits)
             {
                 cpi->kf_bits = alt_kf_bits;
             }
         }
-        // Else if it is much harder than other frames in the group make sure it at least receives an allocation in keeping with its relative error score
+        // Else if it is much harder than other frames in the group make sure
+        // it at least receives an allocation in keeping with its relative
+        // error score
         else
         {
-            alt_kf_bits = (int)((double)cpi->bits_left * (kf_mod_err / cpi->modified_total_error_left));
+            alt_kf_bits =
+                (int)((double)cpi->bits_left *
+                      (kf_mod_err /
+                       DOUBLE_DIVIDE_CHECK(cpi->modified_total_error_left)));
 
             if (alt_kf_bits > cpi->kf_bits)
             {
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 99c434f52..fe83ae976 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2818,24 +2818,18 @@ static int pick_frame_size(VP8_COMP *cpi)
         cm->frame_type = KEY_FRAME;
 
     }
-    // Auto key frames (Only two pass will enter here)
+    // Special case for forced key frames
+    // The frame sizing here is still far from ideal for 2 pass.
+    else if (cm->frame_flags & FRAMEFLAGS_KEY)
+    {
+        cm->frame_type = KEY_FRAME;
+        resize_key_frame(cpi);
+        vp8_calc_iframe_target_size(cpi);
+    }
     else if (cm->frame_type == KEY_FRAME)
     {
         vp8_calc_auto_iframe_target_size(cpi);
     }
-    // Forced key frames (by interval or an external signal)
-    else if ((cm->frame_flags & FRAMEFLAGS_KEY) ||
-             (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0)))
-    {
-        // Key frame from VFW/auto-keyframe/first frame
-        cm->frame_type = KEY_FRAME;
-
-        resize_key_frame(cpi);
-
-        // Compute target frame size
-        if (cpi->pass != 2)
-            vp8_calc_iframe_target_size(cpi);
-    }
     else
     {
         // INTER frame: compute target frame size
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index d32808165..371272268 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1444,6 +1444,9 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
     }
     else
     {
+        int last_kf_interval =
+                (cpi->frames_since_key > 0) ? cpi->frames_since_key : 1;
+
         // reset keyframe context and calculate weighted average of last KEY_FRAME_CONTEXT keyframes
         for (i = 0; i < KEY_FRAME_CONTEXT; i++)
         {
@@ -1454,8 +1457,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
             }
             else
             {
-                cpi->prior_key_frame_size[KEY_FRAME_CONTEXT - 1]     = cpi->projected_frame_size;
-                cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = cpi->frames_since_key;
+                cpi->prior_key_frame_size[i]     = cpi->projected_frame_size;
+                cpi->prior_key_frame_distance[i] = last_kf_interval;
             }
 
             av_key_frame_bits      += prior_key_frame_weight[i] * cpi->prior_key_frame_size[i];

From 0e78efad0be73d293880d1b71053c0d70a50a080 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Mon, 30 Aug 2010 18:16:04 -0400
Subject: [PATCH 138/307] Replace sleep(0) calls in multi-threaded decoder

This is a workaround for gLucid problem.

Change-Id: I188a016a07e4c2ea212444c5a6284ff3c48a5caa
---
 vp8/common/threading.h  |  3 ++-
 vp8/decoder/threading.c | 14 +++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index 96be710c4..cd2236168 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -75,7 +75,8 @@
 #define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
 #else
 #include <unistd.h>
-#define thread_sleep(nms) usleep(nms*1000);// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#include <sched.h>
+#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
 #endif
 /* Not Windows. Assume pthreads */
 
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index ba8439508..10e72ce2d 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -611,15 +611,12 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
             {
-
                 if ( mb_row > 0 && (mb_col & 7) == 0){
-               //if ( mb_row > 0 ){
-                  while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
-                  {
-                      x86_pause_hint();
-                      thread_sleep(0);
-                  }
-
+                    while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                    {
+                        x86_pause_hint();
+                        thread_sleep(0);
+                    }
                 }
 
                 if (xd->mode_info_context->mbmi.mode == SPLITMV || xd->mode_info_context->mbmi.mode == B_PRED)
@@ -763,7 +760,6 @@ void vp8_mt_loop_filter_frame( VP8D_COMP *pbi)
         {
             int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
 
-
             if ( mb_row > 0 && (mb_col & 7) == 0){
             // if ( mb_row > 0 ){
                 while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)

From c834a74e2326b5d6fad4c188b372ddf908c22070 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 1 Sep 2010 13:19:08 -0400
Subject: [PATCH 139/307] Fix compilation without --enable-experimental

Remove unconditional reference to vpx_codec_vp8x_cx_algo.

Change-Id: I2f152a5bc014a2c8f7418e90b360ce18238e8ec1
---
 ivfenc.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/ivfenc.c b/ivfenc.c
index 3a50198e6..b3344d3ff 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -815,8 +815,11 @@ int main(int argc, const char **argv_)
     /* Handle codec specific options */
 #if CONFIG_VP8_ENCODER
 
-    if (codec->iface == &vpx_codec_vp8_cx_algo ||
-        codec->iface == &vpx_codec_vp8x_cx_algo)
+    if (codec->iface == &vpx_codec_vp8_cx_algo
+#if CONFIG_EXPERIMENTAL
+        || codec->iface == &vpx_codec_vp8x_cx_algo
+#endif
+        )
     {
         ctrl_args = vp8_args;
         ctrl_args_map = vp8_arg_ctrl_map;

From d45e55015e85897c4307959b5f9df94da5b41f33 Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 1 Sep 2010 16:40:18 -0400
Subject: [PATCH 140/307] Fix rare deadlock before loop filter

There was an extremely rare deadlock that happened when one thread
was waiting to start the loop filter on frame n while the other
threads were starting to work on frame n+1.

Change-Id: Icc94f728b3b6663405435640d9a2996735ba19ef
---
 vp8/decoder/threading.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 10e72ce2d..02edba2be 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -161,6 +161,8 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
     while (1)
     {
+        int current_filter_level = 0;
+
         if (pbi->b_multithreaded_rd == 0)
             break;
 
@@ -279,6 +281,11 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                 }
             }
         }
+
+        // If |pbi->common.filter_level| is 0 the value can change in-between
+        // the sem_post and the check to call vp8_thread_loop_filter.
+        current_filter_level = pbi->common.filter_level;
+
         //  add this to each frame
         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
         {
@@ -286,7 +293,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
             sem_post(&pbi->h_event_end_decoding);
         }
 
-        if ((pbi->b_multithreaded_lf) &&(pbi->common.filter_level))
+        if ((pbi->b_multithreaded_lf) && (current_filter_level))
             vp8_thread_loop_filter(pbi, mbrd, ithread);
 
     }

From 23216211bc27bd90bd4b32a4730e839a4de29559 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 09:32:03 -0400
Subject: [PATCH 141/307] Disable frame dropping by default

This is not the behavior that most users expect.

Change-Id: I226126ea400c22cf1f7918e80ea7fe0771c569cb
---
 vp8/vp8_cx_iface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 40cb73776..3cc84fc76 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1024,7 +1024,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
 
         0,                  /* g_lag_in_frames */
 
-        70,                 /* rc_dropframe_thresh */
+        0,                  /* rc_dropframe_thresh */
         0,                  /* rc_resize_allowed */
         60,                 /* rc_resize_down_thresold */
         30,                 /* rc_resize_up_thresold */

From fca129203ab3f77432577af668db224b142c82c6 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Mon, 16 Aug 2010 16:16:24 -0700
Subject: [PATCH 142/307] added separate rounding/zbin constants for 2nd order

This allows experiments of using different rounding and
zerobin constants for 2nd order blocks.

Change-Id: Idd829adba3edd1f713c66151a8d29bb245e33a71
---
 vp8/encoder/encodeframe.c | 59 +++++++++++++++++++++++++++++++++------
 1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b28ba1a8b..e00154c80 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -101,6 +101,49 @@ static const int qzbin_factors[129] =
     80, 80, 80, 80, 80, 80, 80, 80,
     80,
 };
+
+static const int qrounding_factors_y2[129] =
+{
+    56, 56, 56, 56, 48, 48, 56, 56,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48,
+};
+
+static const int qzbin_factors_y2[129] =
+{
+    72, 72, 72, 72, 80, 80, 72, 72,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80, 80, 80, 80, 80, 80, 80, 80,
+    80,
+};
+
 //#define EXACT_QUANT
 #ifdef EXACT_QUANT
 static void vp8cx_invert_quant(short *quant, short *shift, short d)
@@ -138,8 +181,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
         vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
                            cpi->Y2quant_shift[Q][0] + 0, quant_val);
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
@@ -169,8 +212,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
             vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
                                cpi->Y2quant_shift[Q][r] + c, quant_val);
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
@@ -206,8 +249,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
         cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
         cpi->common.Y2dequant[Q][0][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
@@ -234,8 +277,8 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
             cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
             cpi->common.Y2dequant[Q][r][c] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 

From 76640f85dac7145a7f275ca04708496fc136bbe5 Mon Sep 17 00:00:00 2001
From: James Zern <jzern@google.com>
Date: Fri, 20 Aug 2010 16:06:56 -0400
Subject: [PATCH 143/307] encoder: remove postproc dependency

Remove the dependency on postproc.c for the encoder in general, the only
unchecked need for it is when CONFIG_PSNR is enabled. All other cases
are already wrapped in CONFIG_POSTPROC. In the CONFIG_PSNR case the file
will still be included.

Additionally, when VP8_SET_POSTPROC is used with the encoder when post
processing has been disabled an error will be returned.

This addresses issue #153.

Change-Id: Ia6dfe20167f7077734a6058cbd1d794550346089
---
 vp8/common/arm/systemdependent.c     | 2 ++
 vp8/common/generic/systemdependent.c | 2 +-
 vp8/vp8_common.mk                    | 2 --
 vp8/vp8_cx_iface.c                   | 8 ++++++++
 vp8/vp8cx.mk                         | 2 ++
 5 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index b58cd789f..6e8651754 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -126,11 +126,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
 #endif
 
+#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
     rtcd->postproc.down        = vp8_mbpost_proc_down_c;
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
     rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
 #endif
+#endif
 
 #if HAVE_ARMV7
     vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 2fbeaee4c..91077b3cf 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -61,7 +61,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
 
-#if CONFIG_POSTPROC || CONFIG_VP8_ENCODER
+#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
     rtcd->postproc.down        = vp8_mbpost_proc_down_c;
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 3aad7b7be..481bca435 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -96,8 +96,6 @@ VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/vp8_asm_stubs.c
 VP8_COMMON_SRCS-$(ARCH_X86)$(ARCH_X86_64) += common/x86/loopfilter_x86.c
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h
 VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.h
-VP8_COMMON_SRCS-$(CONFIG_VP8_ENCODER) += common/postproc.c
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/iwalsh_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 3cc84fc76..da74a4fc8 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -826,7 +826,9 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
         int ctr_id,
         va_list args)
 {
+#if CONFIG_POSTPROC
     vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *);
+    (void)ctr_id;
 
     if (data)
     {
@@ -835,6 +837,12 @@ static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx,
     }
     else
         return VPX_CODEC_INVALID_PARAM;
+#else
+    (void)ctx;
+    (void)ctr_id;
+    (void)args;
+    return VPX_CODEC_INCAPABLE;
+#endif
 }
 
 
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 50eb29731..cafd06554 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -80,6 +80,8 @@ VP8_CX_SRCS-$(CONFIG_PSNR) += encoder/ssim.c
 VP8_CX_SRCS-yes += encoder/tokenize.c
 VP8_CX_SRCS-yes += encoder/treewriter.c
 VP8_CX_SRCS-yes += encoder/variance_c.c
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h
+VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c
 
 ifeq ($(CONFIG_REALTIME_ONLY),yes)
 VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c

From 21039ce16eb794bf250fcd5484c784b2f7c4f2b1 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 11:17:05 -0400
Subject: [PATCH 144/307] Use -fno-common for mingw

Fixes http://code.google.com/p/webm/issues/detail?id=112

Thanks to Ramiro Polla for the issue/fix.

Change-Id: I7f7b547a4ea3270e183f59280510066cc29a619e
---
 build/make/configure.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index cebdd57ba..4b732e63f 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -781,6 +781,9 @@ process_common_toolchain() {
         soft_enable ssse3
 
         case  ${tgt_os} in
+            win*)
+                enabled gcc && add_cflags -fno-common
+                ;;
             solaris*)
                 CC=${CC:-${CROSS}gcc}
                 LD=${LD:-${CROSS}gcc}

From d6ee72a7bf2e2ef889827f4bc307fe1f930e2f1c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 11:51:45 -0400
Subject: [PATCH 145/307] Fix target detection on mingw32

gcc -dumpmachine returns only 'mingw32'

Change-Id: I774d05a97c5131fc12009e436712c319e54490a5
---
 build/make/configure.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 4b732e63f..73cb60085 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -518,6 +518,7 @@ process_common_toolchain() {
                 tgt_os=darwin9
                 ;;
             *mingw32*|*cygwin*)
+                [ -z "$tgt_isa" ] && tgt_isa=x86
                 tgt_os=win32
                 ;;
             *linux*|*bsd*)

From daab4bcba60759a252bd210ee5f8d7cd390962e5 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 12:03:51 -0400
Subject: [PATCH 146/307] Use native win32 timers on mingw

Changed to use QueryPerformanceCounter on Windows rather than only
when building with MSVC, so that MSVC can link libs built with
MinGW.

Fixes issue #149.

Change-Id: Ie2dc7edc8f4d096cf95ec5ffb1ab00f2d67b3e7d
---
 vpx_ports/vpx_timer.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index f5e817ff4..f2a5a7ec1 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -12,7 +12,7 @@
 #ifndef VPX_TIMER_H
 #define VPX_TIMER_H
 
-#if defined(_MSC_VER)
+#if defined(_WIN32)
 /*
  * Win32 specific includes
  */
@@ -43,7 +43,7 @@
 
 struct vpx_usec_timer
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     LARGE_INTEGER  begin, end;
 #else
     struct timeval begin, end;
@@ -54,7 +54,7 @@ struct vpx_usec_timer
 static void
 vpx_usec_timer_start(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     QueryPerformanceCounter(&t->begin);
 #else
     gettimeofday(&t->begin, NULL);
@@ -65,7 +65,7 @@ vpx_usec_timer_start(struct vpx_usec_timer *t)
 static void
 vpx_usec_timer_mark(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     QueryPerformanceCounter(&t->end);
 #else
     gettimeofday(&t->end, NULL);
@@ -76,7 +76,7 @@ vpx_usec_timer_mark(struct vpx_usec_timer *t)
 static long
 vpx_usec_timer_elapsed(struct vpx_usec_timer *t)
 {
-#if defined(_MSC_VER)
+#if defined(_WIN32)
     LARGE_INTEGER freq, diff;
 
     diff.QuadPart = t->end.QuadPart - t->begin.QuadPart;

From 4496db45e355dec1d49b9b81fe0b964997285a68 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 13:33:01 -0400
Subject: [PATCH 147/307] Whitespace: nuke CRLFs

Change-Id: I8b9fdf9875a8fcff4cb49a3357ce44f18108c2e7
---
 vp8/encoder/encodeframe.c | 126 ++++++++++++-------------
 vp8/encoder/quantize.c    | 190 +++++++++++++++++++-------------------
 2 files changed, 158 insertions(+), 158 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index e00154c80..1e0e1abc3 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -228,69 +228,69 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     }
 }
 #else
-void vp8cx_init_quantizer(VP8_COMP *cpi)
-{
-    int r, c;
-    int i;
-    int quant_val;
-    int Q;
-
-    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
-
-    for (Q = 0; Q < QINDEX_RANGE; Q++)
-    {
-        // dc values
-        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0][0] = quant_val;
-        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0][0] = quant_val;
-        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0][0] = quant_val;
-        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
-
-        // all the ac values = ;
-        for (i = 1; i < 16; i++)
-        {
-            int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
-
-            quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][r][c] = quant_val;
-            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-
-            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][r][c] = quant_val;
-            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-
-            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][r][c] = quant_val;
-            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
-        }
-    }
-}
+void vp8cx_init_quantizer(VP8_COMP *cpi)
+{
+    int r, c;
+    int i;
+    int quant_val;
+    int Q;
+
+    int zbin_boost[16] = {0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44};
+
+    for (Q = 0; Q < QINDEX_RANGE; Q++)
+    {
+        // dc values
+        quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
+        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
+        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
+        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
+        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0][0] = quant_val;
+        cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
+
+        // all the ac values = ;
+        for (i = 1; i < 16; i++)
+        {
+            int rc = vp8_default_zig_zag1d[i];
+            r = (rc >> 2);
+            c = (rc & 3);
+
+            quant_val = vp8_ac_yquant(Q);
+            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
+            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+
+            quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
+            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
+            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][r][c] = quant_val;
+            cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
+        }
+    }
+}
 #endif
 void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 {
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 2ea16d8f0..f495940b3 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -119,101 +119,101 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     d->eob = eob + 1;
 }
 #else
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-
-    vpx_memset(qcoeff_ptr, 0, 32);
-    vpx_memset(dqcoeff_ptr, 0, 32);
-
-    eob = -1;
-
-    for (i = 0; i < 16; i++)
-    {
-        rc   = vp8_default_zig_zag1d[i];
-        z    = coeff_ptr[rc];
-        zbin = zbin_ptr[rc] ;
-
-        sz = (z >> 31);                                 // sign of z
-        x  = (z ^ sz) - sz;                             // x = abs(z)
-
-        if (x >= zbin)
-        {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-            x  = (y ^ sz) - sz;                         // get the sign back
-            qcoeff_ptr[rc] = x;                          // write to destination
-            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
-
-            if (y)
-            {
-                eob = i;                                // last nonzero coeffs
-            }
-        }
-    }
-    d->eob = eob + 1;
-}
-
-void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
-
-    vpx_memset(qcoeff_ptr, 0, 32);
-    vpx_memset(dqcoeff_ptr, 0, 32);
-
-    eob = -1;
-
-    for (i = 0; i < 16; i++)
-    {
-        rc   = vp8_default_zig_zag1d[i];
-        z    = coeff_ptr[rc];
-
-        //if ( i == 0 )
-        //    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
-        //else
-        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
-
-        zbin_boost_ptr ++;
-        sz = (z >> 31);                                 // sign of z
-        x  = (z ^ sz) - sz;                             // x = abs(z)
-
-        if (x >= zbin)
-        {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-            x  = (y ^ sz) - sz;                         // get the sign back
-            qcoeff_ptr[rc]  = x;                         // write to destination
-            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
-
-            if (y)
-            {
-                eob = i;                                // last nonzero coeffs
-                zbin_boost_ptr = &b->zrun_zbin_boost[0];    // reset zero runlength
-            }
-        }
-    }
-
-    d->eob = eob + 1;
-}
-
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+        zbin = zbin_ptr[rc] ;
+
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc] = x;                          // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+            }
+        }
+    }
+    d->eob = eob + 1;
+}
+
+void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
+    short *coeff_ptr  = &b->coeff[0];
+    short *zbin_ptr   = &b->zbin[0][0];
+    short *round_ptr  = &b->round[0][0];
+    short *quant_ptr  = &b->quant[0][0];
+    short *qcoeff_ptr = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = &d->dequant[0][0];
+    short zbin_oq_value = b->zbin_extra;
+
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+
+    eob = -1;
+
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        //if ( i == 0 )
+        //    zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value/2;
+        //else
+        zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value;
+
+        zbin_boost_ptr ++;
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        if (x >= zbin)
+        {
+            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+            x  = (y ^ sz) - sz;                         // get the sign back
+            qcoeff_ptr[rc]  = x;                         // write to destination
+            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+            if (y)
+            {
+                eob = i;                                // last nonzero coeffs
+                zbin_boost_ptr = &b->zrun_zbin_boost[0];    // reset zero runlength
+            }
+        }
+    }
+
+    d->eob = eob + 1;
+}
+
 #endif
 
 /* Perform regular quantization, with unbiased rounding and no zero bin. */

From e4b5002490a0ae9df972ed3715b80634a96cc69b Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 13:40:43 -0400
Subject: [PATCH 148/307] Update AUTHORS

Change-Id: I0395ffa107651a773fd11d12682ab9372f76a90b
---
 .mailmap |  2 ++
 AUTHORS  | 10 ++++++++++
 2 files changed, 12 insertions(+)
 create mode 100644 .mailmap

diff --git a/.mailmap b/.mailmap
new file mode 100644
index 000000000..f77ff26e4
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,2 @@
+Adrian Grange <agrange@google.com>
+Johann Koenig <johannkoenig@google.com>
diff --git a/AUTHORS b/AUTHORS
index 9715f0071..6708d5aa9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,16 +1,26 @@
 # This file is automatically generated from the git commit history
 # by tools/gen_authors.sh.
 
+Adrian Grange <agrange@google.com>
 Alex Converse <alex.converse@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
 Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
+Fredrik Söderquist <fs@opera.com>
+Fritz Koenig <frkoenig@google.com>
+Giuseppe Scrivano <gscrivano@gnu.org>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 James Zern <jzern@google.com>
+Jan Kratochvil <jan.kratochvil@redhat.com>
+Jeff Muizelaar <jmuizelaar@mozilla.com>
+Jim Bankoski <jimbankoski@google.com>
+Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
 Justin Clift <justin@salasaga.org>
+Justin Lebar <justin.lebar@gmail.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
+Michael Kohler <michaelkohler@live.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Philip Jägenstedt <philipj@opera.com>

From b0519a26b120c8a5763f89f097f8b128d9257a6c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 2 Sep 2010 14:56:47 -0400
Subject: [PATCH 149/307] Update CHANGELOG for v0.9.2 release

Change-Id: I184e927987544e9f34f890249b589ea13a93a330
---
 CHANGELOG | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index c445a52df..2b2803740 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,53 @@
+2010-09-02 v0.9.2
+  - Enhancements:
+      Disable frame dropping by default
+      Improved multithreaded performance
+      Improved Force Key Frame Behaviour
+      Increased rate control buffer level precision
+      Fix bug in 1st pass motion compensation
+      ivfenc: correct fixed kf interval, --disable-kf
+  - Speed:
+      Changed above and left context data layout
+      Rework idct calling structure.
+      Removed unnecessary MB_MODE_INFO copies
+      x86: SSSE3 sixtap prediction
+      Reworked IDCT to include reconstruction (add) step
+      Swap alt/gold/new/last frame buffer ptrs instead of copying.
+      Improve SSE2 loopfilter functions
+      Change bitreader to use a larger window.
+      Avoid loopfilter reinitialization when possible
+  - Quality:
+      Normalize quantizer's zero bin and rounding factors
+      Add trellis quantization.
+      Make the quantizer exact.
+      Updates to ARNR filtering algorithm
+      Fix breakout thresh computation for golden & AltRef frames
+      Redo the forward 4x4 dct
+      Improve the accuracy of forward walsh-hadamard transform
+      Further adjustment of RD behaviour with Q and Zbin.
+  - Build System:
+      Allow linking of libs built with MinGW to MSVC
+      Fix target auto-detection on mingw32
+      Allow --cpu= to work for x86.
+      configure: pass original arguments through to make dist
+      Fix builds without runtime CPU detection
+      msvs: fix install of codec sources
+      msvs: Change devenv.com command line for better msys support
+      msvs: Add vs9 targets.
+      Add x86_64-linux-icc target
+  - Bugs:
+      Potential crashes on older MinGW builds
+      Fix two-pass framrate for Y4M input.
+      Fixed simple loop filter, other crashes on ARM v6
+      arm: fix missing dependency with --enable-shared
+      configure: support directories containing .o
+      Replace pinsrw (SSE) with MMX instructions
+      apple: include proper mach primatives
+      Fixed rate control bug with long key frame interval.
+      Fix DSO link errors on x86-64 when not using a version script
+      Fixed buffer selection for UV in AltRef filtering
+
+
 2010-06-17 v0.9.1
   - Enhancements:
       * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O

From 0de458f6b9627844160768c0b2417058c7a865bc Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Thu, 2 Sep 2010 16:17:52 -0400
Subject: [PATCH 150/307] Reduced the size of MB_MODE_INFO

Moved partition_bmi and partition_count out of MB_MODE_INFO and
placed into MACROBLOCK.  Also reduced the size of other members
of the MB_MODE_INFO struct.  For 1080p, the memory was reduced
by 1,209,516 bytes.  The decoder performance appeared to improve
by 3% for the clip used.
Note:  The main goal for this change is to improve the decoder
performance.  The encoder will be revisited at a later date for
further structure cleanup.

Change-Id: I4733621292ee9cc3fffa4046cb3fd4d99bd14613
---
 vp8/common/blockd.h       | 19 +++++++++----------
 vp8/decoder/decodemv.c    |  8 ++++----
 vp8/decoder/threading.c   |  2 --
 vp8/encoder/bitstream.c   |  8 ++++++--
 vp8/encoder/block.h       |  9 +++++++++
 vp8/encoder/encodeframe.c |  7 +++++--
 vp8/encoder/ethreading.c  |  8 ++++++--
 vp8/encoder/firstpass.c   |  2 ++
 vp8/encoder/onyx_if.c     | 26 ++++++++++++++++++++++++++
 vp8/encoder/pickinter.c   |  4 ++++
 vp8/encoder/rdopt.c       | 24 ++++++++++++++----------
 11 files changed, 85 insertions(+), 32 deletions(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index b286e2e1a..1cc5a1f2c 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -168,14 +168,15 @@ typedef struct
         int as_int;
         MV  as_mv;
     } mv;
-    int partitioning;
-    int partition_count;
-    int mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
-    int dc_diff;
-    unsigned char   segment_id;                  // Which set of segmentation parameters should be used for this MB
-    int force_no_skip;
-    int need_to_clamp_mvs;
-    B_MODE_INFO partition_bmi[16];
+
+    char partitioning;
+    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char dc_diff;
+    unsigned char need_to_clamp_mvs;
+
+    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+
+    unsigned char force_no_skip; //encoder only
 } MB_MODE_INFO;
 
 
@@ -227,8 +228,6 @@ typedef struct
     YV12_BUFFER_CONFIG dst;
 
     MODE_INFO *mode_info_context;
-    MODE_INFO *mode_info;
-
     int mode_info_stride;
 
     FRAME_TYPE frame_type;
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 4f24b445f..d14126739 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -219,8 +219,8 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
                     do  /* for each subset j */
                     {
-                        B_MODE_INFO *const bmi = mbmi->partition_bmi + j;
-                        MV *const mv = & bmi->mv.as_mv;
+                        B_MODE_INFO bmi;
+                        MV *const mv = & bmi.mv.as_mv;
 
                         int k = -1;  /* first block in subset j */
                         int mv_contz;
@@ -237,7 +237,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
                         mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
 
-                        switch (bmi->mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
+                        switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
                         {
                         case NEW4X4:
                             read_mv(bc, mv, (const MV_CONTEXT *) mvc);
@@ -285,7 +285,7 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
                            refer back to us via "left" or "above". */
                         do
                             if (j == L[k])
-                                mi->bmi[k] = *bmi;
+                                mi->bmi[k] = bmi;
 
                         while (++k < 16);
                     }
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 02edba2be..93acd368b 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -52,7 +52,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
 
-        mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
         mbd->mode_info_stride  = pc->mode_info_stride;
 
@@ -105,7 +104,6 @@ void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
         //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
         //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
 
-        mbd->mode_info        = pc->mi - 1;
         mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
         mbd->mode_info_stride  = pc->mode_info_stride;
 
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 21629841b..c706395a8 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -872,6 +872,8 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
     int prob_skip_false = 0;
     ms = pc->mi - 1;
 
+    cpi->mb.partition_info = cpi->mb.pi;
+
     // Calculate the probabilities to be used to code the reference frame based on actual useage this frame
     if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter)))
         cpi->prob_intra_coded = 1;
@@ -1020,7 +1022,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
 
                     do
                     {
-                        const B_MODE_INFO *const b = mi->partition_bmi + j;
+                        const B_MODE_INFO *const b = cpi->mb.partition_info->bmi + j;
                         const int *const  L = vp8_mbsplits [mi->partitioning];
                         int k = -1;  /* first block in subset j */
                         int mv_contz;
@@ -1042,7 +1044,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
                             write_mv(w, &b->mv.as_mv, &best_mv, (const MV_CONTEXT *) mvc);
                         }
                     }
-                    while (++j < mi->partition_count);
+                    while (++j < cpi->mb.partition_info->count);
                 }
                 break;
                 default:
@@ -1051,9 +1053,11 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
             }
 
             ++m;
+            cpi->mb.partition_info++;
         }
 
         ++m;  /* skip L prediction border */
+        cpi->mb.partition_info++;
     }
 }
 
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index c914a32b6..be2b81678 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -50,6 +50,12 @@ typedef struct
 
 } BLOCK;
 
+typedef struct
+{
+    int count;
+    B_MODE_INFO bmi[16];
+} PARTITION_INFO;
+
 typedef struct
 {
     DECLARE_ALIGNED(16, short, src_diff[400]);       // 16x16 Y 8x8 U 8x8 V 4x4 2nd Y
@@ -61,6 +67,9 @@ typedef struct
     YV12_BUFFER_CONFIG src;
 
     MACROBLOCKD e_mbd;
+    PARTITION_INFO *partition_info; /* work pointer */
+    PARTITION_INFO *pi;   /* Corresponds to upper left visible macroblock */
+    PARTITION_INFO *pip;  /* Base of allocated array */
 
     search_site *ss;
     int ss_count;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 1e0e1abc3..96f36ee63 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -458,7 +458,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
                 for (b = 0; b < xd->mbmi.partition_count; b++)
                 {
-                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                    inter_b_modes[x->partition->bmi[b].mode] ++;
                 }
             }
 
@@ -511,6 +511,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
         // skip to next mb
         xd->mode_info_context++;
+        x->partition_info++;
 
         xd->above_context++;
         cpi->current_mb_col_main = mb_col;
@@ -525,6 +526,7 @@ void encode_mb_row(VP8_COMP *cpi,
 
     // this is to account for the border
     xd->mode_info_context++;
+    x->partition_info++;
 }
 
 
@@ -594,7 +596,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
 
     totalrate = 0;
 
-    xd->mode_info = cm->mi - 1;
+    x->partition_info = x->pi;
 
     xd->mode_info_context = cm->mi;
     xd->mode_info_stride = cm->mode_info_stride;
@@ -730,6 +732,7 @@ void vp8_encode_frame(VP8_COMP *cpi)
                 x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                 xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                x->partition_info  += xd->mode_info_stride * cpi->encoding_thread_count;
 
                 if (mb_row < cm->mb_rows - 1)
                     //WaitForSingleObject(cpi->h_event_main, INFINITE);
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index b677fde66..04093ff43 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -147,7 +147,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                                 for (b = 0; b < xd->mbmi.partition_count; b++)
                                 {
-                                    inter_b_modes[xd->mbmi.partition_bmi[b].mode] ++;
+                                    inter_b_modes[x->partition->bmi[b].mode] ++;
                                 }
                             }
 
@@ -179,6 +179,7 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                         // skip to next mb
                         xd->mode_info_context++;
+                        x->partition_info++;
 
                         xd->above_context++;
 
@@ -195,12 +196,14 @@ THREAD_FUNCTION thread_encoding_proc(void *p_data)
 
                     // this is to account for the border
                     xd->mode_info_context++;
+                    x->partition_info++;
 
                     x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols;
                     x->src.u_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
                     x->src.v_buffer +=  8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols;
 
                     xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count;
+                    x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count;
 
                     if (ithread == (cpi->encoding_thread_count - 1) || mb_row == cm->mb_rows - 1)
                     {
@@ -364,7 +367,8 @@ void vp8cx_init_mbrthread_data(VP8_COMP *cpi,
         vpx_memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts));
         mbr_ei[i].totalrate = 0;
 
-        mbd->mode_info        = cm->mi - 1;
+        mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1);
+
         mbd->mode_info_context = cm->mi   + x->e_mbd.mode_info_stride * (i + 1);
         mbd->mode_info_stride  = cm->mode_info_stride;
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index a58b9b75d..fea4e3d6a 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -565,6 +565,8 @@ void vp8_first_pass(VP8_COMP *cpi)
     xd->pre = *lst_yv12;
     xd->dst = *new_yv12;
 
+    x->partition_info = x->pi;
+
     xd->mode_info_context = cm->mi;
 
     vp8_build_block_offsets(x);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index fe83ae976..201de3377 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -29,6 +29,8 @@
 #include "swapyv12buffer.h"
 #include "threading.h"
 #include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
 #include <math.h>
 #include <stdio.h>
 #include <limits.h>
@@ -230,6 +232,11 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 
     cpi->gf_active_flags = 0;
 
+    if(cpi->mb.pip)
+        vpx_free(cpi->mb.pip);
+
+    cpi->mb.pip = 0;
+
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -1221,6 +1228,20 @@ static void alloc_raw_frame_buffers(VP8_COMP *cpi)
 
     cpi->source_buffer_count = 0;
 }
+
+static int vp8_alloc_partition_data(VP8_COMP *cpi)
+{
+    cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) *
+                                (cpi->common.mb_rows + 1),
+                                sizeof(PARTITION_INFO));
+    if(!cpi->mb.pip)
+        return ALLOC_FAILURE;
+
+    cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1;
+
+    return 0;
+}
+
 void vp8_alloc_compressor_data(VP8_COMP *cpi)
 {
     VP8_COMMON *cm = & cpi->common;
@@ -1232,6 +1253,11 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
         vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
                            "Failed to allocate frame buffers");
 
+    if (vp8_alloc_partition_data(cpi))
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate partition data");
+
+
     if ((width & 0xf) != 0)
         width += 16 - (width & 0xf);
 
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 9da715e99..2be412c99 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -430,6 +430,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv1;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -832,6 +833,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
             *returndistortion = distortion2;
             best_rd = this_rd;
             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             if (this_mode == B_PRED || this_mode == SPLITMV)
                 for (i = 0; i < 16; i++)
@@ -906,6 +908,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         best_mbmode.dc_diff = 0;
 
         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
@@ -920,6 +923,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
     if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED || x->e_mbd.mode_info_context->mbmi.mode == SPLITMV)
         for (i = 0; i < 16; i++)
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index c5dd59a61..75a71d7df 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1439,9 +1439,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
     // save partitions
     labels = vp8_mbsplits[best_seg];
     x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
-    x->e_mbd.mode_info_context->mbmi.partition_count = vp8_count_labels(labels);
+    x->partition_info->count = vp8_count_labels(labels);
 
-    for (i = 0; i < x->e_mbd.mode_info_context->mbmi.partition_count; i++)
+    for (i = 0; i < x->partition_info->count; i++)
     {
         int j;
 
@@ -1451,8 +1451,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                 break;
         }
 
-        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mode = x->e_mbd.block[j].bmi.mode;
-        x->e_mbd.mode_info_context->mbmi.partition_bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
+        x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
+        x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;
     }
 
     return best_segment_rd;
@@ -1466,6 +1466,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
     MACROBLOCKD *xd = &x->e_mbd;
     B_MODE_INFO best_bmodes[16];
     MB_MODE_INFO best_mbmode;
+    PARTITION_INFO best_partition;
     MV best_ref_mv;
     MV mode_mv[MB_MODE_COUNT];
     MB_PREDICTION_MODE this_mode;
@@ -1787,19 +1788,19 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             }
 
             // trap cases where the 8x8s can be promoted to 8x16s or 16x8s
-            if (0)//x->e_mbd.mbmi.partition_count == 4)
+            if (0)//x->partition_info->count == 4)
             {
 
-                if (x->e_mbd.mode_info_context->mbmi.partition_bmi[0].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[1].mv.as_int
-                    && x->e_mbd.mode_info_context->mbmi.partition_bmi[2].mv.as_int == x->e_mbd.mode_info_context->mbmi.partition_bmi[3].mv.as_int)
+                if (x->partition_info->bmi[0].mv.as_int == x->partition_info->bmi[1].mv.as_int
+                    && x->partition_info->bmi[2].mv.as_int == x->partition_info->bmi[3].mv.as_int)
                 {
                     const int *labels = vp8_mbsplits[2];
                     x->e_mbd.mode_info_context->mbmi.partitioning = 0;
                     rate -= vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + 2);
                     rate += vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings);
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[1]];
-                    //rate -=  x->inter_bmode_costs[  x->e_mbd.mbmi.partition_bmi[3]];
-                    x->e_mbd.mode_info_context->mbmi.partition_bmi[1] = x->e_mbd.mode_info_context->mbmi.partition_bmi[2];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[1]];
+                    //rate -=  x->inter_bmode_costs[  x->partition_info->bmi[3]];
+                    x->partition_info->bmi[1] = x->partition_info->bmi[2];
                 }
             }
 
@@ -2143,6 +2144,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
             *returndistortion = distortion2;
             best_rd = this_rd;
             vpx_memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO));
+            vpx_memcpy(&best_partition, x->partition_info, sizeof(PARTITION_INFO));
 
             for (i = 0; i < 16; i++)
             {
@@ -2224,6 +2226,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         best_mbmode.dc_diff = 0;
 
         vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+        vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
         for (i = 0; i < 16; i++)
         {
@@ -2238,6 +2241,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
     // macroblock modes
     vpx_memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO));
+    vpx_memcpy(x->partition_info, &best_partition, sizeof(PARTITION_INFO));
 
     for (i = 0; i < 16; i++)
     {

From 3fb37162a8328eb6183406062e5cd92d23276fca Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 7 Sep 2010 10:52:54 -0700
Subject: [PATCH 151/307] Bilinear subpixel optimizations for ssse3.

Used pmaddubsw for multiply and add of two filter taps
at once for 16x16 and 8x8 blocks.

Change-Id: Idccf2d6e094561624407b109fa7e80ba799355ea
---
 vp8/common/x86/subpixel_ssse3.asm    | 577 +++++++++++++++++++++++++++
 vp8/common/x86/subpixel_x86.h        |  12 +-
 vp8/common/x86/x86_systemdependent.c |   2 +
 3 files changed, 585 insertions(+), 6 deletions(-)

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 87173f2ca..8e3c38e32 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -887,6 +887,573 @@ vp8_filter_block1d4_v4_ssse3_loop:
     pop         rbp
     ret
 
+;void vp8_bilinear_predict16x16_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    int   src_pixels_per_line,
+;    int  xoffset,
+;    int  yoffset,
+;    unsigned char *dst_ptr,
+;    int dst_pitch
+;)
+global sym(vp8_bilinear_predict16x16_ssse3)
+sym(vp8_bilinear_predict16x16_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+        movsxd      rax,        dword ptr arg(2)    ; xoffset
+
+        cmp         rax,        0                   ; skip first_pass filter if xoffset=0
+        je          b16x16_sp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; HFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        mov         rsi,        arg(0)              ; src_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm1,       [rax]
+
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+
+        cmp         rax,        0                   ; skip second_pass filter if yoffset=0
+        je          b16x16_fp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rdx,        dword ptr arg(1)    ; src_pixels_per_line
+
+        movdqa      xmm2,       [rax]
+
+%if ABI_IS_32BIT=0
+        movsxd      r8,         dword ptr arg(5)    ; dst_pitch
+%endif
+        movdqu      xmm3,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+        movdqa      xmm4,       xmm3
+
+        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
+        lea         rsi,        [rsi + rdx]         ; next line
+
+        punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+        pmaddubsw   xmm3,       xmm1                ; 00 02 04 06 08 10 12 14
+
+        punpckhbw   xmm4,       xmm5                ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
+        pmaddubsw   xmm4,       xmm1                ; 01 03 05 07 09 11 13 15
+
+        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
+
+        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
+
+        movdqa      xmm7,       xmm3
+        packuswb    xmm7,       xmm4                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+        movdqu      xmm6,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+        movdqa      xmm4,       xmm6
+
+        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
+        lea         rsi,        [rsi + rdx]         ; next line
+
+        punpcklbw   xmm6,       xmm5
+        pmaddubsw   xmm6,       xmm1
+
+        punpckhbw   xmm4,       xmm5
+        pmaddubsw   xmm4,       xmm1
+
+        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
+
+        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
+
+        packuswb    xmm6,       xmm4
+        movdqa      xmm5,       xmm7
+
+        punpcklbw   xmm5,       xmm6
+        pmaddubsw   xmm5,       xmm2
+
+        punpckhbw   xmm7,       xmm6
+        pmaddubsw   xmm7,       xmm2
+
+        paddw       xmm5,       [rd GLOBAL]         ; xmm5 += round value
+        psraw       xmm5,       VP8_FILTER_SHIFT    ; xmm5 /= 128
+
+        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
+
+        packuswb    xmm5,       xmm7
+        movdqa      xmm7,       xmm6
+
+        movdqa      [rdi],      xmm5                ; store the results in the destination
+%if ABI_IS_32BIT
+        add         rdi,        DWORD PTR arg(5)    ; dst_pitch
+%else
+        add         rdi,        r8
+%endif
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done
+
+b16x16_sp_only:
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        mov         rsi,        arg(0)              ; src_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm1,       [rax]               ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
+
+        ; get the first horizontal line done
+        movdqu      xmm2,       [rsi]               ; load row 0
+
+        lea         rsi,        [rsi + rax]         ; next line
+.next_row:
+        movdqu      xmm3,       [rsi]               ; load row + 1
+
+        movdqu      xmm4,       xmm2
+        punpcklbw   xmm4,       xmm3
+
+        pmaddubsw   xmm4,       xmm1
+        movdqu      xmm7,       [rsi + rax]         ; load row + 2
+
+        punpckhbw   xmm2,       xmm3
+        movdqu      xmm6,       xmm3
+
+        pmaddubsw   xmm2,       xmm1
+        punpcklbw   xmm6,       xmm7
+
+        paddw       xmm4,       [rd GLOBAL]
+        pmaddubsw   xmm6,       xmm1
+
+        psraw       xmm4,       VP8_FILTER_SHIFT
+        punpckhbw   xmm3,       xmm7
+
+        paddw       xmm2,       [rd GLOBAL]
+        pmaddubsw   xmm3,       xmm1
+
+        psraw       xmm2,       VP8_FILTER_SHIFT
+        paddw       xmm6,       [rd GLOBAL]
+
+        packuswb    xmm4,       xmm2
+        psraw       xmm6,       VP8_FILTER_SHIFT
+
+        movdqa      [rdi],      xmm4                ; store row 0
+        paddw       xmm3,       [rd GLOBAL]
+
+        psraw       xmm3,       VP8_FILTER_SHIFT
+        lea         rsi,        [rsi + 2*rax]
+
+        packuswb    xmm6,       xmm3
+        movdqa      xmm2,       xmm7
+
+        movdqa      [rdi + rdx],xmm6                ; store row 1
+        lea         rdi,        [rdi + 2*rdx]
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done
+
+b16x16_fp_only:
+        lea         rcx,        [rdi+rdx*8]
+        lea         rcx,        [rcx+rdx*8]
+        movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
+
+.next_row:
+        movdqu      xmm2,       [rsi]               ; row 0
+        movdqa      xmm3,       xmm2
+
+        movdqu      xmm4,       [rsi + 1]           ; row 0 + 1
+        lea         rsi,        [rsi + rax]         ; next line
+
+        punpcklbw   xmm2,       xmm4
+        movdqu      xmm5,       [rsi]               ; row 1
+
+        pmaddubsw   xmm2,       xmm1
+        movdqa      xmm6,       xmm5
+
+        punpckhbw   xmm3,       xmm4
+        movdqu      xmm7,       [rsi + 1]           ; row 1 + 1
+
+        pmaddubsw   xmm3,       xmm1
+        paddw       xmm2,       [rd GLOBAL]
+
+        psraw       xmm2,       VP8_FILTER_SHIFT
+        punpcklbw   xmm5,       xmm7
+
+        paddw       xmm3,       [rd GLOBAL]
+        pmaddubsw   xmm5,       xmm1
+
+        psraw       xmm3,       VP8_FILTER_SHIFT
+        punpckhbw   xmm6,       xmm7
+
+        packuswb    xmm2,       xmm3
+        pmaddubsw   xmm6,       xmm1
+
+        movdqa      [rdi],      xmm2                ; store the results in the destination
+        paddw       xmm5,       [rd GLOBAL]
+
+        lea         rdi,        [rdi + rdx]         ; dst_pitch
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        paddw       xmm6,       [rd GLOBAL]
+        psraw       xmm6,       VP8_FILTER_SHIFT
+
+        packuswb    xmm5,       xmm6
+        lea         rsi,        [rsi + rax]         ; next line
+
+        movdqa      [rdi],      xmm5                ; store the results in the destination
+        lea         rdi,        [rdi + rdx]         ; dst_pitch
+
+        cmp         rdi,        rcx
+
+        jne         .next_row
+
+done:
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+;void vp8_bilinear_predict8x8_ssse3
+;(
+;    unsigned char  *src_ptr,
+;    int   src_pixels_per_line,
+;    int  xoffset,
+;    int  yoffset,
+;    unsigned char *dst_ptr,
+;    int dst_pitch
+;)
+global sym(vp8_bilinear_predict8x8_ssse3)
+sym(vp8_bilinear_predict8x8_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    SAVE_XMM
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+    sub         rsp, 144                         ; reserve 144 bytes
+
+        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+
+        mov         rsi,        arg(0) ;src_ptr
+        movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
+
+    ;Read 9-line unaligned data in and put them on stack. This gives a big
+    ;performance boost.
+        movdqu      xmm0,       [rsi]
+        lea         rax,        [rdx + rdx*2]
+        movdqu      xmm1,       [rsi+rdx]
+        movdqu      xmm2,       [rsi+rdx*2]
+        add         rsi,        rax
+        movdqu      xmm3,       [rsi]
+        movdqu      xmm4,       [rsi+rdx]
+        movdqu      xmm5,       [rsi+rdx*2]
+        add         rsi,        rax
+        movdqu      xmm6,       [rsi]
+        movdqu      xmm7,       [rsi+rdx]
+
+        movdqa      XMMWORD PTR [rsp],            xmm0
+
+        movdqu      xmm0,       [rsi+rdx*2]
+
+        movdqa      XMMWORD PTR [rsp+16],         xmm1
+        movdqa      XMMWORD PTR [rsp+32],         xmm2
+        movdqa      XMMWORD PTR [rsp+48],         xmm3
+        movdqa      XMMWORD PTR [rsp+64],         xmm4
+        movdqa      XMMWORD PTR [rsp+80],         xmm5
+        movdqa      XMMWORD PTR [rsp+96],         xmm6
+        movdqa      XMMWORD PTR [rsp+112],        xmm7
+        movdqa      XMMWORD PTR [rsp+128],        xmm0
+
+        movsxd      rax,        dword ptr arg(2)    ; xoffset
+        cmp         rax,        0                   ; skip first_pass filter if xoffset=0
+        je          b8x8_sp_only
+
+        shl         rax,        4
+        add         rax,        rcx                 ; HFilter
+
+        mov         rdi,        arg(4)              ; dst_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm0,       [rax]
+
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        cmp         rax,        0                   ; skip second_pass filter if yoffset=0
+        je          b8x8_fp_only
+
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        lea         rcx,        [rdi+rdx*8]
+
+        movdqa      xmm1,       [rax]
+
+        ; get the first horizontal line done
+        movdqa      xmm3,       [rsp]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+        movdqa      xmm5,       xmm3                ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx
+
+        psrldq      xmm5,       1
+        lea         rsp,        [rsp + 16]          ; next line
+
+        punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+        pmaddubsw   xmm3,       xmm0                ; 00 02 04 06 08 10 12 14
+
+        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
+
+        movdqa      xmm7,       xmm3
+        packuswb    xmm7,       xmm7                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+
+.next_row:
+        movdqa      xmm6,       [rsp]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
+        lea         rsp,        [rsp + 16]          ; next line
+
+        movdqa      xmm5,       xmm6
+
+        psrldq      xmm5,       1
+
+        punpcklbw   xmm6,       xmm5
+        pmaddubsw   xmm6,       xmm0
+
+        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
+
+        packuswb    xmm6,       xmm6
+
+        punpcklbw   xmm7,       xmm6
+        pmaddubsw   xmm7,       xmm1
+
+        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
+
+        packuswb    xmm7,       xmm7
+
+        movq        [rdi],      xmm7                ; store the results in the destination
+        lea         rdi,        [rdi + rdx]
+
+        movdqa      xmm7,       xmm6
+
+        cmp         rdi,        rcx
+        jne         .next_row
+
+        jmp         done8x8
+
+b8x8_sp_only:
+        movsxd      rax,        dword ptr arg(3)    ; yoffset
+        shl         rax,        4
+        lea         rax,        [rax + rcx]         ; VFilter
+
+        mov         rdi,        arg(4) ;dst_ptr
+        movsxd      rdx,        dword ptr arg(5)    ; dst_pitch
+
+        movdqa      xmm0,       [rax]               ; VFilter
+
+        movq        xmm1,       XMMWORD PTR [rsp]
+        movq        xmm2,       XMMWORD PTR [rsp+16]
+
+        movq        xmm3,       XMMWORD PTR [rsp+32]
+        punpcklbw   xmm1,       xmm2
+
+        movq        xmm4,       XMMWORD PTR [rsp+48]
+        punpcklbw   xmm2,       xmm3
+
+        movq        xmm5,       XMMWORD PTR [rsp+64]
+        punpcklbw   xmm3,       xmm4
+
+        movq        xmm6,       XMMWORD PTR [rsp+80]
+        punpcklbw   xmm4,       xmm5
+
+        movq        xmm7,       XMMWORD PTR [rsp+96]
+        punpcklbw   xmm5,       xmm6
+
+        pmaddubsw   xmm1,       xmm0
+        pmaddubsw   xmm2,       xmm0
+
+        pmaddubsw   xmm3,       xmm0
+        pmaddubsw   xmm4,       xmm0
+
+        pmaddubsw   xmm5,       xmm0
+        punpcklbw   xmm6,       xmm7
+
+        pmaddubsw   xmm6,       xmm0
+        paddw       xmm1,       [rd GLOBAL]
+
+        paddw       xmm2,       [rd GLOBAL]
+        psraw       xmm1,       VP8_FILTER_SHIFT
+
+        paddw       xmm3,       [rd GLOBAL]
+        psraw       xmm2,       VP8_FILTER_SHIFT
+
+        paddw       xmm4,       [rd GLOBAL]
+        psraw       xmm3,       VP8_FILTER_SHIFT
+
+        paddw       xmm5,       [rd GLOBAL]
+        psraw       xmm4,       VP8_FILTER_SHIFT
+
+        paddw       xmm6,       [rd GLOBAL]
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        psraw       xmm6,       VP8_FILTER_SHIFT
+        packuswb    xmm1,       xmm1
+
+        packuswb    xmm2,       xmm2
+        movq        [rdi],      xmm1
+
+        packuswb    xmm3,       xmm3
+        movq        [rdi+rdx],  xmm2
+
+        packuswb    xmm4,       xmm4
+        movq        xmm1,       XMMWORD PTR [rsp+112]
+
+        lea         rdi,        [rdi + 2*rdx]
+        movq        xmm2,       XMMWORD PTR [rsp+128]
+
+        packuswb    xmm5,       xmm5
+        movq        [rdi],      xmm3
+
+        packuswb    xmm6,       xmm6
+        movq        [rdi+rdx],  xmm4
+
+        lea         rdi,        [rdi + 2*rdx]
+        punpcklbw   xmm7,       xmm1
+
+        movq        [rdi],      xmm5
+        pmaddubsw   xmm7,       xmm0
+
+        movq        [rdi+rdx],  xmm6
+        punpcklbw   xmm1,       xmm2
+
+        pmaddubsw   xmm1,       xmm0
+        paddw       xmm7,       [rd GLOBAL]
+
+        psraw       xmm7,       VP8_FILTER_SHIFT
+        paddw       xmm1,       [rd GLOBAL]
+
+        psraw       xmm1,       VP8_FILTER_SHIFT
+        packuswb    xmm7,       xmm7
+
+        packuswb    xmm1,       xmm1
+        lea         rdi,        [rdi + 2*rdx]
+
+        movq        [rdi],      xmm7
+
+        movq        [rdi+rdx],  xmm1
+        lea         rsp,        [rsp + 144]
+
+        jmp         done8x8
+
+b8x8_fp_only:
+        lea         rcx,        [rdi+rdx*8]
+
+.next_row:
+        movdqa      xmm1,       XMMWORD PTR [rsp]
+        movdqa      xmm3,       XMMWORD PTR [rsp+16]
+
+        movdqa      xmm2,       xmm1
+        movdqa      xmm5,       XMMWORD PTR [rsp+32]
+
+        psrldq      xmm2,       1
+        movdqa      xmm7,       XMMWORD PTR [rsp+48]
+
+        movdqa      xmm4,       xmm3
+        psrldq      xmm4,       1
+
+        movdqa      xmm6,       xmm5
+        psrldq      xmm6,       1
+
+        punpcklbw   xmm1,       xmm2
+        pmaddubsw   xmm1,       xmm0
+
+        punpcklbw   xmm3,       xmm4
+        pmaddubsw   xmm3,       xmm0
+
+        punpcklbw   xmm5,       xmm6
+        pmaddubsw   xmm5,       xmm0
+
+        movdqa      xmm2,       xmm7
+        psrldq      xmm2,       1
+
+        punpcklbw   xmm7,       xmm2
+        pmaddubsw   xmm7,       xmm0
+
+        paddw       xmm1,       [rd GLOBAL]
+        psraw       xmm1,       VP8_FILTER_SHIFT
+
+        paddw       xmm3,       [rd GLOBAL]
+        psraw       xmm3,       VP8_FILTER_SHIFT
+
+        paddw       xmm5,       [rd GLOBAL]
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        paddw       xmm7,       [rd GLOBAL]
+        psraw       xmm7,       VP8_FILTER_SHIFT
+
+        packuswb    xmm1,       xmm1
+        packuswb    xmm3,       xmm3
+
+        packuswb    xmm5,       xmm5
+        movq        [rdi],      xmm1
+
+        packuswb    xmm7,       xmm7
+        movq        [rdi+rdx],  xmm3
+
+        lea         rdi,        [rdi + 2*rdx]
+        movq        [rdi],      xmm5
+
+        lea         rsp,        [rsp + 4*16]
+        movq        [rdi+rdx],  xmm7
+
+        lea         rdi,        [rdi + 2*rdx]
+        cmp         rdi,        rcx
+
+        jne         .next_row
+
+        lea         rsp,        [rsp + 16]
+
+done8x8:
+    ;add rsp, 144
+    pop         rsp
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    RESTORE_XMM
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
 SECTION_RODATA
 align 16
 shuf1b:
@@ -928,4 +1495,14 @@ k2_k4:
     times 8 db  50,   -9
     times 8 db  36,  -11
     times 8 db  12,   -6
+align 16
+vp8_bilinear_filters_ssse3:
+    times 8 db 128, 0
+    times 8 db 112, 16
+    times 8 db 96,  32
+    times 8 db 80,  48
+    times 8 db 64,  64
+    times 8 db 48,  80
+    times 8 db 32,  96
+    times 8 db 16,  112
 
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index b371892c9..38692a1bb 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -91,8 +91,8 @@ extern prototype_subpixel_predict(vp8_sixtap_predict16x16_ssse3);
 extern prototype_subpixel_predict(vp8_sixtap_predict8x8_ssse3);
 extern prototype_subpixel_predict(vp8_sixtap_predict8x4_ssse3);
 extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
-//extern prototype_subpixel_predict(vp8_bilinear_predict16x16_sse2);
-//extern prototype_subpixel_predict(vp8_bilinear_predict8x8_sse2);
+extern prototype_subpixel_predict(vp8_bilinear_predict16x16_ssse3);
+extern prototype_subpixel_predict(vp8_bilinear_predict8x8_ssse3);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
@@ -108,11 +108,11 @@ extern prototype_subpixel_predict(vp8_sixtap_predict4x4_ssse3);
 #define vp8_subpix_sixtap4x4 vp8_sixtap_predict4x4_ssse3
 
 
-//#undef  vp8_subpix_bilinear16x16
-//#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_sse2
+#undef  vp8_subpix_bilinear16x16
+#define vp8_subpix_bilinear16x16 vp8_bilinear_predict16x16_ssse3
 
-//#undef  vp8_subpix_bilinear8x8
-//#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_sse2
+#undef  vp8_subpix_bilinear8x8
+#define vp8_subpix_bilinear8x8 vp8_bilinear_predict8x8_ssse3
 
 #endif
 #endif
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index ce487ff9f..509d5a4e0 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -124,6 +124,8 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
         rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_ssse3;
         rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_ssse3;
         rtcd->subpix.sixtap4x4     = vp8_sixtap_predict4x4_ssse3;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_ssse3;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_ssse3;
     }
 #endif
 

From 63ccfbd54537cde423298703b42cd30001e2b053 Mon Sep 17 00:00:00 2001
From: Jim Bankoski <jimbankoski@google.com>
Date: Thu, 22 Jul 2010 16:07:13 -0400
Subject: [PATCH 152/307] Enable ARFs for non-lagged compress

ARFs were explicitly disabled except in lagged compress mode. New
ARF logic allows for the ARF buffer to hold an older golden frame,
which does not require lagged compress.

Change-Id: I1dff82b6f53e8311f1e0514b1794ae05919d5f79
---
 vp8/encoder/onyx_if.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 201de3377..d9ff6b05b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1652,13 +1652,6 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
         cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
 
-    // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
-    if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
-    {
-        cpi->oxcf.play_alternate = 0;
-        cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
-    }
-
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;
@@ -1937,13 +1930,6 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS)
         cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS;
 
-    // force play_alternate to 0 if allow_lag is 0, lag_in_frames is too small, Mode is real time or one pass compress enabled.
-    if (cpi->oxcf.allow_lag == 0 || cpi->oxcf.lag_in_frames <= 5 || (cpi->oxcf.Mode < MODE_SECONDPASS))
-    {
-        cpi->oxcf.play_alternate = 0;
-        cpi->ref_frame_flags = cpi->ref_frame_flags & ~VP8_ALT_FLAG;
-    }
-
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;

From 69ae8f475dd895fba0bc40ffdab0ca2d7537888c Mon Sep 17 00:00:00 2001
From: Jim Bankoski <jimbankoski@google.com>
Date: Thu, 22 Jul 2010 16:07:13 -0400
Subject: [PATCH 153/307] Skip unnecessary search of identical frames

vp8_get_compressed_data() was defeating logic in
encode_frame_to_datarate() that determined the reference buffers to
search and forcing all frames to be eligible to search. In cases
where buffers have identical contents, this is unnecessary extra
work.

Change-Id: I9e667ac39128ae32dc455a3db4c62e3efce6f114
---
 vp8/encoder/onyx_if.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index d9ff6b05b..99a09deb2 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -5209,8 +5209,6 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
     {
 
         // return to normal state
-        cpi->ref_frame_flags = VP8_ALT_FLAG | VP8_GOLD_FLAG | VP8_LAST_FLAG;
-
         cm->refresh_entropy_probs = 1;
         cm->refresh_alt_ref_frame = 0;
         cm->refresh_golden_frame = 0;

From c2140b8af169a6d93a70efc403a5956f5ec84dba Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 9 Sep 2010 08:16:39 -0400
Subject: [PATCH 154/307] Use WebM in copyright notice for consistency

Changes 'The VP8 project' to 'The WebM project', for consistency
with other webmproject.org repositories.

Fixes issue #97.

Change-Id: I37c13ed5fbdb9d334ceef71c6350e9febed9bbba
---
 args.c                                                      | 2 +-
 args.h                                                      | 2 +-
 build/arm-wince-vs8/obj_int_extract.bat                     | 2 +-
 build/make/Makefile                                         | 2 +-
 build/make/ads2gas.pl                                       | 2 +-
 build/make/ads2gas_apple.pl                                 | 2 +-
 build/make/armlink_adapter.sh                               | 2 +-
 build/make/gen_asm_deps.sh                                  | 2 +-
 build/make/gen_msvs_def.sh                                  | 2 +-
 build/make/gen_msvs_proj.sh                                 | 2 +-
 build/make/gen_msvs_sln.sh                                  | 2 +-
 build/make/obj_int_extract.c                                | 2 +-
 build/make/version.sh                                       | 2 +-
 docs.mk                                                     | 2 +-
 example_xma.c                                               | 2 +-
 examples.mk                                                 | 2 +-
 examples/decoder_tmpl.c                                     | 2 +-
 examples/encoder_tmpl.c                                     | 2 +-
 examples/gen_example_code.sh                                | 2 +-
 examples/gen_example_doxy.php                               | 2 +-
 examples/gen_example_text.sh                                | 2 +-
 ivfdec.c                                                    | 2 +-
 ivfenc.c                                                    | 2 +-
 libs.doxy_template                                          | 2 +-
 libs.mk                                                     | 2 +-
 release.sh                                                  | 2 +-
 solution.mk                                                 | 2 +-
 vp8/common/alloccommon.c                                    | 2 +-
 vp8/common/alloccommon.h                                    | 2 +-
 vp8/common/arm/armv6/bilinearfilter_v6.asm                  | 2 +-
 vp8/common/arm/armv6/copymem16x16_v6.asm                    | 2 +-
 vp8/common/arm/armv6/copymem8x4_v6.asm                      | 2 +-
 vp8/common/arm/armv6/copymem8x8_v6.asm                      | 2 +-
 vp8/common/arm/armv6/dc_only_idct_add_v6.asm                | 2 +-
 vp8/common/arm/armv6/filter_v6.asm                          | 2 +-
 vp8/common/arm/armv6/idct_v6.asm                            | 2 +-
 vp8/common/arm/armv6/iwalsh_v6.asm                          | 2 +-
 vp8/common/arm/armv6/loopfilter_v6.asm                      | 2 +-
 vp8/common/arm/armv6/recon_v6.asm                           | 2 +-
 vp8/common/arm/armv6/simpleloopfilter_v6.asm                | 2 +-
 vp8/common/arm/armv6/sixtappredict8x4_v6.asm                | 2 +-
 vp8/common/arm/bilinearfilter_arm.c                         | 2 +-
 vp8/common/arm/filter_arm.c                                 | 2 +-
 vp8/common/arm/idct_arm.h                                   | 2 +-
 vp8/common/arm/loopfilter_arm.c                             | 2 +-
 vp8/common/arm/loopfilter_arm.h                             | 2 +-
 vp8/common/arm/neon/bilinearpredict16x16_neon.asm           | 2 +-
 vp8/common/arm/neon/bilinearpredict4x4_neon.asm             | 2 +-
 vp8/common/arm/neon/bilinearpredict8x4_neon.asm             | 2 +-
 vp8/common/arm/neon/bilinearpredict8x8_neon.asm             | 2 +-
 vp8/common/arm/neon/buildintrapredictorsmby_neon.asm        | 2 +-
 vp8/common/arm/neon/copymem16x16_neon.asm                   | 2 +-
 vp8/common/arm/neon/copymem8x4_neon.asm                     | 2 +-
 vp8/common/arm/neon/copymem8x8_neon.asm                     | 2 +-
 vp8/common/arm/neon/dc_only_idct_add_neon.asm               | 2 +-
 vp8/common/arm/neon/iwalsh_neon.asm                         | 2 +-
 vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm    | 2 +-
 vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm     | 2 +-
 vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm | 2 +-
 vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm   | 2 +-
 vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm      | 2 +-
 vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm       | 2 +-
 vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm  | 2 +-
 vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm   | 2 +-
 vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm    | 2 +-
 vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm     | 2 +-
 vp8/common/arm/neon/recon16x16mb_neon.asm                   | 2 +-
 vp8/common/arm/neon/recon2b_neon.asm                        | 2 +-
 vp8/common/arm/neon/recon4b_neon.asm                        | 2 +-
 vp8/common/arm/neon/reconb_neon.asm                         | 2 +-
 vp8/common/arm/neon/save_neon_reg.asm                       | 2 +-
 vp8/common/arm/neon/shortidct4x4llm_1_neon.asm              | 2 +-
 vp8/common/arm/neon/shortidct4x4llm_neon.asm                | 2 +-
 vp8/common/arm/neon/sixtappredict16x16_neon.asm             | 2 +-
 vp8/common/arm/neon/sixtappredict4x4_neon.asm               | 2 +-
 vp8/common/arm/neon/sixtappredict8x4_neon.asm               | 2 +-
 vp8/common/arm/neon/sixtappredict8x8_neon.asm               | 2 +-
 vp8/common/arm/recon_arm.c                                  | 2 +-
 vp8/common/arm/recon_arm.h                                  | 2 +-
 vp8/common/arm/reconintra4x4_arm.c                          | 2 +-
 vp8/common/arm/reconintra_arm.c                             | 2 +-
 vp8/common/arm/subpixel_arm.h                               | 2 +-
 vp8/common/arm/systemdependent.c                            | 2 +-
 vp8/common/arm/vpx_asm_offsets.c                            | 2 +-
 vp8/common/bigend.h                                         | 2 +-
 vp8/common/blockd.c                                         | 2 +-
 vp8/common/blockd.h                                         | 2 +-
 vp8/common/boolcoder.h                                      | 2 +-
 vp8/common/codec_common_interface.h                         | 2 +-
 vp8/common/coefupdateprobs.h                                | 2 +-
 vp8/common/common.h                                         | 2 +-
 vp8/common/common_types.h                                   | 2 +-
 vp8/common/context.c                                        | 2 +-
 vp8/common/debugmodes.c                                     | 2 +-
 vp8/common/defaultcoefcounts.h                              | 2 +-
 vp8/common/dma_desc.h                                       | 2 +-
 vp8/common/duck_io.h                                        | 2 +-
 vp8/common/entropy.c                                        | 2 +-
 vp8/common/entropy.h                                        | 2 +-
 vp8/common/entropymode.c                                    | 2 +-
 vp8/common/entropymode.h                                    | 2 +-
 vp8/common/entropymv.c                                      | 2 +-
 vp8/common/entropymv.h                                      | 2 +-
 vp8/common/extend.c                                         | 2 +-
 vp8/common/extend.h                                         | 2 +-
 vp8/common/filter_c.c                                       | 2 +-
 vp8/common/findnearmv.c                                     | 2 +-
 vp8/common/findnearmv.h                                     | 2 +-
 vp8/common/fourcc.hpp                                       | 2 +-
 vp8/common/g_common.h                                       | 2 +-
 vp8/common/generic/systemdependent.c                        | 2 +-
 vp8/common/header.h                                         | 2 +-
 vp8/common/idct.h                                           | 2 +-
 vp8/common/idctllm.c                                        | 2 +-
 vp8/common/invtrans.c                                       | 2 +-
 vp8/common/invtrans.h                                       | 2 +-
 vp8/common/littlend.h                                       | 2 +-
 vp8/common/loopfilter.c                                     | 2 +-
 vp8/common/loopfilter.h                                     | 2 +-
 vp8/common/loopfilter_filters.c                             | 2 +-
 vp8/common/mac_specs.h                                      | 2 +-
 vp8/common/mbpitch.c                                        | 2 +-
 vp8/common/modecont.c                                       | 2 +-
 vp8/common/modecont.h                                       | 2 +-
 vp8/common/modecontext.c                                    | 2 +-
 vp8/common/mv.h                                             | 2 +-
 vp8/common/onyx.h                                           | 2 +-
 vp8/common/onyxc_int.h                                      | 2 +-
 vp8/common/onyxd.h                                          | 2 +-
 vp8/common/partialgfupdate.h                                | 2 +-
 vp8/common/postproc.c                                       | 2 +-
 vp8/common/postproc.h                                       | 2 +-
 vp8/common/ppc/copy_altivec.asm                             | 2 +-
 vp8/common/ppc/filter_altivec.asm                           | 2 +-
 vp8/common/ppc/filter_bilinear_altivec.asm                  | 2 +-
 vp8/common/ppc/idctllm_altivec.asm                          | 2 +-
 vp8/common/ppc/loopfilter_altivec.c                         | 2 +-
 vp8/common/ppc/loopfilter_filters_altivec.asm               | 2 +-
 vp8/common/ppc/platform_altivec.asm                         | 2 +-
 vp8/common/ppc/recon_altivec.asm                            | 2 +-
 vp8/common/ppc/systemdependent.c                            | 2 +-
 vp8/common/ppflags.h                                        | 2 +-
 vp8/common/pragmas.h                                        | 2 +-
 vp8/common/predictdc.c                                      | 2 +-
 vp8/common/predictdc.h                                      | 2 +-
 vp8/common/preproc.h                                        | 2 +-
 vp8/common/preprocif.h                                      | 2 +-
 vp8/common/proposed.h                                       | 2 +-
 vp8/common/quant_common.c                                   | 2 +-
 vp8/common/quant_common.h                                   | 2 +-
 vp8/common/recon.c                                          | 2 +-
 vp8/common/recon.h                                          | 2 +-
 vp8/common/reconinter.c                                     | 2 +-
 vp8/common/reconinter.h                                     | 2 +-
 vp8/common/reconintra.c                                     | 2 +-
 vp8/common/reconintra.h                                     | 2 +-
 vp8/common/reconintra4x4.c                                  | 2 +-
 vp8/common/reconintra4x4.h                                  | 2 +-
 vp8/common/setupintrarecon.c                                | 2 +-
 vp8/common/setupintrarecon.h                                | 2 +-
 vp8/common/subpixel.h                                       | 2 +-
 vp8/common/swapyv12buffer.c                                 | 2 +-
 vp8/common/swapyv12buffer.h                                 | 2 +-
 vp8/common/systemdependent.h                                | 2 +-
 vp8/common/textblit.c                                       | 2 +-
 vp8/common/threading.h                                      | 2 +-
 vp8/common/treecoder.c                                      | 2 +-
 vp8/common/treecoder.h                                      | 2 +-
 vp8/common/type_aliases.h                                   | 2 +-
 vp8/common/vfwsetting.hpp                                   | 2 +-
 vp8/common/vpx_ref_build_prefix.h                           | 2 +-
 vp8/common/vpxblit.h                                        | 2 +-
 vp8/common/vpxblit_c64.h                                    | 2 +-
 vp8/common/vpxerrors.h                                      | 2 +-
 vp8/common/x86/boolcoder.cxx                                | 2 +-
 vp8/common/x86/idct_x86.h                                   | 2 +-
 vp8/common/x86/idctllm_mmx.asm                              | 2 +-
 vp8/common/x86/idctllm_sse2.asm                             | 2 +-
 vp8/common/x86/iwalsh_mmx.asm                               | 2 +-
 vp8/common/x86/iwalsh_sse2.asm                              | 2 +-
 vp8/common/x86/loopfilter_mmx.asm                           | 2 +-
 vp8/common/x86/loopfilter_sse2.asm                          | 2 +-
 vp8/common/x86/loopfilter_x86.c                             | 2 +-
 vp8/common/x86/loopfilter_x86.h                             | 2 +-
 vp8/common/x86/postproc_mmx.asm                             | 2 +-
 vp8/common/x86/postproc_mmx.c                               | 2 +-
 vp8/common/x86/postproc_sse2.asm                            | 2 +-
 vp8/common/x86/postproc_x86.h                               | 2 +-
 vp8/common/x86/recon_mmx.asm                                | 2 +-
 vp8/common/x86/recon_sse2.asm                               | 2 +-
 vp8/common/x86/recon_x86.h                                  | 2 +-
 vp8/common/x86/subpixel_mmx.asm                             | 2 +-
 vp8/common/x86/subpixel_sse2.asm                            | 2 +-
 vp8/common/x86/subpixel_ssse3.asm                           | 2 +-
 vp8/common/x86/subpixel_x86.h                               | 2 +-
 vp8/common/x86/vp8_asm_stubs.c                              | 2 +-
 vp8/common/x86/x86_systemdependent.c                        | 2 +-
 vp8/decoder/arm/armv5/dequantize_v5.asm                     | 2 +-
 vp8/decoder/arm/armv6/dboolhuff_v6.asm                      | 2 +-
 vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm                | 2 +-
 vp8/decoder/arm/armv6/dequant_idct_v6.asm                   | 2 +-
 vp8/decoder/arm/armv6/dequantize_v6.asm                     | 2 +-
 vp8/decoder/arm/armv6/idct_blk_v6.c                         | 2 +-
 vp8/decoder/arm/dequantize_arm.c                            | 2 +-
 vp8/decoder/arm/dequantize_arm.h                            | 2 +-
 vp8/decoder/arm/detokenize.asm                              | 2 +-
 vp8/decoder/arm/detokenize_arm.h                            | 2 +-
 vp8/decoder/arm/dsystemdependent.c                          | 2 +-
 vp8/decoder/arm/neon/dboolhuff_neon.asm                     | 2 +-
 vp8/decoder/arm/neon/dequant_dc_idct_neon.asm               | 2 +-
 vp8/decoder/arm/neon/dequant_idct_neon.asm                  | 2 +-
 vp8/decoder/arm/neon/dequantizeb_neon.asm                   | 2 +-
 vp8/decoder/arm/neon/idct_blk_neon.c                        | 2 +-
 vp8/decoder/dboolhuff.c                                     | 2 +-
 vp8/decoder/dboolhuff.h                                     | 2 +-
 vp8/decoder/decodemv.c                                      | 2 +-
 vp8/decoder/decodemv.h                                      | 2 +-
 vp8/decoder/decoderthreading.h                              | 2 +-
 vp8/decoder/decodframe.c                                    | 2 +-
 vp8/decoder/demode.c                                        | 2 +-
 vp8/decoder/demode.h                                        | 2 +-
 vp8/decoder/dequantize.c                                    | 2 +-
 vp8/decoder/dequantize.h                                    | 2 +-
 vp8/decoder/detokenize.c                                    | 2 +-
 vp8/decoder/detokenize.h                                    | 2 +-
 vp8/decoder/generic/dsystemdependent.c                      | 2 +-
 vp8/decoder/idct_blk.c                                      | 2 +-
 vp8/decoder/onyxd_if.c                                      | 2 +-
 vp8/decoder/onyxd_int.h                                     | 2 +-
 vp8/decoder/threading.c                                     | 2 +-
 vp8/decoder/treereader.h                                    | 2 +-
 vp8/decoder/x86/dequantize_mmx.asm                          | 2 +-
 vp8/decoder/x86/dequantize_x86.h                            | 2 +-
 vp8/decoder/x86/idct_blk_mmx.c                              | 2 +-
 vp8/decoder/x86/idct_blk_sse2.c                             | 2 +-
 vp8/decoder/x86/onyxdxv.c                                   | 2 +-
 vp8/decoder/x86/x86_dsystemdependent.c                      | 2 +-
 vp8/decoder/xprintf.c                                       | 2 +-
 vp8/decoder/xprintf.h                                       | 2 +-
 vp8/encoder/arm/armv6/walsh_v6.asm                          | 2 +-
 vp8/encoder/arm/boolhuff_arm.c                              | 2 +-
 vp8/encoder/arm/csystemdependent.c                          | 2 +-
 vp8/encoder/arm/dct_arm.h                                   | 2 +-
 vp8/encoder/arm/encodemb_arm.c                              | 2 +-
 vp8/encoder/arm/encodemb_arm.h                              | 2 +-
 vp8/encoder/arm/mcomp_arm.c                                 | 2 +-
 vp8/encoder/arm/neon/boolhuff_armv7.asm                     | 2 +-
 vp8/encoder/arm/neon/fastfdct4x4_neon.asm                   | 2 +-
 vp8/encoder/arm/neon/fastfdct8x4_neon.asm                   | 2 +-
 vp8/encoder/arm/neon/fastquantizeb_neon.asm                 | 2 +-
 vp8/encoder/arm/neon/sad16_neon.asm                         | 2 +-
 vp8/encoder/arm/neon/sad8_neon.asm                          | 2 +-
 vp8/encoder/arm/neon/shortfdct_neon.asm                     | 2 +-
 vp8/encoder/arm/neon/subtract_neon.asm                      | 2 +-
 vp8/encoder/arm/neon/variance_neon.asm                      | 2 +-
 vp8/encoder/arm/neon/vp8_memcpy_neon.asm                    | 2 +-
 vp8/encoder/arm/neon/vp8_mse16x16_neon.asm                  | 2 +-
 vp8/encoder/arm/neon/vp8_packtokens_armv7.asm               | 2 +-
 vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm         | 2 +-
 vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm    | 2 +-
 vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm             | 2 +-
 vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm     | 2 +-
 vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm    | 2 +-
 vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm       | 2 +-
 vp8/encoder/arm/picklpf_arm.c                               | 2 +-
 vp8/encoder/arm/quantize_arm.c                              | 2 +-
 vp8/encoder/arm/quantize_arm.h                              | 2 +-
 vp8/encoder/arm/variance_arm.h                              | 2 +-
 vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c                   | 2 +-
 vp8/encoder/bitstream.c                                     | 2 +-
 vp8/encoder/bitstream.h                                     | 2 +-
 vp8/encoder/block.h                                         | 2 +-
 vp8/encoder/boolhuff.c                                      | 2 +-
 vp8/encoder/boolhuff.h                                      | 2 +-
 vp8/encoder/dct.c                                           | 2 +-
 vp8/encoder/dct.h                                           | 2 +-
 vp8/encoder/encodeframe.c                                   | 2 +-
 vp8/encoder/encodeintra.c                                   | 2 +-
 vp8/encoder/encodeintra.h                                   | 2 +-
 vp8/encoder/encodemb.c                                      | 2 +-
 vp8/encoder/encodemb.h                                      | 2 +-
 vp8/encoder/encodemv.c                                      | 2 +-
 vp8/encoder/encodemv.h                                      | 2 +-
 vp8/encoder/ethreading.c                                    | 2 +-
 vp8/encoder/firstpass.c                                     | 2 +-
 vp8/encoder/firstpass.h                                     | 2 +-
 vp8/encoder/generic/csystemdependent.c                      | 2 +-
 vp8/encoder/mcomp.c                                         | 2 +-
 vp8/encoder/mcomp.h                                         | 2 +-
 vp8/encoder/modecosts.c                                     | 2 +-
 vp8/encoder/modecosts.h                                     | 2 +-
 vp8/encoder/onyx_if.c                                       | 2 +-
 vp8/encoder/onyx_int.h                                      | 2 +-
 vp8/encoder/parms.cpp                                       | 2 +-
 vp8/encoder/pickinter.c                                     | 2 +-
 vp8/encoder/pickinter.h                                     | 2 +-
 vp8/encoder/picklpf.c                                       | 2 +-
 vp8/encoder/ppc/csystemdependent.c                          | 2 +-
 vp8/encoder/ppc/encodemb_altivec.asm                        | 2 +-
 vp8/encoder/ppc/fdct_altivec.asm                            | 2 +-
 vp8/encoder/ppc/rdopt_altivec.asm                           | 2 +-
 vp8/encoder/ppc/sad_altivec.asm                             | 2 +-
 vp8/encoder/ppc/variance_altivec.asm                        | 2 +-
 vp8/encoder/ppc/variance_subpixel_altivec.asm               | 2 +-
 vp8/encoder/preproc.c                                       | 2 +-
 vp8/encoder/psnr.c                                          | 2 +-
 vp8/encoder/psnr.h                                          | 2 +-
 vp8/encoder/quantize.c                                      | 2 +-
 vp8/encoder/quantize.h                                      | 2 +-
 vp8/encoder/ratectrl.c                                      | 2 +-
 vp8/encoder/ratectrl.h                                      | 2 +-
 vp8/encoder/rdopt.c                                         | 2 +-
 vp8/encoder/rdopt.h                                         | 2 +-
 vp8/encoder/sad_c.c                                         | 2 +-
 vp8/encoder/segmentation.c                                  | 2 +-
 vp8/encoder/segmentation.h                                  | 2 +-
 vp8/encoder/ssim.c                                          | 2 +-
 vp8/encoder/tokenize.c                                      | 2 +-
 vp8/encoder/tokenize.h                                      | 2 +-
 vp8/encoder/treewriter.c                                    | 2 +-
 vp8/encoder/treewriter.h                                    | 2 +-
 vp8/encoder/variance.h                                      | 2 +-
 vp8/encoder/variance_c.c                                    | 2 +-
 vp8/encoder/x86/csystemdependent.c                          | 2 +-
 vp8/encoder/x86/dct_mmx.asm                                 | 2 +-
 vp8/encoder/x86/dct_sse2.asm                                | 2 +-
 vp8/encoder/x86/dct_x86.h                                   | 2 +-
 vp8/encoder/x86/encodemb_x86.h                              | 2 +-
 vp8/encoder/x86/encodeopt.asm                               | 2 +-
 vp8/encoder/x86/fwalsh_sse2.asm                             | 2 +-
 vp8/encoder/x86/mcomp_x86.h                                 | 2 +-
 vp8/encoder/x86/preproc_mmx.c                               | 2 +-
 vp8/encoder/x86/quantize_mmx.asm                            | 2 +-
 vp8/encoder/x86/quantize_sse2.asm                           | 2 +-
 vp8/encoder/x86/quantize_x86.h                              | 2 +-
 vp8/encoder/x86/sad_mmx.asm                                 | 2 +-
 vp8/encoder/x86/sad_sse2.asm                                | 2 +-
 vp8/encoder/x86/sad_sse3.asm                                | 2 +-
 vp8/encoder/x86/sad_ssse3.asm                               | 2 +-
 vp8/encoder/x86/subtract_mmx.asm                            | 2 +-
 vp8/encoder/x86/variance_impl_mmx.asm                       | 2 +-
 vp8/encoder/x86/variance_impl_sse2.asm                      | 2 +-
 vp8/encoder/x86/variance_mmx.c                              | 2 +-
 vp8/encoder/x86/variance_sse2.c                             | 2 +-
 vp8/encoder/x86/variance_x86.h                              | 2 +-
 vp8/encoder/x86/x86_csystemdependent.c                      | 2 +-
 vp8/vp8_common.mk                                           | 2 +-
 vp8/vp8_cx_iface.c                                          | 2 +-
 vp8/vp8_dx_iface.c                                          | 2 +-
 vp8/vp8cx.mk                                                | 2 +-
 vp8/vp8cx_arm.mk                                            | 2 +-
 vp8/vp8dx.mk                                                | 2 +-
 vp8/vp8dx_arm.mk                                            | 2 +-
 vpx/internal/vpx_codec_internal.h                           | 2 +-
 vpx/src/vpx_codec.c                                         | 2 +-
 vpx/src/vpx_decoder.c                                       | 2 +-
 vpx/src/vpx_decoder_compat.c                                | 2 +-
 vpx/src/vpx_encoder.c                                       | 2 +-
 vpx/src/vpx_image.c                                         | 2 +-
 vpx/vp8.h                                                   | 2 +-
 vpx/vp8cx.h                                                 | 2 +-
 vpx/vp8dx.h                                                 | 2 +-
 vpx/vp8e.h                                                  | 2 +-
 vpx/vpx_codec.h                                             | 2 +-
 vpx/vpx_codec.mk                                            | 2 +-
 vpx/vpx_codec_impl_bottom.h                                 | 2 +-
 vpx/vpx_codec_impl_top.h                                    | 2 +-
 vpx/vpx_decoder.h                                           | 2 +-
 vpx/vpx_decoder_compat.h                                    | 2 +-
 vpx/vpx_encoder.h                                           | 2 +-
 vpx/vpx_image.h                                             | 2 +-
 vpx/vpx_integer.h                                           | 2 +-
 vpx_mem/include/nds/vpx_mem_nds.h                           | 2 +-
 vpx_mem/include/vpx_mem_intrnl.h                            | 2 +-
 vpx_mem/include/vpx_mem_tracker.h                           | 2 +-
 vpx_mem/intel_linux/vpx_mem.c                               | 2 +-
 vpx_mem/intel_linux/vpx_mem_tracker.c                       | 2 +-
 vpx_mem/memory_manager/hmm_alloc.c                          | 2 +-
 vpx_mem/memory_manager/hmm_base.c                           | 2 +-
 vpx_mem/memory_manager/hmm_dflt_abort.c                     | 2 +-
 vpx_mem/memory_manager/hmm_grow.c                           | 2 +-
 vpx_mem/memory_manager/hmm_largest.c                        | 2 +-
 vpx_mem/memory_manager/hmm_resize.c                         | 2 +-
 vpx_mem/memory_manager/hmm_shrink.c                         | 2 +-
 vpx_mem/memory_manager/hmm_true.c                           | 2 +-
 vpx_mem/memory_manager/include/cavl_if.h                    | 2 +-
 vpx_mem/memory_manager/include/cavl_impl.h                  | 2 +-
 vpx_mem/memory_manager/include/heapmm.h                     | 2 +-
 vpx_mem/memory_manager/include/hmm_cnfg.h                   | 2 +-
 vpx_mem/memory_manager/include/hmm_intrnl.h                 | 2 +-
 vpx_mem/nds/vpx_mem_nds.c                                   | 2 +-
 vpx_mem/ti_c6x/vpx_mem_ti_6cx.c                             | 2 +-
 vpx_mem/vpx_mem.c                                           | 2 +-
 vpx_mem/vpx_mem.h                                           | 2 +-
 vpx_mem/vpx_mem_tracker.c                                   | 2 +-
 vpx_ports/config.h                                          | 2 +-
 vpx_ports/emms.asm                                          | 2 +-
 vpx_ports/mem.h                                             | 2 +-
 vpx_ports/mem_ops.h                                         | 2 +-
 vpx_ports/mem_ops_aligned.h                                 | 2 +-
 vpx_ports/vpx_timer.h                                       | 2 +-
 vpx_ports/vpxtypes.h                                        | 2 +-
 vpx_ports/x86.h                                             | 2 +-
 vpx_ports/x86_abi_support.asm                               | 2 +-
 vpx_scale/arm/armv4/gen_scalers_armv4.asm                   | 2 +-
 vpx_scale/arm/nds/yv12extend.c                              | 2 +-
 vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm      | 2 +-
 vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm      | 2 +-
 vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm   | 2 +-
 vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm  | 2 +-
 vpx_scale/arm/scalesystemdependant.c                        | 2 +-
 vpx_scale/arm/yv12extend_arm.c                              | 2 +-
 vpx_scale/blackfin/yv12config.c                             | 2 +-
 vpx_scale/blackfin/yv12extend.c                             | 2 +-
 vpx_scale/dm642/bicubic_scaler_c64.c                        | 2 +-
 vpx_scale/dm642/gen_scalers_c64.c                           | 2 +-
 vpx_scale/dm642/yv12extend.c                                | 2 +-
 vpx_scale/generic/bicubic_scaler.c                          | 2 +-
 vpx_scale/generic/gen_scalers.c                             | 2 +-
 vpx_scale/generic/scalesystemdependant.c                    | 2 +-
 vpx_scale/generic/vpxscale.c                                | 2 +-
 vpx_scale/generic/yv12config.c                              | 2 +-
 vpx_scale/generic/yv12extend.c                              | 2 +-
 vpx_scale/include/arm/vpxscale_nofp.h                       | 2 +-
 vpx_scale/include/generic/vpxscale_arbitrary.h              | 2 +-
 vpx_scale/include/generic/vpxscale_depricated.h             | 2 +-
 vpx_scale/include/generic/vpxscale_nofp.h                   | 2 +-
 vpx_scale/include/leapster/vpxscale.h                       | 2 +-
 vpx_scale/include/symbian/vpxscale_nofp.h                   | 2 +-
 vpx_scale/include/vpxscale_nofp.h                           | 2 +-
 vpx_scale/intel_linux/scaleopt.c                            | 2 +-
 vpx_scale/intel_linux/scalesystemdependant.c                | 2 +-
 vpx_scale/leapster/doptsystemdependant_lf.c                 | 2 +-
 vpx_scale/leapster/gen_scalers_lf.c                         | 2 +-
 vpx_scale/leapster/vpxscale_lf.c                            | 2 +-
 vpx_scale/leapster/yv12extend.c                             | 2 +-
 vpx_scale/scale_mode.h                                      | 2 +-
 vpx_scale/symbian/gen_scalers_armv4.asm                     | 2 +-
 vpx_scale/symbian/scalesystemdependant.c                    | 2 +-
 vpx_scale/vpxscale.h                                        | 2 +-
 vpx_scale/wce/gen_scalers_armv4.asm                         | 2 +-
 vpx_scale/wce/scalesystemdependant.c                        | 2 +-
 vpx_scale/win32/scaleopt.c                                  | 2 +-
 vpx_scale/win32/scalesystemdependant.c                      | 2 +-
 vpx_scale/x86_64/scaleopt.c                                 | 2 +-
 vpx_scale/x86_64/scalesystemdependant.c                     | 2 +-
 vpx_scale/yv12config.h                                      | 2 +-
 vpx_scale/yv12extend.h                                      | 2 +-
 wince_wmain_adapter.cpp                                     | 2 +-
 y4minput.c                                                  | 2 +-
 y4minput.h                                                  | 2 +-
 451 files changed, 451 insertions(+), 451 deletions(-)

diff --git a/args.c b/args.c
index 0b9772bd5..5365e9120 100644
--- a/args.c
+++ b/args.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/args.h b/args.h
index a75c43f97..4fafcf8a4 100644
--- a/args.h
+++ b/args.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/build/arm-wince-vs8/obj_int_extract.bat b/build/arm-wince-vs8/obj_int_extract.bat
index 84894508b..a361fc346 100644
--- a/build/arm-wince-vs8/obj_int_extract.bat
+++ b/build/arm-wince-vs8/obj_int_extract.bat
@@ -1,5 +1,5 @@
 @echo off
-REM   Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+REM   Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 REM
 REM   Use of this source code is governed by a BSD-style license
 REM   that can be found in the LICENSE file in the root of the source
diff --git a/build/make/Makefile b/build/make/Makefile
index 5011ce36d..1ca747a26 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/ads2gas.pl b/build/make/ads2gas.pl
index 276db996d..3dff048b5 100755
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -1,6 +1,6 @@
 #!/usr/bin/perl
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/ads2gas_apple.pl b/build/make/ads2gas_apple.pl
index b90c682c1..5014c61fb 100755
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -1,6 +1,6 @@
 #!/usr/bin/env perl
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/armlink_adapter.sh b/build/make/armlink_adapter.sh
index df31c157a..571e46ec3 100755
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/gen_asm_deps.sh b/build/make/gen_asm_deps.sh
index 53a8909c4..7c6c5d565 100755
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/gen_msvs_def.sh b/build/make/gen_msvs_def.sh
index a231b4916..4defcc2e7 100755
--- a/build/make/gen_msvs_def.sh
+++ b/build/make/gen_msvs_def.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/gen_msvs_proj.sh b/build/make/gen_msvs_proj.sh
index 5181d3252..584477f92 100755
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/gen_msvs_sln.sh b/build/make/gen_msvs_sln.sh
index 24d6f5d27..9cf090067 100755
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/obj_int_extract.c b/build/make/obj_int_extract.c
index 289d2e1a7..e01870f27 100644
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/build/make/version.sh b/build/make/version.sh
index ff97300d2..3efb956bb 100755
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/docs.mk b/docs.mk
index f90d15711..28df9d262 100644
--- a/docs.mk
+++ b/docs.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/example_xma.c b/example_xma.c
index 69d14f4b8..72eb47092 100644
--- a/example_xma.c
+++ b/example_xma.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/examples.mk b/examples.mk
index c2cf88c08..00ffc7037 100644
--- a/examples.mk
+++ b/examples.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index 561d732a7..ba3ac987f 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index c549784cb..fdfc3af8f 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/examples/gen_example_code.sh b/examples/gen_example_code.sh
index 4b221308a..1133b4951 100755
--- a/examples/gen_example_code.sh
+++ b/examples/gen_example_code.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/examples/gen_example_doxy.php b/examples/gen_example_doxy.php
index a2d0790ed..701bbd30d 100755
--- a/examples/gen_example_doxy.php
+++ b/examples/gen_example_doxy.php
@@ -1,6 +1,6 @@
 #!/usr/bin/env php
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/examples/gen_example_text.sh b/examples/gen_example_text.sh
index f872acd38..9a8703d7f 100755
--- a/examples/gen_example_text.sh
+++ b/examples/gen_example_text.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/ivfdec.c b/ivfdec.c
index 0985dfbdd..3919d6bb2 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/ivfenc.c b/ivfenc.c
index 9afcf7c69..ad172927b 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/libs.doxy_template b/libs.doxy_template
index 92334089e..02e290242 100644
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/libs.mk b/libs.mk
index 32f8a34bc..45cf9bfdc 100644
--- a/libs.mk
+++ b/libs.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/release.sh b/release.sh
index ca8ef71d1..800bdf82f 100755
--- a/release.sh
+++ b/release.sh
@@ -1,6 +1,6 @@
 #!/bin/sh
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/solution.mk b/solution.mk
index 3b6e50eb1..8e852ec5d 100644
--- a/solution.mk
+++ b/solution.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 1105a25f8..408c25306 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/alloccommon.h b/vp8/common/alloccommon.h
index 63dcd8a36..ea93c2522 100644
--- a/vp8/common/alloccommon.h
+++ b/vp8/common/alloccommon.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/bilinearfilter_v6.asm b/vp8/common/arm/armv6/bilinearfilter_v6.asm
index ae3526211..09d7338d9 100644
--- a/vp8/common/arm/armv6/bilinearfilter_v6.asm
+++ b/vp8/common/arm/armv6/bilinearfilter_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/copymem16x16_v6.asm b/vp8/common/arm/armv6/copymem16x16_v6.asm
index b74e294e4..fca91a0db 100644
--- a/vp8/common/arm/armv6/copymem16x16_v6.asm
+++ b/vp8/common/arm/armv6/copymem16x16_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/copymem8x4_v6.asm b/vp8/common/arm/armv6/copymem8x4_v6.asm
index 5488131fc..d8362ef05 100644
--- a/vp8/common/arm/armv6/copymem8x4_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x4_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/copymem8x8_v6.asm b/vp8/common/arm/armv6/copymem8x8_v6.asm
index 0c1d4bd23..c6a60c610 100644
--- a/vp8/common/arm/armv6/copymem8x8_v6.asm
+++ b/vp8/common/arm/armv6/copymem8x8_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
index 19227282e..e0660e9fd 100644
--- a/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
+++ b/vp8/common/arm/armv6/dc_only_idct_add_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license and patent
 ;  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index e37d3aa67..8bc6d7735 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/idct_v6.asm b/vp8/common/arm/armv6/idct_v6.asm
index d96908cc6..27215afcd 100644
--- a/vp8/common/arm/armv6/idct_v6.asm
+++ b/vp8/common/arm/armv6/idct_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/iwalsh_v6.asm b/vp8/common/arm/armv6/iwalsh_v6.asm
index cab6bc916..463bff0f5 100644
--- a/vp8/common/arm/armv6/iwalsh_v6.asm
+++ b/vp8/common/arm/armv6/iwalsh_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/loopfilter_v6.asm b/vp8/common/arm/armv6/loopfilter_v6.asm
index a51da10eb..b6417dee6 100644
--- a/vp8/common/arm/armv6/loopfilter_v6.asm
+++ b/vp8/common/arm/armv6/loopfilter_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/recon_v6.asm b/vp8/common/arm/armv6/recon_v6.asm
index de0449350..99c7bcf2d 100644
--- a/vp8/common/arm/armv6/recon_v6.asm
+++ b/vp8/common/arm/armv6/recon_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/simpleloopfilter_v6.asm b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
index 3a700cd59..013712036 100644
--- a/vp8/common/arm/armv6/simpleloopfilter_v6.asm
+++ b/vp8/common/arm/armv6/simpleloopfilter_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 781aa5fcc..8fb80ef29 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index 363eabde5..247f95b6c 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index c67f5663c..5ed4f8094 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index 6d917c445..f28d7f649 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index 12e56abd0..f86bca1ea 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 9a19386db..6c3628ae9 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
index 668772312..bb72bad1f 100644
--- a/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict16x16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
index 4cbc63a4a..6d4820b7e 100644
--- a/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict4x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
index dd8b4c9a2..b9f3ce034 100644
--- a/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
index 7c8cc988e..f7a7d1496 100644
--- a/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
+++ b/vp8/common/arm/neon/bilinearpredict8x8_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
index 1e4340361..e3ea91fe6 100644
--- a/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
+++ b/vp8/common/arm/neon/buildintrapredictorsmby_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/copymem16x16_neon.asm b/vp8/common/arm/neon/copymem16x16_neon.asm
index 58961c0ec..bda4b9654 100644
--- a/vp8/common/arm/neon/copymem16x16_neon.asm
+++ b/vp8/common/arm/neon/copymem16x16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/copymem8x4_neon.asm b/vp8/common/arm/neon/copymem8x4_neon.asm
index 296d11fb1..35c0f6708 100644
--- a/vp8/common/arm/neon/copymem8x4_neon.asm
+++ b/vp8/common/arm/neon/copymem8x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/copymem8x8_neon.asm b/vp8/common/arm/neon/copymem8x8_neon.asm
index 4f40b5a12..1f5b9411b 100644
--- a/vp8/common/arm/neon/copymem8x8_neon.asm
+++ b/vp8/common/arm/neon/copymem8x8_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/dc_only_idct_add_neon.asm b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
index e6f141fda..49ba05fb0 100644
--- a/vp8/common/arm/neon/dc_only_idct_add_neon.asm
+++ b/vp8/common/arm/neon/dc_only_idct_add_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license and patent
 ;  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/iwalsh_neon.asm b/vp8/common/arm/neon/iwalsh_neon.asm
index 18b56537c..663bf390e 100644
--- a/vp8/common/arm/neon/iwalsh_neon.asm
+++ b/vp8/common/arm/neon/iwalsh_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
index 7590d0bd1..c0c3e337c 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
index 99458e601..a8314cdd7 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
index 542d74659..0b84dc750 100644
--- a/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
index 515ded43e..a793d095a 100644
--- a/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
+++ b/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
index 2722dd9ac..57913d2bc 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
index 7e7eb9fe5..2eb695ff0 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
index 759143da0..4576a6a8f 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
index 321e2b93b..8e85caa45 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index 9d9ae5069..d9dbdcfe5 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index c11fcde2a..bdffc62ee 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/recon16x16mb_neon.asm b/vp8/common/arm/neon/recon16x16mb_neon.asm
index 595484e31..3f1a30f48 100644
--- a/vp8/common/arm/neon/recon16x16mb_neon.asm
+++ b/vp8/common/arm/neon/recon16x16mb_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/recon2b_neon.asm b/vp8/common/arm/neon/recon2b_neon.asm
index b33e58a30..99b251c91 100644
--- a/vp8/common/arm/neon/recon2b_neon.asm
+++ b/vp8/common/arm/neon/recon2b_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/recon4b_neon.asm b/vp8/common/arm/neon/recon4b_neon.asm
index 2ff204ac4..991727746 100644
--- a/vp8/common/arm/neon/recon4b_neon.asm
+++ b/vp8/common/arm/neon/recon4b_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/reconb_neon.asm b/vp8/common/arm/neon/reconb_neon.asm
index 687b4b8f1..288c0ef01 100644
--- a/vp8/common/arm/neon/reconb_neon.asm
+++ b/vp8/common/arm/neon/reconb_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/save_neon_reg.asm b/vp8/common/arm/neon/save_neon_reg.asm
index 32539ac4d..fd7002e7a 100644
--- a/vp8/common/arm/neon/save_neon_reg.asm
+++ b/vp8/common/arm/neon/save_neon_reg.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
index ff1590fb8..d7bdbae75 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_1_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/shortidct4x4llm_neon.asm b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
index 76721c1df..d77a2879e 100644
--- a/vp8/common/arm/neon/shortidct4x4llm_neon.asm
+++ b/vp8/common/arm/neon/shortidct4x4llm_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/sixtappredict16x16_neon.asm b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
index d6a207063..e434a709c 100644
--- a/vp8/common/arm/neon/sixtappredict16x16_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict16x16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/sixtappredict4x4_neon.asm b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
index 0b5504fab..3d22d775a 100644
--- a/vp8/common/arm/neon/sixtappredict4x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict4x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/sixtappredict8x4_neon.asm b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
index 0b19e0b31..1dd6b1b37 100644
--- a/vp8/common/arm/neon/sixtappredict8x4_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/neon/sixtappredict8x8_neon.asm b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
index 3fbe5644a..37255c758 100644
--- a/vp8/common/arm/neon/sixtappredict8x8_neon.asm
+++ b/vp8/common/arm/neon/sixtappredict8x8_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
index 2bfdda876..218898b44 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index 2d5bfac52..18855a3c0 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
index b51c26b08..8d968d7ad 100644
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/reconintra_arm.c b/vp8/common/arm/reconintra_arm.c
index e72261c03..4cc93d134 100644
--- a/vp8/common/arm/reconintra_arm.c
+++ b/vp8/common/arm/reconintra_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index e86e1252f..53600e547 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
index 6e8651754..1eed97e02 100644
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/arm/vpx_asm_offsets.c b/vp8/common/arm/vpx_asm_offsets.c
index 8009a683c..5baf8ccf5 100644
--- a/vp8/common/arm/vpx_asm_offsets.c
+++ b/vp8/common/arm/vpx_asm_offsets.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/bigend.h b/vp8/common/bigend.h
index 98b9aa54f..6ac3f8b5a 100644
--- a/vp8/common/bigend.h
+++ b/vp8/common/bigend.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/blockd.c b/vp8/common/blockd.c
index f39e5b2c0..7f75a72c5 100644
--- a/vp8/common/blockd.c
+++ b/vp8/common/blockd.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 1cc5a1f2c..4b7f1a359 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/boolcoder.h b/vp8/common/boolcoder.h
index 048756762..5658868a6 100644
--- a/vp8/common/boolcoder.h
+++ b/vp8/common/boolcoder.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/codec_common_interface.h b/vp8/common/codec_common_interface.h
index e53d4e59e..7a7db3847 100644
--- a/vp8/common/codec_common_interface.h
+++ b/vp8/common/codec_common_interface.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/coefupdateprobs.h b/vp8/common/coefupdateprobs.h
index 01a4f89b6..785e3ff70 100644
--- a/vp8/common/coefupdateprobs.h
+++ b/vp8/common/coefupdateprobs.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/common.h b/vp8/common/common.h
index b8b2eaa60..9a93da991 100644
--- a/vp8/common/common.h
+++ b/vp8/common/common.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/common_types.h b/vp8/common/common_types.h
index f5b69f49e..4e6248697 100644
--- a/vp8/common/common_types.h
+++ b/vp8/common/common_types.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/context.c b/vp8/common/context.c
index af7471167..99e95d30f 100644
--- a/vp8/common/context.c
+++ b/vp8/common/context.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index edd583484..c3ac88fc8 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index b41a618ad..b85f59b9f 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/dma_desc.h b/vp8/common/dma_desc.h
index d4d9f59fb..b923da6e0 100644
--- a/vp8/common/dma_desc.h
+++ b/vp8/common/dma_desc.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/duck_io.h b/vp8/common/duck_io.h
index 0bf41895d..43daa65bc 100644
--- a/vp8/common/duck_io.h
+++ b/vp8/common/duck_io.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropy.c b/vp8/common/entropy.c
index c55cddeec..1438e7e0f 100644
--- a/vp8/common/entropy.c
+++ b/vp8/common/entropy.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index feed4aff4..80b481700 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropymode.c b/vp8/common/entropymode.c
index 41922834f..e9dc668b2 100644
--- a/vp8/common/entropymode.c
+++ b/vp8/common/entropymode.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropymode.h b/vp8/common/entropymode.h
index 3d5af7cf7..da6ae8ead 100644
--- a/vp8/common/entropymode.h
+++ b/vp8/common/entropymode.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index a98d06aba..8e72881e9 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/entropymv.h b/vp8/common/entropymv.h
index 31895d592..911507ddc 100644
--- a/vp8/common/entropymv.h
+++ b/vp8/common/entropymv.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 0608a13b4..7e06ac30c 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/extend.h b/vp8/common/extend.h
index 157a67c40..fd0a608e5 100644
--- a/vp8/common/extend.h
+++ b/vp8/common/extend.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 7145f3fb9..3d18d8191 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index 48139428f..41037f707 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/findnearmv.h b/vp8/common/findnearmv.h
index f467ab635..1a6c72bcd 100644
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/fourcc.hpp b/vp8/common/fourcc.hpp
index 7a85eee69..c5826285e 100644
--- a/vp8/common/fourcc.hpp
+++ b/vp8/common/fourcc.hpp
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/g_common.h b/vp8/common/g_common.h
index d5d005ec7..5f523980b 100644
--- a/vp8/common/g_common.h
+++ b/vp8/common/g_common.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 91077b3cf..c04e31ffe 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/header.h b/vp8/common/header.h
index 40bef73a5..3e98eeb3c 100644
--- a/vp8/common/header.h
+++ b/vp8/common/header.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/idct.h b/vp8/common/idct.h
index 7b04799c8..f5fd94dfd 100644
--- a/vp8/common/idct.h
+++ b/vp8/common/idct.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/idctllm.c b/vp8/common/idctllm.c
index c9686626c..196062df6 100644
--- a/vp8/common/idctllm.c
+++ b/vp8/common/idctllm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 32ef15ed1..4cb433a80 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/invtrans.h b/vp8/common/invtrans.h
index 0d502d285..b3ffb7073 100644
--- a/vp8/common/invtrans.h
+++ b/vp8/common/invtrans.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/littlend.h b/vp8/common/littlend.h
index 0aa76df69..99df1164c 100644
--- a/vp8/common/littlend.h
+++ b/vp8/common/littlend.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index 19fa865f8..da9ca2871 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index 66185d1e7..a2049bf61 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 7aba7ed13..ea82e2a07 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/mac_specs.h b/vp8/common/mac_specs.h
index 58721e10c..4b8ee5877 100644
--- a/vp8/common/mac_specs.h
+++ b/vp8/common/mac_specs.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index 4894bd96c..ce40d16ca 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index e2008ce92..0fa299577 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/modecont.h b/vp8/common/modecont.h
index 88dc626f7..24db88295 100644
--- a/vp8/common/modecont.h
+++ b/vp8/common/modecont.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index a724d17bf..8e483b800 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/mv.h b/vp8/common/mv.h
index aca788be8..73c91b9e7 100644
--- a/vp8/common/mv.h
+++ b/vp8/common/mv.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index b909e725a..a006306db 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 7b44f2a6f..132765d18 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index a03bd5ded..00a97d97d 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/partialgfupdate.h b/vp8/common/partialgfupdate.h
index 6d83d209b..115134a53 100644
--- a/vp8/common/partialgfupdate.h
+++ b/vp8/common/partialgfupdate.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 648892194..0c8cf13bf 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 1c16995e7..80337fc68 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/copy_altivec.asm b/vp8/common/ppc/copy_altivec.asm
index 6d855fce4..a4ce91583 100644
--- a/vp8/common/ppc/copy_altivec.asm
+++ b/vp8/common/ppc/copy_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/filter_altivec.asm b/vp8/common/ppc/filter_altivec.asm
index 7698add3d..4da2e94f9 100644
--- a/vp8/common/ppc/filter_altivec.asm
+++ b/vp8/common/ppc/filter_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/filter_bilinear_altivec.asm b/vp8/common/ppc/filter_bilinear_altivec.asm
index 08acc910e..fd8aa665f 100644
--- a/vp8/common/ppc/filter_bilinear_altivec.asm
+++ b/vp8/common/ppc/filter_bilinear_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/idctllm_altivec.asm b/vp8/common/ppc/idctllm_altivec.asm
index 039157a19..117d9cfc8 100644
--- a/vp8/common/ppc/idctllm_altivec.asm
+++ b/vp8/common/ppc/idctllm_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/loopfilter_altivec.c b/vp8/common/ppc/loopfilter_altivec.c
index ad02f9349..bad3cf3bd 100644
--- a/vp8/common/ppc/loopfilter_altivec.c
+++ b/vp8/common/ppc/loopfilter_altivec.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/loopfilter_filters_altivec.asm b/vp8/common/ppc/loopfilter_filters_altivec.asm
index 03b25a78c..61df4e976 100644
--- a/vp8/common/ppc/loopfilter_filters_altivec.asm
+++ b/vp8/common/ppc/loopfilter_filters_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/platform_altivec.asm b/vp8/common/ppc/platform_altivec.asm
index a1442cbcf..f81d86f74 100644
--- a/vp8/common/ppc/platform_altivec.asm
+++ b/vp8/common/ppc/platform_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/recon_altivec.asm b/vp8/common/ppc/recon_altivec.asm
index dbe7e4368..dd39e05a8 100644
--- a/vp8/common/ppc/recon_altivec.asm
+++ b/vp8/common/ppc/recon_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppc/systemdependent.c b/vp8/common/ppc/systemdependent.c
index 87547fc55..1f5d79068 100644
--- a/vp8/common/ppc/systemdependent.c
+++ b/vp8/common/ppc/systemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index d9eacb3df..b1f925c44 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/pragmas.h b/vp8/common/pragmas.h
index 519c68523..99fee5ae2 100644
--- a/vp8/common/pragmas.h
+++ b/vp8/common/pragmas.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/predictdc.c b/vp8/common/predictdc.c
index 6eb5fae4f..f315f50e0 100644
--- a/vp8/common/predictdc.c
+++ b/vp8/common/predictdc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/predictdc.h b/vp8/common/predictdc.h
index 8af95fae9..fa8596822 100644
--- a/vp8/common/predictdc.h
+++ b/vp8/common/predictdc.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/preproc.h b/vp8/common/preproc.h
index ec8d075ea..0b142bda7 100644
--- a/vp8/common/preproc.h
+++ b/vp8/common/preproc.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/preprocif.h b/vp8/common/preprocif.h
index e2ea2cad5..7d554b509 100644
--- a/vp8/common/preprocif.h
+++ b/vp8/common/preprocif.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/proposed.h b/vp8/common/proposed.h
index c0085765d..c9659902b 100644
--- a/vp8/common/proposed.h
+++ b/vp8/common/proposed.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/quant_common.c b/vp8/common/quant_common.c
index 658629c42..e9833fe33 100644
--- a/vp8/common/quant_common.c
+++ b/vp8/common/quant_common.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/quant_common.h b/vp8/common/quant_common.h
index 7e14c7a34..cb64d8eb8 100644
--- a/vp8/common/quant_common.h
+++ b/vp8/common/quant_common.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index c37585c31..0b439e054 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index 03eaab20b..e34a63c86 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index f11995e24..ffdc660c2 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconinter.h b/vp8/common/reconinter.h
index 8067acbef..7c1dee431 100644
--- a/vp8/common/reconinter.h
+++ b/vp8/common/reconinter.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index b5f57c203..ce0b1b8ec 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconintra.h b/vp8/common/reconintra.h
index ec5f51440..988b43a77 100644
--- a/vp8/common/reconintra.h
+++ b/vp8/common/reconintra.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index b77d341ca..c6e5fe7fd 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/reconintra4x4.h b/vp8/common/reconintra4x4.h
index e26961622..6ac2b7137 100644
--- a/vp8/common/reconintra4x4.h
+++ b/vp8/common/reconintra4x4.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index a8097ee29..8647ae2aa 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/setupintrarecon.h b/vp8/common/setupintrarecon.h
index 56485b738..5264fd04b 100644
--- a/vp8/common/setupintrarecon.h
+++ b/vp8/common/setupintrarecon.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/subpixel.h b/vp8/common/subpixel.h
index a08d38784..acdeec3bc 100644
--- a/vp8/common/subpixel.h
+++ b/vp8/common/subpixel.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/swapyv12buffer.c b/vp8/common/swapyv12buffer.c
index 9742e34aa..73656b3d7 100644
--- a/vp8/common/swapyv12buffer.c
+++ b/vp8/common/swapyv12buffer.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/swapyv12buffer.h b/vp8/common/swapyv12buffer.h
index 80121b8dc..a6473ed92 100644
--- a/vp8/common/swapyv12buffer.h
+++ b/vp8/common/swapyv12buffer.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/systemdependent.h b/vp8/common/systemdependent.h
index 5d432745b..db996987a 100644
--- a/vp8/common/systemdependent.h
+++ b/vp8/common/systemdependent.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index f999a0c8f..da40f9352 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index cd2236168..f9a257460 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/treecoder.c b/vp8/common/treecoder.c
index 495abd716..d80c64bdf 100644
--- a/vp8/common/treecoder.c
+++ b/vp8/common/treecoder.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 990327536..35e5be10c 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index 98da4eec0..f2a370298 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/vfwsetting.hpp b/vp8/common/vfwsetting.hpp
index 18efafec5..44869ecc7 100644
--- a/vp8/common/vfwsetting.hpp
+++ b/vp8/common/vfwsetting.hpp
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/vpx_ref_build_prefix.h b/vp8/common/vpx_ref_build_prefix.h
index 7d91d5646..a2fce65dc 100644
--- a/vp8/common/vpx_ref_build_prefix.h
+++ b/vp8/common/vpx_ref_build_prefix.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/vpxblit.h b/vp8/common/vpxblit.h
index 83dfbafb2..a95d90574 100644
--- a/vp8/common/vpxblit.h
+++ b/vp8/common/vpxblit.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/vpxblit_c64.h b/vp8/common/vpxblit_c64.h
index ab3da5229..4ee617f6c 100644
--- a/vp8/common/vpxblit_c64.h
+++ b/vp8/common/vpxblit_c64.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/vpxerrors.h b/vp8/common/vpxerrors.h
index 79e2bc914..b70f29673 100644
--- a/vp8/common/vpxerrors.h
+++ b/vp8/common/vpxerrors.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/boolcoder.cxx b/vp8/common/x86/boolcoder.cxx
index 1238dd2d2..faddf1f42 100644
--- a/vp8/common/x86/boolcoder.cxx
+++ b/vp8/common/x86/boolcoder.cxx
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/idct_x86.h b/vp8/common/x86/idct_x86.h
index f1e433d5c..f6e568cdc 100644
--- a/vp8/common/x86/idct_x86.h
+++ b/vp8/common/x86/idct_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index b0d4a0c0d..99e09a50e 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index 058ed8a8c..ac941851b 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 26f04e3f2..3f0671c58 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/iwalsh_sse2.asm b/vp8/common/x86/iwalsh_sse2.asm
index 17337f0c1..83c97df7d 100644
--- a/vp8/common/x86/iwalsh_sse2.asm
+++ b/vp8/common/x86/iwalsh_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 7a9b6792f..0b39e627d 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index ad2f36c9e..5839e43bf 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 16498abbd..3ff8c4e12 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/loopfilter_x86.h b/vp8/common/x86/loopfilter_x86.h
index cf6ceab07..80dbebc8d 100644
--- a/vp8/common/x86/loopfilter_x86.h
+++ b/vp8/common/x86/loopfilter_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 2f6ea4911..349ac0d3b 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/postproc_mmx.c b/vp8/common/x86/postproc_mmx.c
index 1b3d60d9d..6b6321ace 100644
--- a/vp8/common/x86/postproc_mmx.c
+++ b/vp8/common/x86/postproc_mmx.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index b6e98d8cd..276f208ff 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/postproc_x86.h b/vp8/common/x86/postproc_x86.h
index b542728ad..899dd2f89 100644
--- a/vp8/common/x86/postproc_x86.h
+++ b/vp8/common/x86/postproc_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/recon_mmx.asm b/vp8/common/x86/recon_mmx.asm
index b1eb629b7..e7211fccb 100644
--- a/vp8/common/x86/recon_mmx.asm
+++ b/vp8/common/x86/recon_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/recon_sse2.asm b/vp8/common/x86/recon_sse2.asm
index 716818906..4ad3973ec 100644
--- a/vp8/common/x86/recon_sse2.asm
+++ b/vp8/common/x86/recon_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/recon_x86.h b/vp8/common/x86/recon_x86.h
index d83328e66..40ee65a12 100644
--- a/vp8/common/x86/recon_x86.h
+++ b/vp8/common/x86/recon_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index b8a712761..06db0c6a0 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index 4eeab0c66..2385abfd0 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index 87173f2ca..0d70b79eb 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/subpixel_x86.h b/vp8/common/x86/subpixel_x86.h
index b371892c9..003ee7c81 100644
--- a/vp8/common/x86/subpixel_x86.h
+++ b/vp8/common/x86/subpixel_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 8b54b2327..950d96262 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index ce487ff9f..c233c0f47 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv5/dequantize_v5.asm b/vp8/decoder/arm/armv5/dequantize_v5.asm
index a949d5774..de3648ae2 100644
--- a/vp8/decoder/arm/armv5/dequantize_v5.asm
+++ b/vp8/decoder/arm/armv5/dequantize_v5.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv6/dboolhuff_v6.asm b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
index e181b3081..6515804bb 100644
--- a/vp8/decoder/arm/armv6/dboolhuff_v6.asm
+++ b/vp8/decoder/arm/armv6/dboolhuff_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
index 886873c70..6bebda24f 100644
--- a/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequant_dc_idct_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license and patent
 ;  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv6/dequant_idct_v6.asm b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
index c13b51299..47b671ca6 100644
--- a/vp8/decoder/arm/armv6/dequant_idct_v6.asm
+++ b/vp8/decoder/arm/armv6/dequant_idct_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license and patent
 ;  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv6/dequantize_v6.asm b/vp8/decoder/arm/armv6/dequantize_v6.asm
index ba0ab7e1d..72f7e0ee5 100644
--- a/vp8/decoder/arm/armv6/dequantize_v6.asm
+++ b/vp8/decoder/arm/armv6/dequantize_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/armv6/idct_blk_v6.c b/vp8/decoder/arm/armv6/idct_blk_v6.c
index 96aca2b81..3c7bc502f 100644
--- a/vp8/decoder/arm/armv6/idct_blk_v6.c
+++ b/vp8/decoder/arm/armv6/idct_blk_v6.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index e39f7d2d1..39265879b 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 12e836a6f..40151e01a 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/detokenize.asm b/vp8/decoder/arm/detokenize.asm
index a4f0cb0a5..45e068a9f 100644
--- a/vp8/decoder/arm/detokenize.asm
+++ b/vp8/decoder/arm/detokenize.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/detokenize_arm.h b/vp8/decoder/arm/detokenize_arm.h
index 1c53f7b78..9bb19b6cf 100644
--- a/vp8/decoder/arm/detokenize_arm.h
+++ b/vp8/decoder/arm/detokenize_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
index 2ea03c415..9dcf7b657 100644
--- a/vp8/decoder/arm/dsystemdependent.c
+++ b/vp8/decoder/arm/dsystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/neon/dboolhuff_neon.asm b/vp8/decoder/arm/neon/dboolhuff_neon.asm
index 321ca6515..ff3ffda97 100644
--- a/vp8/decoder/arm/neon/dboolhuff_neon.asm
+++ b/vp8/decoder/arm/neon/dboolhuff_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
index ddb324068..f68a78095 100644
--- a/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/neon/dequant_idct_neon.asm b/vp8/decoder/arm/neon/dequant_idct_neon.asm
index 5c60dd690..1923be42a 100644
--- a/vp8/decoder/arm/neon/dequant_idct_neon.asm
+++ b/vp8/decoder/arm/neon/dequant_idct_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/neon/dequantizeb_neon.asm b/vp8/decoder/arm/neon/dequantizeb_neon.asm
index da1ca5c24..c8e0c31f2 100644
--- a/vp8/decoder/arm/neon/dequantizeb_neon.asm
+++ b/vp8/decoder/arm/neon/dequantizeb_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
index e190bc023..4725e6240 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/dboolhuff.c b/vp8/decoder/dboolhuff.c
index 377c7b631..57cba16a3 100644
--- a/vp8/decoder/dboolhuff.c
+++ b/vp8/decoder/dboolhuff.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index 050bab1d5..c72bc0330 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index d14126739..da68ee688 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/decodemv.h b/vp8/decoder/decodemv.h
index 08134fbe2..940342447 100644
--- a/vp8/decoder/decodemv.h
+++ b/vp8/decoder/decodemv.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index 2267767b6..c8e3f02f9 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index fb1794126..f2d8c766e 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
index 436725de6..557508dd8 100644
--- a/vp8/decoder/demode.c
+++ b/vp8/decoder/demode.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
index ca9b80a4f..a58cff95b 100644
--- a/vp8/decoder/demode.h
+++ b/vp8/decoder/demode.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index df7cf5f6c..8cfa2a32e 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/dequantize.h b/vp8/decoder/dequantize.h
index 125d35b05..b78e39c1d 100644
--- a/vp8/decoder/dequantize.h
+++ b/vp8/decoder/dequantize.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 75b739251..65c7d5370 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/detokenize.h b/vp8/decoder/detokenize.h
index aa98deae7..294a4a55d 100644
--- a/vp8/decoder/detokenize.h
+++ b/vp8/decoder/detokenize.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index e8104dc42..60f2af5b8 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
index b18984bc3..c6a42578a 100644
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 5a88ba017..1651784cf 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index af1182fb4..9be4d47b2 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 93acd368b..a77552c3c 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/treereader.h b/vp8/decoder/treereader.h
index a850df6d5..277842896 100644
--- a/vp8/decoder/treereader.h
+++ b/vp8/decoder/treereader.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index f11eef35a..150d090b6 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/dequantize_x86.h b/vp8/decoder/x86/dequantize_x86.h
index 201479cfa..dc68daab3 100644
--- a/vp8/decoder/x86/dequantize_x86.h
+++ b/vp8/decoder/x86/dequantize_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/idct_blk_mmx.c b/vp8/decoder/x86/idct_blk_mmx.c
index 1522a8022..78c91d3d2 100644
--- a/vp8/decoder/x86/idct_blk_mmx.c
+++ b/vp8/decoder/x86/idct_blk_mmx.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/idct_blk_sse2.c b/vp8/decoder/x86/idct_blk_sse2.c
index c5e4ad3a6..0273d6ed2 100644
--- a/vp8/decoder/x86/idct_blk_sse2.c
+++ b/vp8/decoder/x86/idct_blk_sse2.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/onyxdxv.c b/vp8/decoder/x86/onyxdxv.c
index 6fd0e25fe..50293c792 100644
--- a/vp8/decoder/x86/onyxdxv.c
+++ b/vp8/decoder/x86/onyxdxv.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/x86/x86_dsystemdependent.c b/vp8/decoder/x86/x86_dsystemdependent.c
index eb8198f96..47e346dd9 100644
--- a/vp8/decoder/x86/x86_dsystemdependent.c
+++ b/vp8/decoder/x86/x86_dsystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
index 80be17c4c..e3b953ef3 100644
--- a/vp8/decoder/xprintf.c
+++ b/vp8/decoder/xprintf.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
index fa2e15d19..f83dd39c6 100644
--- a/vp8/decoder/xprintf.h
+++ b/vp8/decoder/xprintf.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/armv6/walsh_v6.asm b/vp8/encoder/arm/armv6/walsh_v6.asm
index 13d0864ff..61ffdb315 100644
--- a/vp8/encoder/arm/armv6/walsh_v6.asm
+++ b/vp8/encoder/arm/armv6/walsh_v6.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/boolhuff_arm.c b/vp8/encoder/arm/boolhuff_arm.c
index bb02a4aed..fe8e70c16 100644
--- a/vp8/encoder/arm/boolhuff_arm.c
+++ b/vp8/encoder/arm/boolhuff_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
index 698cf1e6f..8d70d635a 100644
--- a/vp8/encoder/arm/csystemdependent.c
+++ b/vp8/encoder/arm/csystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index 433489c70..774599bf0 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/encodemb_arm.c b/vp8/encoder/arm/encodemb_arm.c
index 10f5114c7..cc9e014b2 100644
--- a/vp8/encoder/arm/encodemb_arm.c
+++ b/vp8/encoder/arm/encodemb_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index f59ef9f7e..eb699433f 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 709e2e824..4e95c47ac 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/neon/boolhuff_armv7.asm
index 13201f7a4..9c4823c51 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/neon/boolhuff_armv7.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
index 4af7687aa..8c191a753 100644
--- a/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct4x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
index 774027aec..ca351a1c4 100644
--- a/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
+++ b/vp8/encoder/arm/neon/fastfdct8x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/fastquantizeb_neon.asm b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
index 118a11f0f..ca1ea9c18 100644
--- a/vp8/encoder/arm/neon/fastquantizeb_neon.asm
+++ b/vp8/encoder/arm/neon/fastquantizeb_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/sad16_neon.asm b/vp8/encoder/arm/neon/sad16_neon.asm
index 7b102b9c9..d7c590e15 100644
--- a/vp8/encoder/arm/neon/sad16_neon.asm
+++ b/vp8/encoder/arm/neon/sad16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/sad8_neon.asm b/vp8/encoder/arm/neon/sad8_neon.asm
index 7c1cc9828..23ba6df93 100644
--- a/vp8/encoder/arm/neon/sad8_neon.asm
+++ b/vp8/encoder/arm/neon/sad8_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/shortfdct_neon.asm b/vp8/encoder/arm/neon/shortfdct_neon.asm
index 39d31cb1c..5af5cb888 100644
--- a/vp8/encoder/arm/neon/shortfdct_neon.asm
+++ b/vp8/encoder/arm/neon/shortfdct_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/subtract_neon.asm b/vp8/encoder/arm/neon/subtract_neon.asm
index d66d203b7..3ea00f8b9 100644
--- a/vp8/encoder/arm/neon/subtract_neon.asm
+++ b/vp8/encoder/arm/neon/subtract_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/variance_neon.asm b/vp8/encoder/arm/neon/variance_neon.asm
index 903c45137..e1a46869a 100644
--- a/vp8/encoder/arm/neon/variance_neon.asm
+++ b/vp8/encoder/arm/neon/variance_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
index 0834a2fd5..b0450e523 100644
--- a/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_memcpy_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
index e86d017c6..6af4e87ba 100644
--- a/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_mse16x16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
index 82595860d..c19ac8250 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
index bed3255aa..075645586 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
index 8590ea02a..10a3d9851 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
index 9c750bad4..ba3decf6c 100644
--- a/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
index 65c2a66a6..1b09cfe4c 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 6af89a1b8..1c1441cc2 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
index e491d47e5..cf4da62fa 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance8x8_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/picklpf_arm.c b/vp8/encoder/arm/picklpf_arm.c
index 31e95eadc..b2d8f2b2c 100644
--- a/vp8/encoder/arm/picklpf_arm.c
+++ b/vp8/encoder/arm/picklpf_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 68dcdd73b..50f58bf08 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/quantize_arm.h b/vp8/encoder/arm/quantize_arm.h
index 339b8a28a..5f9155eb1 100644
--- a/vp8/encoder/arm/quantize_arm.h
+++ b/vp8/encoder/arm/quantize_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 8097ed984..859e43f51 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
index 9392b603c..c595ca3c0 100644
--- a/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
+++ b/vp8/encoder/arm/vpx_vp8_enc_asm_offsets.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index c706395a8..929c17841 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 07b73c31a..559631338 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index be2b81678..ffb88904e 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/boolhuff.c b/vp8/encoder/boolhuff.c
index c5ddf802a..82006b196 100644
--- a/vp8/encoder/boolhuff.c
+++ b/vp8/encoder/boolhuff.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/boolhuff.h b/vp8/encoder/boolhuff.h
index 7a0ba8f7a..f723da3f0 100644
--- a/vp8/encoder/boolhuff.h
+++ b/vp8/encoder/boolhuff.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/dct.c b/vp8/encoder/dct.c
index 2827aa5a4..b5a11ae34 100644
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/dct.h b/vp8/encoder/dct.h
index cd8faedef..fec3b4c37 100644
--- a/vp8/encoder/dct.h
+++ b/vp8/encoder/dct.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 96f36ee63..d8a76d5b5 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 4ed6e8414..af80857d2 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodeintra.h b/vp8/encoder/encodeintra.h
index d51b95fb1..5be23d12b 100644
--- a/vp8/encoder/encodeintra.h
+++ b/vp8/encoder/encodeintra.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 0cc7880a9..e10b5159a 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodemb.h b/vp8/encoder/encodemb.h
index f7b110d64..08f75c3b1 100644
--- a/vp8/encoder/encodemb.h
+++ b/vp8/encoder/encodemb.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodemv.c b/vp8/encoder/encodemv.c
index d945a779f..cce753013 100644
--- a/vp8/encoder/encodemv.c
+++ b/vp8/encoder/encodemv.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/encodemv.h b/vp8/encoder/encodemv.h
index d18c9de2c..e4481bff0 100644
--- a/vp8/encoder/encodemv.h
+++ b/vp8/encoder/encodemv.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ethreading.c b/vp8/encoder/ethreading.c
index 04093ff43..962e74174 100644
--- a/vp8/encoder/ethreading.c
+++ b/vp8/encoder/ethreading.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index fea4e3d6a..2dea8b70f 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index c6f3e18a9..c7f3e0e45 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index dd89f1a82..1acb73d9c 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 156578bb1..b89354eaa 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 1c1714111..7cc924279 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/modecosts.c b/vp8/encoder/modecosts.c
index df3d7d608..d23c97e6e 100644
--- a/vp8/encoder/modecosts.c
+++ b/vp8/encoder/modecosts.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/modecosts.h b/vp8/encoder/modecosts.h
index d71abe89b..99ef119d5 100644
--- a/vp8/encoder/modecosts.h
+++ b/vp8/encoder/modecosts.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 99a09deb2..5c78b9c8a 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index c860a6ca0..211f65912 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/parms.cpp b/vp8/encoder/parms.cpp
index fccc9db00..6cc450121 100644
--- a/vp8/encoder/parms.cpp
+++ b/vp8/encoder/parms.cpp
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 2be412c99..eeeddcce9 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/pickinter.h b/vp8/encoder/pickinter.h
index 057794b82..b80e4c86f 100644
--- a/vp8/encoder/pickinter.h
+++ b/vp8/encoder/pickinter.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 4f3dba6bc..79e07dbc0 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/csystemdependent.c b/vp8/encoder/ppc/csystemdependent.c
index 3af628852..588656b97 100644
--- a/vp8/encoder/ppc/csystemdependent.c
+++ b/vp8/encoder/ppc/csystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/encodemb_altivec.asm b/vp8/encoder/ppc/encodemb_altivec.asm
index ab2341bab..6e0099ddc 100644
--- a/vp8/encoder/ppc/encodemb_altivec.asm
+++ b/vp8/encoder/ppc/encodemb_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/fdct_altivec.asm b/vp8/encoder/ppc/fdct_altivec.asm
index 2829b9260..935d0cb09 100644
--- a/vp8/encoder/ppc/fdct_altivec.asm
+++ b/vp8/encoder/ppc/fdct_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/rdopt_altivec.asm b/vp8/encoder/ppc/rdopt_altivec.asm
index 6ef585c60..ba4823009 100644
--- a/vp8/encoder/ppc/rdopt_altivec.asm
+++ b/vp8/encoder/ppc/rdopt_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/sad_altivec.asm b/vp8/encoder/ppc/sad_altivec.asm
index 84ecc7756..e5f26380f 100644
--- a/vp8/encoder/ppc/sad_altivec.asm
+++ b/vp8/encoder/ppc/sad_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/variance_altivec.asm b/vp8/encoder/ppc/variance_altivec.asm
index 5126d68c4..a1ebf663a 100644
--- a/vp8/encoder/ppc/variance_altivec.asm
+++ b/vp8/encoder/ppc/variance_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ppc/variance_subpixel_altivec.asm b/vp8/encoder/ppc/variance_subpixel_altivec.asm
index 184c8a043..301360b1d 100644
--- a/vp8/encoder/ppc/variance_subpixel_altivec.asm
+++ b/vp8/encoder/ppc/variance_subpixel_altivec.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/preproc.c b/vp8/encoder/preproc.c
index dc2d10ebf..bd918fa3c 100644
--- a/vp8/encoder/preproc.c
+++ b/vp8/encoder/preproc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/psnr.c b/vp8/encoder/psnr.c
index f693c5e6f..dc2a03b69 100644
--- a/vp8/encoder/psnr.c
+++ b/vp8/encoder/psnr.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/psnr.h b/vp8/encoder/psnr.h
index 29cb4099f..8ae444823 100644
--- a/vp8/encoder/psnr.h
+++ b/vp8/encoder/psnr.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index f495940b3..20ec9d11b 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/quantize.h b/vp8/encoder/quantize.h
index 05056d9ce..b74718bfa 100644
--- a/vp8/encoder/quantize.h
+++ b/vp8/encoder/quantize.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 371272268..50f4db0b8 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ratectrl.h b/vp8/encoder/ratectrl.h
index 9124c3383..766dfdfce 100644
--- a/vp8/encoder/ratectrl.h
+++ b/vp8/encoder/ratectrl.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 75a71d7df..dbef85b9f 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/rdopt.h b/vp8/encoder/rdopt.h
index e3766661e..fb74dd431 100644
--- a/vp8/encoder/rdopt.h
+++ b/vp8/encoder/rdopt.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index e9e565278..e63be2bda 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/segmentation.c b/vp8/encoder/segmentation.c
index bb78614c6..fc0967db3 100644
--- a/vp8/encoder/segmentation.c
+++ b/vp8/encoder/segmentation.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/segmentation.h b/vp8/encoder/segmentation.h
index 1e33dced0..216e194c2 100644
--- a/vp8/encoder/segmentation.h
+++ b/vp8/encoder/segmentation.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/ssim.c b/vp8/encoder/ssim.c
index dd0c82a77..4ebcba1a1 100644
--- a/vp8/encoder/ssim.c
+++ b/vp8/encoder/ssim.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 2d0a45c75..d9b8d36fd 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 6b3d08a30..7b9fc9eaa 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/treewriter.c b/vp8/encoder/treewriter.c
index 3ce4267f6..03967c835 100644
--- a/vp8/encoder/treewriter.c
+++ b/vp8/encoder/treewriter.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/treewriter.h b/vp8/encoder/treewriter.h
index c0d45ee89..88096d875 100644
--- a/vp8/encoder/treewriter.h
+++ b/vp8/encoder/treewriter.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 8113cfcb8..0341fbd9f 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 1030d8564..179cd0d8e 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
index 4bb6b60b8..9fb67613d 100644
--- a/vp8/encoder/x86/csystemdependent.c
+++ b/vp8/encoder/x86/csystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index ff96c49f3..b6cfc5ce0 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 0e8cfcfc3..f7a18432d 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index bff52e129..05824c684 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index e9256d2c3..d090b2d89 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index b4fe576a0..413d74d61 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 1b02369ac..38812c8d1 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index 13a4ee59b..e8d658b39 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/preproc_mmx.c b/vp8/encoder/x86/preproc_mmx.c
index c68cdff46..a182c8856 100644
--- a/vp8/encoder/x86/preproc_mmx.c
+++ b/vp8/encoder/x86/preproc_mmx.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 1374fdbba..a867409b5 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index faaff6428..a1b1c40cb 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license and patent
 ;  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/quantize_x86.h b/vp8/encoder/x86/quantize_x86.h
index 31a43e428..b5b22c022 100644
--- a/vp8/encoder/x86/quantize_x86.h
+++ b/vp8/encoder/x86/quantize_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license and patent
  *  grant that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index 8bf4bbd47..ad9658bf6 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index b0877ec52..9f34a7ac4 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 8a4954829..c2a1ae70a 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 024cabe06..94bbfffbc 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index c381ffdf5..8fe3ee174 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 293e2e14d..173238e24 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 029f1ac0d..f47d9ccdd 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 63dbc126f..2600ce96b 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 28099ede4..5e750ba2f 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 0de897e8d..3c9f9c790 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index be226e040..18dc49cd4 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 481bca435..ecca18a0a 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index da74a4fc8..8845368fb 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 7e4fc0be6..e7e535638 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index cafd06554..4ce18b6e7 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 97367dbd7..1424bd15a 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index f6b7d94b2..1d2558f23 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index c4e79af32..989232cd3 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index 4053df036..ab4cad10c 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/src/vpx_codec.c b/vpx/src/vpx_codec.c
index f8bf6527e..9c1558c1f 100644
--- a/vpx/src/vpx_codec.c
+++ b/vpx/src/vpx_codec.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/src/vpx_decoder.c b/vpx/src/vpx_decoder.c
index e819cc548..b52470b51 100644
--- a/vpx/src/vpx_decoder.c
+++ b/vpx/src/vpx_decoder.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/src/vpx_decoder_compat.c b/vpx/src/vpx_decoder_compat.c
index a05e9272b..e264734fe 100644
--- a/vpx/src/vpx_decoder_compat.c
+++ b/vpx/src/vpx_decoder_compat.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index 671590edf..ddbd65484 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/src/vpx_image.c b/vpx/src/vpx_image.c
index 2f1370d38..7a4e27062 100644
--- a/vpx/src/vpx_image.c
+++ b/vpx/src/vpx_image.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vp8.h b/vpx/vp8.h
index 9e7e95b3d..c7553ec22 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index 58182e1f3..e1c821144 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 1f021cadb..4cad838ff 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vp8e.h b/vpx/vp8e.h
index aab04f06f..abfce333a 100644
--- a/vpx/vp8e.h
+++ b/vpx/vp8e.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 952dcbbed..371df00c3 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_codec.mk b/vpx/vpx_codec.mk
index 5ccd67c03..4f1d74bd4 100644
--- a/vpx/vpx_codec.mk
+++ b/vpx/vpx_codec.mk
@@ -1,5 +1,5 @@
 ##
-##  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
 ##  Use of this source code is governed by a BSD-style license
 ##  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_codec_impl_bottom.h b/vpx/vpx_codec_impl_bottom.h
index 023ea9a61..6eb79a88a 100644
--- a/vpx/vpx_codec_impl_bottom.h
+++ b/vpx/vpx_codec_impl_bottom.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_codec_impl_top.h b/vpx/vpx_codec_impl_top.h
index e91904798..c9b8cfab2 100644
--- a/vpx/vpx_codec_impl_top.h
+++ b/vpx/vpx_codec_impl_top.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_decoder.h b/vpx/vpx_decoder.h
index c9f2d42f1..6ffc2d440 100644
--- a/vpx/vpx_decoder.h
+++ b/vpx/vpx_decoder.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
index 416aa1925..9e1e49222 100644
--- a/vpx/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index fa493c273..f894cd871 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 0bbda1a48..4506dd3b2 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx/vpx_integer.h b/vpx/vpx_integer.h
index 7eccce905..9a06c1ac3 100644
--- a/vpx/vpx_integer.h
+++ b/vpx/vpx_integer.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/include/nds/vpx_mem_nds.h b/vpx_mem/include/nds/vpx_mem_nds.h
index aa9816035..e54f54d9b 100644
--- a/vpx_mem/include/nds/vpx_mem_nds.h
+++ b/vpx_mem/include/nds/vpx_mem_nds.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 1f025f003..00f9c90e1 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/include/vpx_mem_tracker.h b/vpx_mem/include/vpx_mem_tracker.h
index 7867328a3..ef2b29b07 100644
--- a/vpx_mem/include/vpx_mem_tracker.h
+++ b/vpx_mem/include/vpx_mem_tracker.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/intel_linux/vpx_mem.c b/vpx_mem/intel_linux/vpx_mem.c
index 75096e783..00150acd5 100644
--- a/vpx_mem/intel_linux/vpx_mem.c
+++ b/vpx_mem/intel_linux/vpx_mem.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/intel_linux/vpx_mem_tracker.c b/vpx_mem/intel_linux/vpx_mem_tracker.c
index e80c2babe..5bed4b50d 100644
--- a/vpx_mem/intel_linux/vpx_mem_tracker.c
+++ b/vpx_mem/intel_linux/vpx_mem_tracker.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_alloc.c b/vpx_mem/memory_manager/hmm_alloc.c
index e1487ce25..22c4a54ee 100644
--- a/vpx_mem/memory_manager/hmm_alloc.c
+++ b/vpx_mem/memory_manager/hmm_alloc.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_base.c b/vpx_mem/memory_manager/hmm_base.c
index b4a4d5143..ad1da032e 100644
--- a/vpx_mem/memory_manager/hmm_base.c
+++ b/vpx_mem/memory_manager/hmm_base.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_dflt_abort.c b/vpx_mem/memory_manager/hmm_dflt_abort.c
index 7ae2defb1..d92435cfa 100644
--- a/vpx_mem/memory_manager/hmm_dflt_abort.c
+++ b/vpx_mem/memory_manager/hmm_dflt_abort.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_grow.c b/vpx_mem/memory_manager/hmm_grow.c
index b938ee1b1..9a4b6e416 100644
--- a/vpx_mem/memory_manager/hmm_grow.c
+++ b/vpx_mem/memory_manager/hmm_grow.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_largest.c b/vpx_mem/memory_manager/hmm_largest.c
index eeaaf81db..c3c6f2c42 100644
--- a/vpx_mem/memory_manager/hmm_largest.c
+++ b/vpx_mem/memory_manager/hmm_largest.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_resize.c b/vpx_mem/memory_manager/hmm_resize.c
index ab0ef255a..f90da9692 100644
--- a/vpx_mem/memory_manager/hmm_resize.c
+++ b/vpx_mem/memory_manager/hmm_resize.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_shrink.c b/vpx_mem/memory_manager/hmm_shrink.c
index 1306fcad4..78fe268ba 100644
--- a/vpx_mem/memory_manager/hmm_shrink.c
+++ b/vpx_mem/memory_manager/hmm_shrink.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/hmm_true.c b/vpx_mem/memory_manager/hmm_true.c
index f11be912e..3f7be8f70 100644
--- a/vpx_mem/memory_manager/hmm_true.c
+++ b/vpx_mem/memory_manager/hmm_true.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/include/cavl_if.h b/vpx_mem/memory_manager/include/cavl_if.h
index 5ac6c6bab..1b2c9b738 100644
--- a/vpx_mem/memory_manager/include/cavl_if.h
+++ b/vpx_mem/memory_manager/include/cavl_if.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/include/cavl_impl.h b/vpx_mem/memory_manager/include/cavl_impl.h
index 38b9f6be9..5e165dd4d 100644
--- a/vpx_mem/memory_manager/include/cavl_impl.h
+++ b/vpx_mem/memory_manager/include/cavl_impl.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/include/heapmm.h b/vpx_mem/memory_manager/include/heapmm.h
index 0549f9ab4..33004cadc 100644
--- a/vpx_mem/memory_manager/include/heapmm.h
+++ b/vpx_mem/memory_manager/include/heapmm.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/include/hmm_cnfg.h b/vpx_mem/memory_manager/include/hmm_cnfg.h
index 20acd9449..30b9f5045 100644
--- a/vpx_mem/memory_manager/include/hmm_cnfg.h
+++ b/vpx_mem/memory_manager/include/hmm_cnfg.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/memory_manager/include/hmm_intrnl.h b/vpx_mem/memory_manager/include/hmm_intrnl.h
index 4c64be619..5d62abc59 100644
--- a/vpx_mem/memory_manager/include/hmm_intrnl.h
+++ b/vpx_mem/memory_manager/include/hmm_intrnl.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/nds/vpx_mem_nds.c b/vpx_mem/nds/vpx_mem_nds.c
index 694aac84d..11ac95cba 100644
--- a/vpx_mem/nds/vpx_mem_nds.c
+++ b/vpx_mem/nds/vpx_mem_nds.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
index 3ee54a606..d55b7d92c 100644
--- a/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
+++ b/vpx_mem/ti_c6x/vpx_mem_ti_6cx.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index ded432a66..85b05ab9f 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index f6fb52d31..31f8f9c60 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_mem/vpx_mem_tracker.c b/vpx_mem/vpx_mem_tracker.c
index ab4dec9f0..938ad0716 100644
--- a/vpx_mem/vpx_mem_tracker.c
+++ b/vpx_mem/vpx_mem_tracker.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/config.h b/vpx_ports/config.h
index fd1ee563b..1abe70da9 100644
--- a/vpx_ports/config.h
+++ b/vpx_ports/config.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 44b8f0ee0..87eece84e 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/mem.h b/vpx_ports/mem.h
index 8dbb0eb09..9ec34fec6 100644
--- a/vpx_ports/mem.h
+++ b/vpx_ports/mem.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/mem_ops.h b/vpx_ports/mem_ops.h
index ddf0983c7..c178b8b74 100644
--- a/vpx_ports/mem_ops.h
+++ b/vpx_ports/mem_ops.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/mem_ops_aligned.h b/vpx_ports/mem_ops_aligned.h
index 61496f14b..4c44aa260 100644
--- a/vpx_ports/mem_ops_aligned.h
+++ b/vpx_ports/mem_ops_aligned.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/vpx_timer.h b/vpx_ports/vpx_timer.h
index f2a5a7ec1..37a0c7cb2 100644
--- a/vpx_ports/vpx_timer.h
+++ b/vpx_ports/vpx_timer.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/vpxtypes.h b/vpx_ports/vpxtypes.h
index d0b3589e5..2ab66b14b 100644
--- a/vpx_ports/vpxtypes.h
+++ b/vpx_ports/vpxtypes.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 90a50f8e4..a8e4607cd 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index eff992634..5d85d8e2e 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/armv4/gen_scalers_armv4.asm b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
index b6dae0b19..e495184e7 100644
--- a/vpx_scale/arm/armv4/gen_scalers_armv4.asm
+++ b/vpx_scale/arm/armv4/gen_scalers_armv4.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/nds/yv12extend.c b/vpx_scale/arm/nds/yv12extend.c
index a12bb2799..48c0dfb33 100644
--- a/vpx_scale/arm/nds/yv12extend.c
+++ b/vpx_scale/arm/nds/yv12extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
index 25a21e63a..24d46cbe5 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframe_func_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
index 04c0de734..6534827d8 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copyframeyonly_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
index cdb402c92..dfc8db55c 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_copysrcframe_func_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
index 632d43014..e475b929b 100644
--- a/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
+++ b/vpx_scale/arm/neon/vp8_vpxyv12_extendframeborders_neon.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 0868b8bca..1e8bcb89d 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/arm/yv12extend_arm.c b/vpx_scale/arm/yv12extend_arm.c
index dce642d1f..d7a8289a9 100644
--- a/vpx_scale/arm/yv12extend_arm.c
+++ b/vpx_scale/arm/yv12extend_arm.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/blackfin/yv12config.c b/vpx_scale/blackfin/yv12config.c
index 043493ffe..42538af58 100644
--- a/vpx_scale/blackfin/yv12config.c
+++ b/vpx_scale/blackfin/yv12config.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/blackfin/yv12extend.c b/vpx_scale/blackfin/yv12extend.c
index e5af5305b..cfd3de050 100644
--- a/vpx_scale/blackfin/yv12extend.c
+++ b/vpx_scale/blackfin/yv12extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/dm642/bicubic_scaler_c64.c b/vpx_scale/dm642/bicubic_scaler_c64.c
index 830900e68..5166ace85 100644
--- a/vpx_scale/dm642/bicubic_scaler_c64.c
+++ b/vpx_scale/dm642/bicubic_scaler_c64.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/dm642/gen_scalers_c64.c b/vpx_scale/dm642/gen_scalers_c64.c
index 47e43cac3..87ff998d3 100644
--- a/vpx_scale/dm642/gen_scalers_c64.c
+++ b/vpx_scale/dm642/gen_scalers_c64.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/dm642/yv12extend.c b/vpx_scale/dm642/yv12extend.c
index fff39d3d8..646e69a7f 100644
--- a/vpx_scale/dm642/yv12extend.c
+++ b/vpx_scale/dm642/yv12extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/bicubic_scaler.c b/vpx_scale/generic/bicubic_scaler.c
index c600c3f1b..420f719ce 100644
--- a/vpx_scale/generic/bicubic_scaler.c
+++ b/vpx_scale/generic/bicubic_scaler.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index 8be43dc7d..b084e817b 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/scalesystemdependant.c b/vpx_scale/generic/scalesystemdependant.c
index 81c8171d7..926feb7cd 100644
--- a/vpx_scale/generic/scalesystemdependant.c
+++ b/vpx_scale/generic/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index 7c8f8d004..e1c80281f 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index d73cfd113..3034cc3a7 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index aa623ba03..39d2fa854 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/arm/vpxscale_nofp.h b/vpx_scale/include/arm/vpxscale_nofp.h
index 2dea83c79..3e1a9fa83 100644
--- a/vpx_scale/include/arm/vpxscale_nofp.h
+++ b/vpx_scale/include/arm/vpxscale_nofp.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/generic/vpxscale_arbitrary.h b/vpx_scale/include/generic/vpxscale_arbitrary.h
index 9a721791b..39de1816b 100644
--- a/vpx_scale/include/generic/vpxscale_arbitrary.h
+++ b/vpx_scale/include/generic/vpxscale_arbitrary.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/generic/vpxscale_depricated.h b/vpx_scale/include/generic/vpxscale_depricated.h
index 30a98e800..3f7fe0f04 100644
--- a/vpx_scale/include/generic/vpxscale_depricated.h
+++ b/vpx_scale/include/generic/vpxscale_depricated.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/generic/vpxscale_nofp.h b/vpx_scale/include/generic/vpxscale_nofp.h
index 2a618de5d..7b8205a1b 100644
--- a/vpx_scale/include/generic/vpxscale_nofp.h
+++ b/vpx_scale/include/generic/vpxscale_nofp.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/leapster/vpxscale.h b/vpx_scale/include/leapster/vpxscale.h
index 54ad54eb1..50218497d 100644
--- a/vpx_scale/include/leapster/vpxscale.h
+++ b/vpx_scale/include/leapster/vpxscale.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/symbian/vpxscale_nofp.h b/vpx_scale/include/symbian/vpxscale_nofp.h
index 2dea83c79..3e1a9fa83 100644
--- a/vpx_scale/include/symbian/vpxscale_nofp.h
+++ b/vpx_scale/include/symbian/vpxscale_nofp.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/include/vpxscale_nofp.h b/vpx_scale/include/vpxscale_nofp.h
index c04e5267b..a704bd92c 100644
--- a/vpx_scale/include/vpxscale_nofp.h
+++ b/vpx_scale/include/vpxscale_nofp.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/intel_linux/scaleopt.c b/vpx_scale/intel_linux/scaleopt.c
index 5ea96144f..499f0ed61 100644
--- a/vpx_scale/intel_linux/scaleopt.c
+++ b/vpx_scale/intel_linux/scaleopt.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/intel_linux/scalesystemdependant.c b/vpx_scale/intel_linux/scalesystemdependant.c
index 892d53292..eab741f83 100644
--- a/vpx_scale/intel_linux/scalesystemdependant.c
+++ b/vpx_scale/intel_linux/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/leapster/doptsystemdependant_lf.c b/vpx_scale/leapster/doptsystemdependant_lf.c
index 50073d62b..7cbb1c555 100644
--- a/vpx_scale/leapster/doptsystemdependant_lf.c
+++ b/vpx_scale/leapster/doptsystemdependant_lf.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/leapster/gen_scalers_lf.c b/vpx_scale/leapster/gen_scalers_lf.c
index 809981df5..f1ee056ea 100644
--- a/vpx_scale/leapster/gen_scalers_lf.c
+++ b/vpx_scale/leapster/gen_scalers_lf.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/leapster/vpxscale_lf.c b/vpx_scale/leapster/vpxscale_lf.c
index 32dffbbf2..616ddf559 100644
--- a/vpx_scale/leapster/vpxscale_lf.c
+++ b/vpx_scale/leapster/vpxscale_lf.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/leapster/yv12extend.c b/vpx_scale/leapster/yv12extend.c
index 017be972f..1cddd9540 100644
--- a/vpx_scale/leapster/yv12extend.c
+++ b/vpx_scale/leapster/yv12extend.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/scale_mode.h b/vpx_scale/scale_mode.h
index 90cd2afe0..1476e641b 100644
--- a/vpx_scale/scale_mode.h
+++ b/vpx_scale/scale_mode.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/symbian/gen_scalers_armv4.asm b/vpx_scale/symbian/gen_scalers_armv4.asm
index b6dae0b19..e495184e7 100644
--- a/vpx_scale/symbian/gen_scalers_armv4.asm
+++ b/vpx_scale/symbian/gen_scalers_armv4.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/symbian/scalesystemdependant.c b/vpx_scale/symbian/scalesystemdependant.c
index d6497aaed..b0bffddb7 100644
--- a/vpx_scale/symbian/scalesystemdependant.c
+++ b/vpx_scale/symbian/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/vpxscale.h b/vpx_scale/vpxscale.h
index bd6796a9a..86fc128da 100644
--- a/vpx_scale/vpxscale.h
+++ b/vpx_scale/vpxscale.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/wce/gen_scalers_armv4.asm b/vpx_scale/wce/gen_scalers_armv4.asm
index b6dae0b19..e495184e7 100644
--- a/vpx_scale/wce/gen_scalers_armv4.asm
+++ b/vpx_scale/wce/gen_scalers_armv4.asm
@@ -1,5 +1,5 @@
 ;
-;  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ;
 ;  Use of this source code is governed by a BSD-style license
 ;  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/wce/scalesystemdependant.c b/vpx_scale/wce/scalesystemdependant.c
index 3b381fcc8..e9f2c26a9 100644
--- a/vpx_scale/wce/scalesystemdependant.c
+++ b/vpx_scale/wce/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/win32/scaleopt.c b/vpx_scale/win32/scaleopt.c
index f354a1d6f..3711fe5eb 100644
--- a/vpx_scale/win32/scaleopt.c
+++ b/vpx_scale/win32/scaleopt.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/win32/scalesystemdependant.c b/vpx_scale/win32/scalesystemdependant.c
index 892d53292..eab741f83 100644
--- a/vpx_scale/win32/scalesystemdependant.c
+++ b/vpx_scale/win32/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/x86_64/scaleopt.c b/vpx_scale/x86_64/scaleopt.c
index fa5061a3b..101f5ff60 100644
--- a/vpx_scale/x86_64/scaleopt.c
+++ b/vpx_scale/x86_64/scaleopt.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/x86_64/scalesystemdependant.c b/vpx_scale/x86_64/scalesystemdependant.c
index da9ea4e05..89ae86e00 100644
--- a/vpx_scale/x86_64/scalesystemdependant.c
+++ b/vpx_scale/x86_64/scalesystemdependant.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index a7f4d5f1d..50d6e3b3a 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/vpx_scale/yv12extend.h b/vpx_scale/yv12extend.h
index 1a32f1003..0c239b9d1 100644
--- a/vpx_scale/yv12extend.h
+++ b/vpx_scale/yv12extend.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/wince_wmain_adapter.cpp b/wince_wmain_adapter.cpp
index ddee8a45b..57f880a6e 100644
--- a/wince_wmain_adapter.cpp
+++ b/wince_wmain_adapter.cpp
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/y4minput.c b/y4minput.c
index 70cdaef2a..3eaec4ed3 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
diff --git a/y4minput.h b/y4minput.h
index 09441bff3..1a01bcda7 100644
--- a/y4minput.h
+++ b/y4minput.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2010 The VP8 project authors. All Rights Reserved.
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source

From edcbb1c199e086b45803829d00d09d5aa295b3e3 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 9 Sep 2010 12:57:23 -0400
Subject: [PATCH 155/307] Fix GF interval for non-lagged ARFs

When ARFs are enabled in non-lagged compress modes, the GF interval
was being reset to zero. Non-lagged ARF updates were enabled in commit
63ccfbd, but this incorrect GF interval caused a quality regression.

Change-Id: I615c3b493f4ce2127044f4e68d0bcb07d6b730c3
---
 vp8/encoder/firstpass.c | 1 +
 vp8/encoder/onyx_if.c   | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 2dea8b70f..a65bce6e1 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1403,6 +1403,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     // Should we use the alternate refernce frame
     if (cpi->oxcf.play_alternate &&
+        cpi->oxcf.lag_in_frames &&
         (i >= MIN_GF_INTERVAL) &&
         (i <= (cpi->frames_to_key - MIN_GF_INTERVAL)) &&          // dont use ARF very near next kf
         (((next_frame.pcnt_inter > 0.75) &&
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 5c78b9c8a..56bac0aaa 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1344,8 +1344,8 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
         cpi->max_gf_interval = 12;
 
 
-    // Special conditions when altr ref frame enabled
-    if (cpi->oxcf.play_alternate)
+    // Special conditions when altr ref frame enabled in lagged compress mode
+    if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames)
     {
         if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1)
             cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1;

From 14ba764219296ec74fab5647ca7bdc2e4ca693ce Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 7 Sep 2010 14:21:27 -0400
Subject: [PATCH 156/307] Update NEON wide idcts

Expand 93c32a55 which used SSE2 instructions to do two
idct/dequant/recons at a time to NEON. Initial working
commit. More work needs to be put into rearranging and
interlacing the data to take advantage of quadword
operations, which is when we'll hopefully see a much
better boost

Change-Id: I86d59d96f15e0d0f9710253e2c098ac2ff2865d1
---
 vp8/decoder/arm/neon/dequant_dc_idct_neon.asm | 136 -------------
 vp8/decoder/arm/neon/idct_blk_neon.c          | 138 +++++--------
 .../arm/neon/idct_dequant_0_2x_neon.asm       |  79 ++++++++
 .../arm/neon/idct_dequant_dc_0_2x_neon.asm    |  69 +++++++
 .../arm/neon/idct_dequant_dc_full_2x_neon.asm | 190 ++++++++++++++++++
 .../arm/neon/idct_dequant_full_2x_neon.asm    | 183 +++++++++++++++++
 vp8/vp8dx_arm.mk                              |   5 +-
 7 files changed, 577 insertions(+), 223 deletions(-)
 delete mode 100644 vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
 create mode 100644 vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
 create mode 100644 vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
 create mode 100644 vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
 create mode 100644 vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm

diff --git a/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm b/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
deleted file mode 100644
index f68a78095..000000000
--- a/vp8/decoder/arm/neon/dequant_dc_idct_neon.asm
+++ /dev/null
@@ -1,136 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_dequant_dc_idct_add_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;void vp8_dequant_dc_idct_add_neon(short *input, short *dq, unsigned char *pred,
-;                                  unsigned char *dest, int pitch, int stride,
-;                                  int Dc);
-; r0    short *input,
-; r1    short *dq,
-; r2    unsigned char *pred
-; r3    unsigned char *dest
-; sp    int pitch
-; sp+4  int stride
-; sp+8  int Dc
-|vp8_dequant_dc_idct_add_neon| PROC
-    vld1.16         {q3, q4}, [r0]
-    vld1.16         {q5, q6}, [r1]
-
-    ldr             r1, [sp, #8]            ;load Dc from stack
-
-    ldr             r12, _CONSTANTS_
-
-    vmul.i16        q1, q3, q5              ;input for short_idct4x4llm_neon
-    vmul.i16        q2, q4, q6
-
-    vmov.16         d2[0], r1
-
-    ldr             r1, [sp]                ; pitch
-    vld1.32         {d14[0]}, [r2], r1
-    vld1.32         {d14[1]}, [r2], r1
-    vld1.32         {d15[0]}, [r2], r1
-    vld1.32         {d15[1]}, [r2]
-
-    ldr             r1, [sp, #4]            ; stride
-
-;|short_idct4x4llm_neon| PROC
-    vld1.16         {d0}, [r12]
-    vswp            d3, d4                  ;q2(vp[4] vp[12])
-
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-; memset(input, 0, 32) -- 32bytes
-    vmov.i16        q14, #0
-
-    vswp            d3, d4
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-
-    vqadd.s16       d12, d2, d3             ;a1
-    vqsub.s16       d13, d2, d3             ;b1
-
-    vmov            q15, q14
-
-    vshr.s16        q3, q3, #1
-    vshr.s16        q4, q4, #1
-
-    vqadd.s16       q3, q3, q2
-    vqadd.s16       q4, q4, q2
-
-    vqsub.s16       d10, d6, d9             ;c1
-    vqadd.s16       d11, d7, d8             ;d1
-
-    vqadd.s16       d2, d12, d11
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-
-    vst1.16         {q14, q15}, [r0]
-
-    vrshr.s16       d2, d2, #3
-    vrshr.s16       d3, d3, #3
-    vrshr.s16       d4, d4, #3
-    vrshr.s16       d5, d5, #3
-
-    vtrn.32         d2, d4
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-
-    vaddw.u8        q1, q1, d14
-    vaddw.u8        q2, q2, d15
-
-    vqmovun.s16     d0, q1
-    vqmovun.s16     d1, q2
-
-    vst1.32         {d0[0]}, [r3], r1
-    vst1.32         {d0[1]}, [r3], r1
-    vst1.32         {d1[0]}, [r3], r1
-    vst1.32         {d1[1]}, [r3]
-
-    bx             lr
-
-    ENDP           ; |vp8_dequant_dc_idct_add_neon|
-
-; Constant Pool
-_CONSTANTS_       DCD cospi8sqrt2minus1
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2       DCD 0x8a8c8a8c
-
-    END
diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
index 4725e6240..103cce74e 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -12,6 +12,21 @@
 #include "idct.h"
 #include "dequantize.h"
 
+/* place these declarations here because we don't want to maintain them
+ * outside of this scope
+ */
+void idct_dequant_dc_full_2x_neon
+            (short *input, short *dq, unsigned char *pre, unsigned char *dst,
+             int stride, short *dc);
+void idct_dequant_dc_0_2x_neon
+            (short *dc, unsigned char *pre, unsigned char *dst, int stride);
+void idct_dequant_full_2x_neon
+            (short *q, short *dq, unsigned char *pre, unsigned char *dst,
+             int pitch, int stride);
+void idct_dequant_0_2x_neon
+            (short *q, short dq, unsigned char *pre, int pitch,
+             unsigned char *dst, int stride);
+
 void vp8_dequant_dc_idct_add_y_block_neon
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int stride, char *eobs, short *dc)
@@ -20,25 +35,15 @@ void vp8_dequant_dc_idct_add_y_block_neon
 
     for (i = 0; i < 4; i++)
     {
-        if (eobs[0] > 1)
-            vp8_dequant_dc_idct_add_neon (q, dq, pre, dst, 16, stride, dc[0]);
+        if (((short *)eobs)[0] & 0xfefe)
+            idct_dequant_dc_full_2x_neon (q, dq, pre, dst, stride, dc);
         else
-            vp8_dc_only_idct_add_neon (dc[0], pre, dst, 16, stride);
+            idct_dequant_dc_0_2x_neon(dc, pre, dst, stride);
 
-        if (eobs[1] > 1)
-            vp8_dequant_dc_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride, dc[1]);
+        if (((short *)eobs)[1] & 0xfefe)
+            idct_dequant_dc_full_2x_neon (q+32, dq, pre+8, dst+8, stride, dc+2);
         else
-            vp8_dc_only_idct_add_neon (dc[1], pre+4, dst+4, 16, stride);
-
-        if (eobs[2] > 1)
-            vp8_dequant_dc_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride, dc[2]);
-        else
-            vp8_dc_only_idct_add_neon (dc[2], pre+8, dst+8, 16, stride);
-
-        if (eobs[3] > 1)
-            vp8_dequant_dc_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride, dc[3]);
-        else
-            vp8_dc_only_idct_add_neon (dc[3], pre+12, dst+12, 16, stride);
+            idct_dequant_dc_0_2x_neon(dc+2, pre+8, dst+8, stride);
 
         q    += 64;
         dc   += 4;
@@ -56,37 +61,15 @@ void vp8_dequant_idct_add_y_block_neon
 
     for (i = 0; i < 4; i++)
     {
-        if (eobs[0] > 1)
-            vp8_dequant_idct_add_neon (q, dq, pre, dst, 16, stride);
+        if (((short *)eobs)[0] & 0xfefe)
+            idct_dequant_full_2x_neon (q, dq, pre, dst, 16, stride);
         else
-        {
-            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dst, 16, stride);
-            ((int *)q)[0] = 0;
-        }
+            idct_dequant_0_2x_neon (q, dq[0], pre, 16, dst, stride);
 
-        if (eobs[1] > 1)
-            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dst+4, 16, stride);
+        if (((short *)eobs)[1] & 0xfefe)
+            idct_dequant_full_2x_neon (q+32, dq, pre+8, dst+8, 16, stride);
         else
-        {
-            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dst+4, 16, stride);
-            ((int *)(q+16))[0] = 0;
-        }
-
-        if (eobs[2] > 1)
-            vp8_dequant_idct_add_neon (q+32, dq, pre+8, dst+8, 16, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[32]*dq[0], pre+8, dst+8, 16, stride);
-            ((int *)(q+32))[0] = 0;
-        }
-
-        if (eobs[3] > 1)
-            vp8_dequant_idct_add_neon (q+48, dq, pre+12, dst+12, 16, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[48]*dq[0], pre+12, dst+12, 16, stride);
-            ((int *)(q+48))[0] = 0;
-        }
+            idct_dequant_0_2x_neon (q+32, dq[0], pre+8, 16, dst+8, stride);
 
         q    += 64;
         pre  += 64;
@@ -101,51 +84,34 @@ void vp8_dequant_idct_add_uv_block_neon
 {
     int i;
 
-    for (i = 0; i < 2; i++)
-    {
-        if (eobs[0] > 1)
-            vp8_dequant_idct_add_neon (q, dq, pre, dstu, 8, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstu, 8, stride);
-            ((int *)q)[0] = 0;
-        }
+    if (((short *)eobs)[0] & 0xfefe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
 
-        if (eobs[1] > 1)
-            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstu+4, 8, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstu+4, 8, stride);
-            ((int *)(q+16))[0] = 0;
-        }
+    q    += 32;
+    pre  += 32;
+    dstu += 4*stride;
 
-        q    += 32;
-        pre  += 32;
-        dstu += 4*stride;
-        eobs += 2;
-    }
+    if (((short *)eobs)[1] & 0xfefe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstu, stride);
 
-    for (i = 0; i < 2; i++)
-    {
-        if (eobs[0] > 1)
-            vp8_dequant_idct_add_neon (q, dq, pre, dstv, 8, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[0]*dq[0], pre, dstv, 8, stride);
-            ((int *)q)[0] = 0;
-        }
+    q += 32;
+    pre += 32;
 
-        if (eobs[1] > 1)
-            vp8_dequant_idct_add_neon (q+16, dq, pre+4, dstv+4, 8, stride);
-        else
-        {
-            vp8_dc_only_idct_add_neon (q[16]*dq[0], pre+4, dstv+4, 8, stride);
-            ((int *)(q+16))[0] = 0;
-        }
+    if (((short *)eobs)[2] & 0xfefe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
 
-        q    += 32;
-        pre  += 32;
-        dstv += 4*stride;
-        eobs += 2;
-    }
+    q    += 32;
+    pre  += 32;
+    dstv += 4*stride;
+
+    if (((short *)eobs)[3] & 0xfefe)
+        idct_dequant_full_2x_neon (q, dq, pre, dstv, 8, stride);
+    else
+        idct_dequant_0_2x_neon (q, dq[0], pre, 8, dstv, stride);
 }
diff --git a/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
new file mode 100644
index 000000000..456f8e1d4
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_0_2x_neon.asm
@@ -0,0 +1,79 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_0_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_0_2x_neon(short *q, short dq, unsigned char *pre,
+;                            int pitch, unsigned char *dst, int stride);
+; r0   *q
+; r1   dq
+; r2   *pre
+; r3   pitch
+; sp   *dst
+; sp+4 stride
+|idct_dequant_0_2x_neon| PROC
+    add             r12, r2, #4
+    vld1.32         {d2[0]}, [r2], r3
+    vld1.32         {d2[1]}, [r2], r3
+    vld1.32         {d4[0]}, [r2], r3
+    vld1.32         {d4[1]}, [r2]
+    vld1.32         {d8[0]}, [r12], r3
+    vld1.32         {d8[1]}, [r12], r3
+    vld1.32         {d10[0]}, [r12], r3
+    vld1.32         {d10[1]}, [r12]
+
+    ldrh            r12, [r0]               ; lo q
+    ldrh            r2, [r0, #32]           ; hi q
+    mov             r3, #0
+    strh            r3, [r0]
+    strh            r3, [r0, #32]
+
+    sxth            r12, r12                ; lo
+    mul             r0, r12, r1
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q0, r0
+    sxth            r2, r2                  ; hi
+    mul             r0, r2, r1
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q3, r0
+
+    vaddw.u8        q1, q0, d2              ; lo
+    vaddw.u8        q2, q0, d4
+    vaddw.u8        q4, q3, d8              ; hi
+    vaddw.u8        q5, q3, d10
+
+    ldr             r2, [sp]                ; dst
+    ldr             r3, [sp, #4]            ; stride
+
+    vqmovun.s16     d2, q1                  ; lo
+    vqmovun.s16     d4, q2
+    vqmovun.s16     d8, q4                  ; hi
+    vqmovun.s16     d10, q5
+
+    add             r0, r2, #4
+    vst1.32         {d2[0]}, [r2], r3       ; lo
+    vst1.32         {d2[1]}, [r2], r3
+    vst1.32         {d4[0]}, [r2], r3
+    vst1.32         {d4[1]}, [r2]
+    vst1.32         {d8[0]}, [r0], r3       ; hi
+    vst1.32         {d8[1]}, [r0], r3
+    vst1.32         {d10[0]}, [r0], r3
+    vst1.32         {d10[1]}, [r0]
+
+    bx             lr
+
+    ENDP           ; |idct_dequant_0_2x_neon|
+    END
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
new file mode 100644
index 000000000..0dc036acb
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_0_2x_neon.asm
@@ -0,0 +1,69 @@
+;
+;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_dc_0_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_0_2x_neon(short *dc, unsigned char *pre,
+;                               unsigned char *dst, int stride);
+; r0  *dc
+; r1  *pre
+; r2  *dst
+; r3  stride
+|idct_dequant_dc_0_2x_neon| PROC
+    ldr             r0, [r0]                ; *dc
+    mov             r12, #16
+
+    vld1.32         {d2[0]}, [r1], r12      ; lo
+    vld1.32         {d2[1]}, [r1], r12
+    vld1.32         {d4[0]}, [r1], r12
+    vld1.32         {d4[1]}, [r1]
+    sub             r1, r1, #44
+    vld1.32         {d8[0]}, [r1], r12      ; hi
+    vld1.32         {d8[1]}, [r1], r12
+    vld1.32         {d10[0]}, [r1], r12
+    vld1.32         {d10[1]}, [r1]
+
+    sxth            r1, r0                  ; lo *dc
+    add             r1, r1, #4
+    asr             r1, r1, #3
+    vdup.16         q0, r1
+    sxth            r0, r0, ror #16         ; hi *dc
+    add             r0, r0, #4
+    asr             r0, r0, #3
+    vdup.16         q3, r0
+
+    vaddw.u8        q1, q0, d2              ; lo
+    vaddw.u8        q2, q0, d4
+    vaddw.u8        q4, q3, d8              ; hi
+    vaddw.u8        q5, q3, d10
+
+    vqmovun.s16     d2, q1                  ; lo
+    vqmovun.s16     d4, q2
+    vqmovun.s16     d8, q4                  ; hi
+    vqmovun.s16     d10, q5
+
+    add             r0, r2, #4
+    vst1.32         {d2[0]}, [r2], r3       ; lo
+    vst1.32         {d2[1]}, [r2], r3
+    vst1.32         {d4[0]}, [r2], r3
+    vst1.32         {d4[1]}, [r2]
+    vst1.32         {d8[0]}, [r0], r3       ; hi
+    vst1.32         {d8[1]}, [r0], r3
+    vst1.32         {d10[0]}, [r0], r3
+    vst1.32         {d10[1]}, [r0]
+
+    bx             lr
+
+    ENDP           ;|idct_dequant_dc_0_2x_neon|
+    END
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
new file mode 100644
index 000000000..babfb91ad
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
@@ -0,0 +1,190 @@
+;
+;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_dc_full_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_dc_full_2x_neon(short *q, short *dq, unsigned char *pre,
+;                                  unsigned char *dst, int stride, short *dc);
+; r0    *q,
+; r1    *dq,
+; r2    *pre
+; r3    *dst
+; sp    stride
+; sp+4  *dc
+|idct_dequant_dc_full_2x_neon| PROC
+    vld1.16         {q3, q4}, [r0]          ; lo input
+    vld1.16         {q5, q6}, [r1]          ; use the same dq for both
+    mov             r1, #16                 ; pitch
+    add             r0, r0, #32
+    vld1.16         {q10, q11}, [r0]        ; hi input
+    add             r12, r2, #4
+    vld1.32         {d14[0]}, [r2], r1      ; lo pred
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
+    vld1.32         {d28[0]}, [r12], r1     ; hi pred
+    vld1.32         {d28[1]}, [r12], r1
+    vld1.32         {d29[0]}, [r12], r1
+    ldr             r1, [sp, #4]            ; dc
+    vld1.32         {d29[1]}, [r12]
+
+    ldr             r2, _CONSTANTS_
+
+    ldrh            r12, [r1], #2           ; lo *dc
+    ldrh            r1, [r1]                ; hi *dc
+
+    vmul.i16        q1, q3, q5              ; lo input * dq
+    vmul.i16        q2, q4, q6
+    vmul.i16        q8, q10, q5             ; hi input * dq
+    vmul.i16        q9, q11, q6
+
+    vmov.16         d2[0], r12              ; move lo dc up to neon, overwrite first element
+    vmov.16         d16[0], r1              ; move hi dc up to neon, overwrite first element
+
+    ldr             r1, [sp]                ; stride
+
+    vld1.16         {d0}, [r2]
+    vswp            d3, d4                  ; lo q2(vp[4] vp[12])
+    vswp            d17, d18                ; hi q2(vp[4] vp[12])
+
+    vqdmulh.s16     q3, q2, d0[2]           ; lo * constants
+    vqdmulh.s16     q4, q2, d0[0]
+    vqdmulh.s16     q10, q9, d0[2]          ; hi * constants
+    vqdmulh.s16     q11, q9, d0[0]
+
+    vqadd.s16       d12, d2, d3             ; lo a1
+    vqsub.s16       d13, d2, d3             ; lo b1
+    vqadd.s16       d26, d16, d17           ; hi a1
+    vqsub.s16       d27, d16, d17           ; hi b1
+
+    vshr.s16        q3, q3, #1              ; lo
+    vshr.s16        q4, q4, #1
+    vshr.s16        q10, q10, #1            ; hi
+    vshr.s16        q11, q11, #1
+
+    vqadd.s16       q3, q3, q2              ; lo
+    vqadd.s16       q4, q4, q2
+    vqadd.s16       q10, q10, q9            ; hi
+    vqadd.s16       q11, q11, q9
+
+    vqsub.s16       d10, d6, d9             ; lo c1
+    vqadd.s16       d11, d7, d8             ; lo d1
+    vqsub.s16       d24, d20, d23           ; hi c1
+    vqadd.s16       d25, d21, d22           ; hi d1
+
+    vqadd.s16       d2, d12, d11            ; lo
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+    vqadd.s16       d16, d26, d25           ; hi
+    vqadd.s16       d17, d27, d24
+    vqsub.s16       d18, d27, d24
+    vqsub.s16       d19, d26, d25
+
+    vtrn.32         d2, d4                  ; lo
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+    vtrn.32         d16, d18                ; hi
+    vtrn.32         d17, d19
+    vtrn.16         d16, d17
+    vtrn.16         d18, d19
+
+    vswp            d3, d4                  ; lo
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+    vswp            d17, d18                ; hi
+    vqdmulh.s16     q10, q9, d0[2]
+    vqdmulh.s16     q11, q9, d0[0]
+
+    vqadd.s16       d12, d2, d3             ; lo a1
+    vqsub.s16       d13, d2, d3             ; lo b1
+    vqadd.s16       d26, d16, d17           ; hi a1
+    vqsub.s16       d27, d16, d17           ; hi b1
+
+    vshr.s16        q3, q3, #1              ; lo
+    vshr.s16        q4, q4, #1
+    vshr.s16        q10, q10, #1            ; hi
+    vshr.s16        q11, q11, #1
+
+    vqadd.s16       q3, q3, q2              ; lo
+    vqadd.s16       q4, q4, q2
+    vqadd.s16       q10, q10, q9            ; hi
+    vqadd.s16       q11, q11, q9
+
+    vqsub.s16       d10, d6, d9             ; lo c1
+    vqadd.s16       d11, d7, d8             ; lo d1
+    vqsub.s16       d24, d20, d23           ; hi c1
+    vqadd.s16       d25, d21, d22           ; hi d1
+
+    vqadd.s16       d2, d12, d11            ; lo
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+    vqadd.s16       d16, d26, d25           ; hi
+    vqadd.s16       d17, d27, d24
+    vqsub.s16       d18, d27, d24
+    vqsub.s16       d19, d26, d25
+
+    vrshr.s16       q1, q1, #3              ; lo
+    vrshr.s16       q2, q2, #3
+    vrshr.s16       q8, q8, #3              ; hi
+    vrshr.s16       q9, q9, #3
+
+    vtrn.32         d2, d4                  ; lo
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+    vtrn.32         d16, d18                ; hi
+    vtrn.32         d17, d19
+    vtrn.16         d16, d17
+    vtrn.16         d18, d19
+
+    vaddw.u8        q1, q1, d14             ; lo
+    vaddw.u8        q2, q2, d15
+    vaddw.u8        q8, q8, d28             ; hi
+    vaddw.u8        q9, q9, d29
+
+    vmov.i16        q14, #0
+    vmov            q15, q14
+    vst1.16         {q14, q15}, [r0]        ; write over high input
+    sub             r0, r0, #32
+    vst1.16         {q14, q15}, [r0]        ; write over low input
+
+    vqmovun.s16     d0, q1                  ; lo
+    vqmovun.s16     d1, q2
+    vqmovun.s16     d2, q8                  ; hi
+    vqmovun.s16     d3, q9
+
+    add             r2, r3, #4              ; hi
+    vst1.32         {d0[0]}, [r3], r1       ; lo
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
+    vst1.32         {d2[0]}, [r2], r1       ; hi
+    vst1.32         {d2[1]}, [r2], r1
+    vst1.32         {d3[0]}, [r2], r1
+    vst1.32         {d3[1]}, [r2]
+
+    bx             lr
+
+    ENDP           ; |idct_dequant_dc_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
+
+    END
diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
new file mode 100644
index 000000000..14960f99c
--- /dev/null
+++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
@@ -0,0 +1,183 @@
+;
+;  Copyright (c) 2010 The Webm project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |idct_dequant_full_2x_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+;void idct_dequant_full_2x_neon(short *q, short *dq, unsigned char *pre,
+;                               unsigned char *dst, int pitch, int stride);
+; r0    *q,
+; r1    *dq,
+; r2    *pre
+; r3    *dst
+; sp    pitch
+; sp+4  stride
+|idct_dequant_full_2x_neon| PROC
+    vld1.16         {q3, q4}, [r0]          ; lo input
+    vld1.16         {q5, q6}, [r1]          ; use the same dq for both
+    ldr             r1, [sp]                ; pitch
+    add             r0, r0, #32
+    vld1.16         {q10, q11}, [r0]        ; hi input
+    add             r12, r2, #4
+    vld1.32         {d14[0]}, [r2], r1      ; lo pred
+    vld1.32         {d14[1]}, [r2], r1
+    vld1.32         {d15[0]}, [r2], r1
+    vld1.32         {d15[1]}, [r2]
+    vld1.32         {d28[0]}, [r12], r1     ; hi pred
+    vld1.32         {d28[1]}, [r12], r1
+    vld1.32         {d29[0]}, [r12], r1
+    vld1.32         {d29[1]}, [r12]
+
+    ldr             r2, _CONSTANTS_
+
+    vmul.i16        q1, q3, q5              ; lo input * dq
+    vmul.i16        q2, q4, q6
+    vmul.i16        q8, q10, q5             ; hi input * dq
+    vmul.i16        q9, q11, q6
+
+    ldr             r1, [sp, #4]            ; stride
+
+    vld1.16         {d0}, [r2]
+    vswp            d3, d4                  ; lo q2(vp[4] vp[12])
+    vswp            d17, d18                ; hi q2(vp[4] vp[12])
+
+    vqdmulh.s16     q3, q2, d0[2]           ; lo * constants
+    vqdmulh.s16     q4, q2, d0[0]
+    vqdmulh.s16     q10, q9, d0[2]          ; hi * constants
+    vqdmulh.s16     q11, q9, d0[0]
+
+    vqadd.s16       d12, d2, d3             ; lo a1
+    vqsub.s16       d13, d2, d3             ; lo b1
+    vqadd.s16       d26, d16, d17           ; hi a1
+    vqsub.s16       d27, d16, d17           ; hi b1
+
+    vshr.s16        q3, q3, #1              ; lo
+    vshr.s16        q4, q4, #1
+    vshr.s16        q10, q10, #1            ; hi
+    vshr.s16        q11, q11, #1
+
+    vqadd.s16       q3, q3, q2              ; lo
+    vqadd.s16       q4, q4, q2
+    vqadd.s16       q10, q10, q9            ; hi
+    vqadd.s16       q11, q11, q9
+
+    vqsub.s16       d10, d6, d9             ; lo c1
+    vqadd.s16       d11, d7, d8             ; lo d1
+    vqsub.s16       d24, d20, d23           ; hi c1
+    vqadd.s16       d25, d21, d22           ; hi d1
+
+    vqadd.s16       d2, d12, d11            ; lo
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+    vqadd.s16       d16, d26, d25           ; hi
+    vqadd.s16       d17, d27, d24
+    vqsub.s16       d18, d27, d24
+    vqsub.s16       d19, d26, d25
+
+    vtrn.32         d2, d4                  ; lo
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+    vtrn.32         d16, d18                ; hi
+    vtrn.32         d17, d19
+    vtrn.16         d16, d17
+    vtrn.16         d18, d19
+
+    vswp            d3, d4                  ; lo
+    vqdmulh.s16     q3, q2, d0[2]
+    vqdmulh.s16     q4, q2, d0[0]
+    vswp            d17, d18                ; hi
+    vqdmulh.s16     q10, q9, d0[2]
+    vqdmulh.s16     q11, q9, d0[0]
+
+    vqadd.s16       d12, d2, d3             ; lo a1
+    vqsub.s16       d13, d2, d3             ; lo b1
+    vqadd.s16       d26, d16, d17           ; hi a1
+    vqsub.s16       d27, d16, d17           ; hi b1
+
+    vshr.s16        q3, q3, #1              ; lo
+    vshr.s16        q4, q4, #1
+    vshr.s16        q10, q10, #1            ; hi
+    vshr.s16        q11, q11, #1
+
+    vqadd.s16       q3, q3, q2              ; lo
+    vqadd.s16       q4, q4, q2
+    vqadd.s16       q10, q10, q9            ; hi
+    vqadd.s16       q11, q11, q9
+
+    vqsub.s16       d10, d6, d9             ; lo c1
+    vqadd.s16       d11, d7, d8             ; lo d1
+    vqsub.s16       d24, d20, d23           ; hi c1
+    vqadd.s16       d25, d21, d22           ; hi d1
+
+    vqadd.s16       d2, d12, d11            ; lo
+    vqadd.s16       d3, d13, d10
+    vqsub.s16       d4, d13, d10
+    vqsub.s16       d5, d12, d11
+    vqadd.s16       d16, d26, d25           ; hi
+    vqadd.s16       d17, d27, d24
+    vqsub.s16       d18, d27, d24
+    vqsub.s16       d19, d26, d25
+
+    vrshr.s16       q1, q1, #3              ; lo
+    vrshr.s16       q2, q2, #3
+    vrshr.s16       q8, q8, #3              ; hi
+    vrshr.s16       q9, q9, #3
+
+    vtrn.32         d2, d4                  ; lo
+    vtrn.32         d3, d5
+    vtrn.16         d2, d3
+    vtrn.16         d4, d5
+    vtrn.32         d16, d18                ; hi
+    vtrn.32         d17, d19
+    vtrn.16         d16, d17
+    vtrn.16         d18, d19
+
+    vaddw.u8        q1, q1, d14             ; lo
+    vaddw.u8        q2, q2, d15
+    vaddw.u8        q8, q8, d28             ; hi
+    vaddw.u8        q9, q9, d29
+
+    vmov.i16        q14, #0
+    vmov            q15, q14
+    vst1.16         {q14, q15}, [r0]        ; write over high input
+    sub             r0, r0, #32
+    vst1.16         {q14, q15}, [r0]        ; write over low input
+
+    vqmovun.s16     d0, q1                  ; lo
+    vqmovun.s16     d1, q2
+    vqmovun.s16     d2, q8                  ; hi
+    vqmovun.s16     d3, q9
+
+    add             r2, r3, #4              ; hi
+    vst1.32         {d0[0]}, [r3], r1       ; lo
+    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d1[0]}, [r3], r1
+    vst1.32         {d1[1]}, [r3]
+    vst1.32         {d2[0]}, [r2], r1       ; hi
+    vst1.32         {d2[1]}, [r2], r1
+    vst1.32         {d3[0]}, [r2], r1
+    vst1.32         {d3[1]}, [r2]
+
+    bx             lr
+
+    ENDP           ; |idct_dequant_full_2x_neon|
+
+; Constant Pool
+_CONSTANTS_       DCD cospi8sqrt2minus1
+cospi8sqrt2minus1 DCD 0x4e7b4e7b
+sinpi8sqrt2       DCD 0x8a8c8a8c
+
+    END
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index 989232cd3..ae0610cda 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -25,7 +25,10 @@ VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/dequantize_v6$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/armv6/idct_blk_v6.c
 
 #File list for neon
-VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_dc_idct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_dc_full_2x_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_dc_0_2x_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequant_idct_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_full_2x_neon$(ASM)
+VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_dequant_0_2x_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/dequantizeb_neon$(ASM)
 VP8_DX_SRCS-$(HAVE_ARMV7)  += decoder/arm/neon/idct_blk_neon.c

From c5fb0eb8d9a9cf0e593fbf06730f5b8501967009 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Thu, 9 Sep 2010 14:42:48 -0400
Subject: [PATCH 157/307] Improved subset block search

Improved the subset block search and fill.  (about 3% improvement for
32 bit)  Modified/merged the code in order to create
vp8_read_mb_modes_mv which can decode the modes/mvs on a macroblock
level. This will allow the decode loop (in the future) to decode
modes/mvs on a frame, row, or mb level.

Change-Id: If637d994b508792f846d39b5d44a7bf9aa5cddf3
---
 vp8/decoder/decodemv.c   | 682 +++++++++++++++++++++++----------------
 vp8/decoder/decodframe.c |  20 +-
 vp8/decoder/demode.c     | 149 ---------
 vp8/decoder/demode.h     |  33 --
 vp8/decoder/onyxd_int.h  |   6 +
 vp8/vp8dx.mk             |   2 -
 6 files changed, 422 insertions(+), 470 deletions(-)
 mode change 100644 => 100755 vp8/decoder/decodemv.c
 delete mode 100644 vp8/decoder/demode.c
 delete mode 100644 vp8/decoder/demode.h

diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
old mode 100644
new mode 100755
index da68ee688..e9281f7ae
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -14,10 +14,126 @@
 #include "entropymode.h"
 #include "onyxd_int.h"
 #include "findnearmv.h"
-#include "demode.h"
+
 #if CONFIG_DEBUG
 #include <assert.h>
 #endif
+static int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
+
+    return i;
+}
+
+
+static int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
+
+    return i;
+}
+
+static int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
+
+    return i;
+}
+
+
+
+static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
+{
+    const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
+
+    return i;
+}
+
+static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
+{
+    // Is segmentation enabled
+    if (x->segmentation_enabled && x->update_mb_segmentation_map)
+    {
+        // If so then read the segment id.
+        if (vp8_read(r, x->mb_segment_tree_probs[0]))
+            mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
+        else
+            mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
+    }
+}
+
+static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_col)
+{
+    vp8_reader *const bc = & pbi->bc;
+    const int mis = pbi->common.mode_info_stride;
+
+        {
+            MB_PREDICTION_MODE y_mode;
+
+            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
+            // By default on a key frame reset all MBs to segment 0
+            m->mbmi.segment_id = 0;
+
+            if (pbi->mb.update_mb_segmentation_map)
+                vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
+
+            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+            if (pbi->common.mb_no_coeff_skip)
+                m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+            else
+                m->mbmi.mb_skip_coeff = 0;
+
+            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, pbi->common.kf_ymode_prob);
+
+            m->mbmi.ref_frame = INTRA_FRAME;
+
+            if ((m->mbmi.mode = y_mode) == B_PRED)
+            {
+                int i = 0;
+
+                do
+                {
+                    const B_PREDICTION_MODE A = vp8_above_bmi(m, i, mis)->mode;
+                    const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
+
+                    m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, pbi->common.kf_bmode_prob [A] [L]);
+                }
+                while (++i < 16);
+            }
+            else
+            {
+                int BMode;
+                int i = 0;
+
+                switch (y_mode)
+                {
+                case DC_PRED:
+                    BMode = B_DC_PRED;
+                    break;
+                case V_PRED:
+                    BMode = B_VE_PRED;
+                    break;
+                case H_PRED:
+                    BMode = B_HE_PRED;
+                    break;
+                case TM_PRED:
+                    BMode = B_TM_PRED;
+                    break;
+                default:
+                    BMode = B_DC_PRED;
+                    break;
+                }
+
+                do
+                {
+                    m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
+                }
+                while (++i < 16);
+            }
+
+            m->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.kf_uv_mode_prob);
+        }
+}
 
 static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)
 {
@@ -99,6 +215,8 @@ static MB_PREDICTION_MODE sub_mv_ref(vp8_reader *bc, const vp8_prob *p)
 
     return (MB_PREDICTION_MODE)i;
 }
+
+#ifdef VPX_MODE_COUNT
 unsigned int vp8_mv_cont_count[5][4] =
 {
     { 0, 0, 0, 0 },
@@ -107,310 +225,327 @@ unsigned int vp8_mv_cont_count[5][4] =
     { 0, 0, 0, 0 },
     { 0, 0, 0, 0 }
 };
+#endif
 
-void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+unsigned char vp8_mbsplit_offset[4][16] = {
+    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
+unsigned char vp8_mbsplit_fill_count[4] = {8, 8, 4, 1};
+unsigned char vp8_mbsplit_fill_offset[4][16] = {
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15},
+    { 0,  1,  4,  5,  8,  9, 12, 13,  2,  3,   6,  7, 10, 11, 14, 15},
+    { 0,  1,  4,  5,  2,  3,  6,  7,  8,  9,  12, 13, 10, 11, 14, 15},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
+
+
+
+
+void vp8_mb_mode_mv_init(VP8D_COMP *pbi)
+{
+    vp8_reader *const bc = & pbi->bc;
+    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+
+    pbi->prob_skip_false = 0;
+    if (pbi->common.mb_no_coeff_skip)
+        pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
+
+    if(pbi->common.frame_type != KEY_FRAME)
+    {
+        pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
+        pbi->prob_last  = (vp8_prob)vp8_read_literal(bc, 8);
+        pbi->prob_gf    = (vp8_prob)vp8_read_literal(bc, 8);
+
+        if (vp8_read_bit(bc))
+        {
+            int i = 0;
+
+            do
+            {
+                pbi->common.fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            }
+            while (++i < 4);
+        }
+
+        if (vp8_read_bit(bc))
+        {
+            int i = 0;
+
+            do
+            {
+                pbi->common.fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            }
+            while (++i < 3);
+        }
+
+        read_mvcontexts(bc, mvc);
+    }
+}
+
+void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
+                            int mb_row, int mb_col)
 {
     const MV Zero = { 0, 0};
-
-    VP8_COMMON *const pc = & pbi->common;
     vp8_reader *const bc = & pbi->bc;
+    MV_CONTEXT *const mvc = pbi->common.fc.mvc;
+    const int mis = pbi->common.mode_info_stride;
 
-    MODE_INFO *mi = pc->mi, *ms;
-    const int mis = pc->mode_info_stride;
+    MV *const mv = & mbmi->mv.as_mv;
+    int mb_to_left_edge;
+    int mb_to_right_edge;
+    int mb_to_top_edge;
+    int mb_to_bottom_edge;
 
-    MV_CONTEXT *const mvc = pc->fc.mvc;
+    mb_to_top_edge = pbi->mb.mb_to_top_edge;
+    mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge;
+    mb_to_top_edge -= LEFT_TOP_MARGIN;
+    mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
-    int mb_row = -1;
+    mbmi->need_to_clamp_mvs = 0;
+    // Distance of Mb to the various image edges.
+    // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+    pbi->mb.mb_to_left_edge =
+    mb_to_left_edge = -((mb_col * 16) << 3);
+    mb_to_left_edge -= LEFT_TOP_MARGIN;
 
-    vp8_prob prob_intra;
-    vp8_prob prob_last;
-    vp8_prob prob_gf;
-    vp8_prob prob_skip_false = 0;
+    pbi->mb.mb_to_right_edge =
+    mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
+    mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
 
-    if (pc->mb_no_coeff_skip)
-        prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8);
+    // If required read in new segmentation data for this MB
+    if (pbi->mb.update_mb_segmentation_map)
+        vp8_read_mb_features(bc, mbmi, &pbi->mb);
 
-    prob_intra = (vp8_prob)vp8_read_literal(bc, 8);
-    prob_last  = (vp8_prob)vp8_read_literal(bc, 8);
-    prob_gf    = (vp8_prob)vp8_read_literal(bc, 8);
+    // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+    if (pbi->common.mb_no_coeff_skip)
+        mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
+    else
+        mbmi->mb_skip_coeff = 0;
 
-    ms = pc->mi - 1;
-
-    if (vp8_read_bit(bc))
+    if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, pbi->prob_intra)))    /* inter MB */
     {
-        int i = 0;
+        int rct[4];
+        vp8_prob mv_ref_p [VP8_MVREFS-1];
+        MV nearest, nearby, best_mv;
 
-        do
+        if (vp8_read(bc, pbi->prob_last))
         {
-            pc->fc.ymode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
+            mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, pbi->prob_gf)));
         }
-        while (++i < 4);
-    }
 
-    if (vp8_read_bit(bc))
-    {
-        int i = 0;
+        vp8_find_near_mvs(&pbi->mb, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
 
-        do
+        vp8_mv_ref_probs(mv_ref_p, rct);
+
+        mbmi->uv_mode = DC_PRED;
+        switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
         {
-            pc->fc.uv_mode_prob[i] = (vp8_prob) vp8_read_literal(bc, 8);
-        }
-        while (++i < 3);
-    }
-
-    read_mvcontexts(bc, mvc);
-
-    while (++mb_row < pc->mb_rows)
-    {
-        int mb_col = -1;
-
-        while (++mb_col < pc->mb_cols)
+        case SPLITMV:
         {
-            MB_MODE_INFO *const mbmi = & mi->mbmi;
-            MV *const mv = & mbmi->mv.as_mv;
-            VP8_COMMON *const pc = &pbi->common;
-            MACROBLOCKD *xd = &pbi->mb;
+            const int s = mbmi->partitioning =
+                      vp8_treed_read(bc, vp8_mbsplit_tree, vp8_mbsplit_probs);
+            const int num_p = vp8_mbsplit_count [s];
+            int j = 0;
 
-            mbmi->need_to_clamp_mvs = 0;
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
-            xd->mb_to_left_edge = -((mb_col * 16) << 3);
-            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
-            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;
-
-            // If required read in new segmentation data for this MB
-            if (pbi->mb.update_mb_segmentation_map)
-                vp8_read_mb_features(bc, mbmi, &pbi->mb);
-
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
-            if (pc->mb_no_coeff_skip)
-                mbmi->mb_skip_coeff = vp8_read(bc, prob_skip_false);
-            else
-                mbmi->mb_skip_coeff = 0;
-
-            mbmi->uv_mode = DC_PRED;
-
-            if ((mbmi->ref_frame = (MV_REFERENCE_FRAME) vp8_read(bc, prob_intra)))    /* inter MB */
+            do  /* for each subset j */
             {
-                int rct[4];
-                vp8_prob mv_ref_p [VP8_MVREFS-1];
-                MV nearest, nearby, best_mv;
+                B_MODE_INFO bmi;
+                MV *const mv = & bmi.mv.as_mv;
 
-                if (vp8_read(bc, prob_last))
+                int k;  /* first block in subset j */
+                int mv_contz;
+                k = vp8_mbsplit_offset[s][j];
+
+                mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
+
+                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
                 {
-                    mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)mbmi->ref_frame + (int)(1 + vp8_read(bc, prob_gf)));
-                }
-
-                vp8_find_near_mvs(xd, mi, &nearest, &nearby, &best_mv, rct, mbmi->ref_frame, pbi->common.ref_frame_sign_bias);
-
-                vp8_mv_ref_probs(mv_ref_p, rct);
-
-                switch (mbmi->mode = read_mv_ref(bc, mv_ref_p))
-                {
-                case SPLITMV:
-                {
-                    const int s = mbmi->partitioning = vp8_treed_read(
-                                                           bc, vp8_mbsplit_tree, vp8_mbsplit_probs
-                                                       );
-                    const int num_p = vp8_mbsplit_count [s];
-                    const int *const  L = vp8_mbsplits [s];
-                    int j = 0;
-
-                    do  /* for each subset j */
-                    {
-                        B_MODE_INFO bmi;
-                        MV *const mv = & bmi.mv.as_mv;
-
-                        int k = -1;  /* first block in subset j */
-                        int mv_contz;
-
-                        while (j != L[++k])
-                        {
-#if CONFIG_DEBUG
-                            if (k >= 16)
-                            {
-                                assert(0);
-                            }
-#endif
-                        }
-
-                        mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
-
-                        switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
-                        {
-                        case NEW4X4:
-                            read_mv(bc, mv, (const MV_CONTEXT *) mvc);
-                            mv->row += best_mv.row;
-                            mv->col += best_mv.col;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][3]++;
-#endif
-                            break;
-                        case LEFT4X4:
-                            *mv = vp8_left_bmi(mi, k)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][0]++;
-#endif
-                            break;
-                        case ABOVE4X4:
-                            *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][1]++;
-#endif
-                            break;
-                        case ZERO4X4:
-                            *mv = Zero;
-#ifdef VPX_MODE_COUNT
-                            vp8_mv_cont_count[mv_contz][2]++;
-#endif
-                            break;
-                        default:
-                            break;
-                        }
-
-                        if (mv->col < xd->mb_to_left_edge
-                                      - LEFT_TOP_MARGIN
-                            || mv->col > xd->mb_to_right_edge
-                                         + RIGHT_BOTTOM_MARGIN
-                            || mv->row < xd->mb_to_top_edge
-                                         - LEFT_TOP_MARGIN
-                            || mv->row > xd->mb_to_bottom_edge
-                                         + RIGHT_BOTTOM_MARGIN
-                            )
-                            mbmi->need_to_clamp_mvs = 1;
-
-                        /* Fill (uniform) modes, mvs of jth subset.
-                           Must do it here because ensuing subsets can
-                           refer back to us via "left" or "above". */
-                        do
-                            if (j == L[k])
-                                mi->bmi[k] = bmi;
-
-                        while (++k < 16);
-                    }
-                    while (++j < num_p);
-                }
-
-                *mv = mi->bmi[15].mv.as_mv;
-
-                break;  /* done with SPLITMV */
-
-                case NEARMV:
-                    *mv = nearby;
-
-                    // Clip "next_nearest" so that it does not extend to far out of image
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
-                    goto propagate_mv;
-
-                case NEARESTMV:
-                    *mv = nearest;
-
-                    // Clip "next_nearest" so that it does not extend to far out of image
-                    if (mv->col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN))
-                        mv->col = xd->mb_to_left_edge - LEFT_TOP_MARGIN;
-                    else if (mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN;
-
-                    if (mv->row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN))
-                        mv->row = xd->mb_to_top_edge - LEFT_TOP_MARGIN;
-                    else if (mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN)
-                        mv->row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN;
-
-                    goto propagate_mv;
-
-                case ZEROMV:
-                    *mv = Zero;
-                    goto propagate_mv;
-
-                case NEWMV:
+                case NEW4X4:
                     read_mv(bc, mv, (const MV_CONTEXT *) mvc);
                     mv->row += best_mv.row;
                     mv->col += best_mv.col;
-
-                    /* Don't need to check this on NEARMV and NEARESTMV modes
-                     * since those modes clamp the MV. The NEWMV mode does not,
-                     * so signal to the prediction stage whether special
-                     * handling may be required.
-                     */
-                    if (mv->col < xd->mb_to_left_edge - LEFT_TOP_MARGIN
-                        || mv->col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN
-                        || mv->row < xd->mb_to_top_edge - LEFT_TOP_MARGIN
-                        || mv->row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN
-                        )
-                        mbmi->need_to_clamp_mvs = 1;
-
-                propagate_mv:  /* same MV throughout */
-                    {
-                        //int i=0;
-                        //do
-                        //{
-                        //  mi->bmi[i].mv.as_mv = *mv;
-                        //}
-                        //while( ++i < 16);
-
-                        mi->bmi[0].mv.as_mv = *mv;
-                        mi->bmi[1].mv.as_mv = *mv;
-                        mi->bmi[2].mv.as_mv = *mv;
-                        mi->bmi[3].mv.as_mv = *mv;
-                        mi->bmi[4].mv.as_mv = *mv;
-                        mi->bmi[5].mv.as_mv = *mv;
-                        mi->bmi[6].mv.as_mv = *mv;
-                        mi->bmi[7].mv.as_mv = *mv;
-                        mi->bmi[8].mv.as_mv = *mv;
-                        mi->bmi[9].mv.as_mv = *mv;
-                        mi->bmi[10].mv.as_mv = *mv;
-                        mi->bmi[11].mv.as_mv = *mv;
-                        mi->bmi[12].mv.as_mv = *mv;
-                        mi->bmi[13].mv.as_mv = *mv;
-                        mi->bmi[14].mv.as_mv = *mv;
-                        mi->bmi[15].mv.as_mv = *mv;
-                    }
-
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][3]++;
+  #endif
+                    break;
+                case LEFT4X4:
+                    *mv = vp8_left_bmi(mi, k)->mv.as_mv;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][0]++;
+  #endif
+                    break;
+                case ABOVE4X4:
+                    *mv = vp8_above_bmi(mi, k, mis)->mv.as_mv;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][1]++;
+  #endif
+                    break;
+                case ZERO4X4:
+                    *mv = Zero;
+  #ifdef VPX_MODE_COUNT
+                    vp8_mv_cont_count[mv_contz][2]++;
+  #endif
+                    break;
+                default:
                     break;
-
-                default:;
-#if CONFIG_DEBUG
-                    assert(0);
-#endif
                 }
+
+                mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+                {
+                    /* Fill (uniform) modes, mvs of jth subset.
+                     Must do it here because ensuing subsets can
+                     refer back to us via "left" or "above". */
+                    unsigned char *fill_offset;
+                    unsigned int fill_count = vp8_mbsplit_fill_count[s];
+
+                    fill_offset = &vp8_mbsplit_fill_offset[s][(unsigned char)j * vp8_mbsplit_fill_count[s]];
+
+                    do {
+                        mi->bmi[ *fill_offset] = bmi;
+                      fill_offset++;
+
+                    }while (--fill_count);
+                }
+
             }
-            else
+            while (++j < num_p);
+        }
+
+        *mv = mi->bmi[15].mv.as_mv;
+
+        break;  /* done with SPLITMV */
+
+        case NEARMV:
+            *mv = nearby;
+            // Clip "next_nearest" so that it does not extend to far out of image
+            mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+            mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+            mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+            mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+            goto propagate_mv;
+
+        case NEARESTMV:
+            *mv = nearest;
+            // Clip "next_nearest" so that it does not extend to far out of image
+            mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
+            mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
+            mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
+            mv->row = (mv->row > mb_to_bottom_edge) ? mb_to_bottom_edge : mv->row;
+            goto propagate_mv;
+
+        case ZEROMV:
+            *mv = Zero;
+            goto propagate_mv;
+
+        case NEWMV:
+            read_mv(bc, mv, (const MV_CONTEXT *) mvc);
+            mv->row += best_mv.row;
+            mv->col += best_mv.col;
+
+            /* Don't need to check this on NEARMV and NEARESTMV modes
+             * since those modes clamp the MV. The NEWMV mode does not,
+             * so signal to the prediction stage whether special
+             * handling may be required.
+             */
+            mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
+            mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;
+
+        propagate_mv:  /* same MV throughout */
             {
-                /* MB is intra coded */
+                //int i=0;
+                //do
+                //{
+                //  mi->bmi[i].mv.as_mv = *mv;
+                //}
+                //while( ++i < 16);
 
-                int j = 0;
-
-                do
-                {
-                    mi->bmi[j].mv.as_mv = Zero;
-                }
-                while (++j < 16);
-
-                *mv = Zero;
-
-                if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pc->fc.ymode_prob)) == B_PRED)
-                {
-                    int j = 0;
-
-                    do
-                    {
-                        mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pc->fc.bmode_prob);
-                    }
-                    while (++j < 16);
-                }
-
-                mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pc->fc.uv_mode_prob);
+                mi->bmi[0].mv.as_mv = *mv;
+                mi->bmi[1].mv.as_mv = *mv;
+                mi->bmi[2].mv.as_mv = *mv;
+                mi->bmi[3].mv.as_mv = *mv;
+                mi->bmi[4].mv.as_mv = *mv;
+                mi->bmi[5].mv.as_mv = *mv;
+                mi->bmi[6].mv.as_mv = *mv;
+                mi->bmi[7].mv.as_mv = *mv;
+                mi->bmi[8].mv.as_mv = *mv;
+                mi->bmi[9].mv.as_mv = *mv;
+                mi->bmi[10].mv.as_mv = *mv;
+                mi->bmi[11].mv.as_mv = *mv;
+                mi->bmi[12].mv.as_mv = *mv;
+                mi->bmi[13].mv.as_mv = *mv;
+                mi->bmi[14].mv.as_mv = *mv;
+                mi->bmi[15].mv.as_mv = *mv;
             }
+            break;
+        default:;
+  #if CONFIG_DEBUG
+            assert(0);
+  #endif
+        }
+    }
+    else
+    {
+        /* MB is intra coded */
+        int j = 0;
+        do
+        {
+            mi->bmi[j].mv.as_mv = Zero;
+        }
+        while (++j < 16);
+
+        if ((mbmi->mode = (MB_PREDICTION_MODE) vp8_read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED)
+        {
+            j = 0;
+            do
+            {
+                mi->bmi[j].mode = (B_PREDICTION_MODE)vp8_read_bmode(bc, pbi->common.fc.bmode_prob);
+            }
+            while (++j < 16);
+        }
+
+        mbmi->uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, pbi->common.fc.uv_mode_prob);
+    }
+
+}
+
+void vp8_decode_mode_mvs(VP8D_COMP *pbi)
+{
+    MODE_INFO *mi = pbi->common.mi;
+    int mb_row = -1;
+
+    vp8_mb_mode_mv_init(pbi);
+
+    while (++mb_row < pbi->common.mb_rows)
+    {
+        int mb_col = -1;
+        int mb_to_top_edge;
+        int mb_to_bottom_edge;
+
+        pbi->mb.mb_to_top_edge =
+        mb_to_top_edge = -((mb_row * 16)) << 3;
+        mb_to_top_edge -= LEFT_TOP_MARGIN;
+
+        pbi->mb.mb_to_bottom_edge =
+        mb_to_bottom_edge = ((pbi->common.mb_rows - 1 - mb_row) * 16) << 3;
+        mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
+
+        while (++mb_col < pbi->common.mb_cols)
+        {
+//          vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);
+            if(pbi->common.frame_type == KEY_FRAME)
+                vp8_kfread_modes(pbi, mi, mb_row, mb_col);
+            else
+                vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
             mi++;       // next macroblock
         }
@@ -418,3 +553,4 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
         mi++;           // skip left predictor each row
     }
 }
+
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index f2d8c766e..4f5b7c7a2 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -23,7 +23,7 @@
 #include "quant_common.h"
 
 #include "setupintrarecon.h"
-#include "demode.h"
+
 #include "decodemv.h"
 #include "extend.h"
 #include "vpx_mem/vpx_mem.h"
@@ -151,15 +151,11 @@ static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 /* A version of the above function for chroma block MVs.*/
 static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
 {
-    if (2*mv->col < (xd->mb_to_left_edge - (19 << 3)))
-        mv->col = (xd->mb_to_left_edge - (16 << 3)) >> 1;
-    else if (2*mv->col > xd->mb_to_right_edge + (18 << 3))
-        mv->col = (xd->mb_to_right_edge + (16 << 3)) >> 1;
+    mv->col = (2*mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col;
+    mv->col = (2*mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col;
 
-    if (2*mv->row < (xd->mb_to_top_edge - (19 << 3)))
-        mv->row = (xd->mb_to_top_edge - (16 << 3)) >> 1;
-    else if (2*mv->row > xd->mb_to_bottom_edge + (18 << 3))
-        mv->row = (xd->mb_to_bottom_edge + (16 << 3)) >> 1;
+    mv->row = (2*mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row;
+    mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
 }
 
 static void clamp_mvs(MACROBLOCKD *xd)
@@ -838,10 +834,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     // Read the mb_no_coeff_skip flag
     pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
 
-    if (pc->frame_type == KEY_FRAME)
-        vp8_kfread_modes(pbi);
-    else
-        vp8_decode_mode_mvs(pbi);
+
+    vp8_decode_mode_mvs(pbi);
 
     vpx_memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols);
 
diff --git a/vp8/decoder/demode.c b/vp8/decoder/demode.c
deleted file mode 100644
index 557508dd8..000000000
--- a/vp8/decoder/demode.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-#include "entropymode.h"
-#include "findnearmv.h"
-
-
-int vp8_read_bmode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_bmode_tree, p);
-
-    return i;
-}
-
-
-int vp8_read_ymode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_ymode_tree, p);
-
-    return i;
-}
-
-int vp8_kfread_ymode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p);
-
-    return i;
-}
-
-
-
-int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
-{
-    const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p);
-
-    return i;
-}
-
-void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
-{
-    // Is segmentation enabled
-    if (x->segmentation_enabled && x->update_mb_segmentation_map)
-    {
-        // If so then read the segment id.
-        if (vp8_read(r, x->mb_segment_tree_probs[0]))
-            mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
-        else
-            mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1]));
-    }
-}
-
-void vp8_kfread_modes(VP8D_COMP *pbi)
-{
-    VP8_COMMON *const cp = & pbi->common;
-    vp8_reader *const bc = & pbi->bc;
-
-    MODE_INFO *m = cp->mi;
-    const int ms = cp->mode_info_stride;
-
-    int mb_row = -1;
-    vp8_prob prob_skip_false = 0;
-
-    if (cp->mb_no_coeff_skip)
-        prob_skip_false = (vp8_prob)(vp8_read_literal(bc, 8));
-
-    while (++mb_row < cp->mb_rows)
-    {
-        int mb_col = -1;
-
-        while (++mb_col < cp->mb_cols)
-        {
-            MB_PREDICTION_MODE y_mode;
-
-            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
-            // By default on a key frame reset all MBs to segment 0
-            m->mbmi.segment_id = 0;
-
-            if (pbi->mb.update_mb_segmentation_map)
-                vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
-
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
-            if (cp->mb_no_coeff_skip)
-                m->mbmi.mb_skip_coeff = vp8_read(bc, prob_skip_false);
-            else
-                m->mbmi.mb_skip_coeff = 0;
-
-            y_mode = (MB_PREDICTION_MODE) vp8_kfread_ymode(bc, cp->kf_ymode_prob);
-
-            m->mbmi.ref_frame = INTRA_FRAME;
-
-            if ((m->mbmi.mode = y_mode) == B_PRED)
-            {
-                int i = 0;
-
-                do
-                {
-                    const B_PREDICTION_MODE A = vp8_above_bmi(m, i, ms)->mode;
-                    const B_PREDICTION_MODE L = vp8_left_bmi(m, i)->mode;
-
-                    m->bmi[i].mode = (B_PREDICTION_MODE) vp8_read_bmode(bc, cp->kf_bmode_prob [A] [L]);
-                }
-                while (++i < 16);
-            }
-            else
-            {
-                int BMode;
-                int i = 0;
-
-                switch (y_mode)
-                {
-                case DC_PRED:
-                    BMode = B_DC_PRED;
-                    break;
-                case V_PRED:
-                    BMode = B_VE_PRED;
-                    break;
-                case H_PRED:
-                    BMode = B_HE_PRED;
-                    break;
-                case TM_PRED:
-                    BMode = B_TM_PRED;
-                    break;
-                default:
-                    BMode = B_DC_PRED;
-                    break;
-                }
-
-                do
-                {
-                    m->bmi[i].mode = (B_PREDICTION_MODE)BMode;
-                }
-                while (++i < 16);
-            }
-
-            (m++)->mbmi.uv_mode = (MB_PREDICTION_MODE)vp8_read_uv_mode(bc, cp->kf_uv_mode_prob);
-        }
-
-        m++; // skip the border
-    }
-}
diff --git a/vp8/decoder/demode.h b/vp8/decoder/demode.h
deleted file mode 100644
index a58cff95b..000000000
--- a/vp8/decoder/demode.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "onyxd_int.h"
-
-/* Read (intra) modes for all blocks in a keyframe */
-
-void vp8_kfread_modes(VP8D_COMP *pbi);
-
-/* Intra mode for a Y subblock */
-
-int vp8_read_bmode(vp8_reader *, const vp8_prob *);
-
-/* MB intra Y mode trees differ for key and inter frames. */
-
-int   vp8_read_ymode(vp8_reader *, const vp8_prob *);
-int vp8_kfread_ymode(vp8_reader *, const vp8_prob *);
-
-/* MB intra UV mode trees are the same for key and inter frames. */
-
-int vp8_read_uv_mode(vp8_reader *, const vp8_prob *);
-
-/* Read any macroblock-level features that may be present. */
-
-void vp8_read_mb_features(vp8_reader *, MB_MODE_INFO *, MACROBLOCKD *);
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 9be4d47b2..ad21ae3f5 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -125,6 +125,12 @@ typedef struct VP8Decompressor
     struct vp8_dboolhuff_rtcd_vtable dboolhuff;
 #endif
 
+
+    vp8_prob prob_intra;
+    vp8_prob prob_last;
+    vp8_prob prob_gf;
+    vp8_prob prob_skip_false;
+
 } VP8D_COMP;
 
 int vp8_decode_frame(VP8D_COMP *cpi);
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 1d2558f23..941961708 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -54,14 +54,12 @@ CFLAGS+=-I$(SRC_PATH_BARE)/$(VP8_PREFIX)decoder
 VP8_DX_SRCS-yes += decoder/dboolhuff.c
 VP8_DX_SRCS-yes += decoder/decodemv.c
 VP8_DX_SRCS-yes += decoder/decodframe.c
-VP8_DX_SRCS-yes += decoder/demode.c
 VP8_DX_SRCS-yes += decoder/dequantize.c
 VP8_DX_SRCS-yes += decoder/detokenize.c
 VP8_DX_SRCS-yes += decoder/generic/dsystemdependent.c
 VP8_DX_SRCS-yes += decoder/dboolhuff.h
 VP8_DX_SRCS-yes += decoder/decodemv.h
 VP8_DX_SRCS-yes += decoder/decoderthreading.h
-VP8_DX_SRCS-yes += decoder/demode.h
 VP8_DX_SRCS-yes += decoder/dequantize.h
 VP8_DX_SRCS-yes += decoder/detokenize.h
 VP8_DX_SRCS-yes += decoder/onyxd_int.h

From a65cd3def085df6c1c89e824f637d93674f1f1f9 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Fri, 10 Sep 2010 16:27:28 -0700
Subject: [PATCH 158/307] Make block access to frame buffer sequential

Sequentially accessing memory from a low address to a high
address should make it easier for the processor to predict
the cache.

Change-Id: I1921ce996bdd547144fe864fea6435f527f5842d
---
 vp8/common/x86/loopfilter_sse2.asm | 205 +++++++++++++++--------------
 1 file changed, 108 insertions(+), 97 deletions(-)

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 5839e43bf..985d5a09d 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -737,29 +737,30 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
 
 
 %macro TRANSPOSE_16X8_1 0
-        movq        xmm0,               QWORD PTR [rdi+rcx*2]   ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
-        movq        xmm7,               QWORD PTR [rsi+rcx*2]   ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+        movq        xmm4,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+        movq        xmm7,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
 
-        punpcklbw   xmm7,               xmm0            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
-        movq        xmm0,               QWORD PTR [rsi+rcx]
+        punpcklbw   xmm4,               xmm7            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+        movq        xmm0,               QWORD PTR [rsi+2*rax]  ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
 
-        movq        xmm5,               QWORD PTR [rsi] ;
-        punpcklbw   xmm5,               xmm0            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
 
+        movq        xmm7,               QWORD PTR [rdi+2*rax]  ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
+        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
+
+        movq        xmm5,               QWORD PTR [rsi+4*rax]  ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40
+        movq        xmm2,               QWORD PTR [rdi+4*rax]  ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50
+
+        punpcklbw   xmm5,               xmm2            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+        movq        xmm7,               QWORD PTR [rsi+2*rcx]  ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+        movq        xmm1,               QWORD PTR [rdi+2*rcx]  ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
         movdqa      xmm6,               xmm5            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+
+        punpcklbw   xmm7,               xmm1            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
         punpcklwd   xmm5,               xmm7            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
 
         punpckhwd   xmm6,               xmm7            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
-        movq        xmm7,               QWORD PTR [rsi + rax]   ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
-
-        movq        xmm0,               QWORD PTR [rsi + rax*2] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
-        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
-
-        movq        xmm4,               QWORD PTR [rsi + rax*4] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
-        movq        xmm7,               QWORD PTR [rdi + rax*4] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm4,               xmm7            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
 
         punpcklwd   xmm3,               xmm0            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
         punpckhwd   xmm4,               xmm0            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
@@ -777,28 +778,28 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
 %endmacro
 
 %macro TRANSPOSE_16X8_2 1
-        movq        xmm6,               QWORD PTR [rdi+rcx*2] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
-        movq        xmm5,               QWORD PTR [rsi+rcx*2] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+        movq        xmm2,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+        movq        xmm5,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
 
-        punpcklbw   xmm5,               xmm6            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
-        movq        xmm6,               QWORD PTR [rsi+rcx]   ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
+        punpcklbw   xmm2,               xmm5            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+        movq        xmm0,               QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
+
+        movq        xmm5,               QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
+        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+
+        movq        xmm1,               QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
+        movq        xmm6,               QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
 
-        movq        xmm1,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
         punpcklbw   xmm1,               xmm6            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+        movq        xmm5,               QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
+
+        movq        xmm6,               QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
+        punpcklbw   xmm5,               xmm6            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
 
         movdqa      xmm6,               xmm1            ;
         punpckhwd   xmm6,               xmm5            ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4
 
         punpcklwd   xmm1,               xmm5            ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0
-        movq        xmm5,               QWORD PTR [rsi+rax]   ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
-
-        movq        xmm0,               QWORD PTR [rsi+rax*2] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
-        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-
-        movq        xmm2,               QWORD PTR [rsi+rax*4] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
-        movq        xmm5,               QWORD PTR [rdi+rax*4] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
-
-        punpcklbw   xmm2,               xmm5            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
         movdqa      xmm5,               xmm2            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
 
         punpcklwd   xmm5,               xmm0            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
@@ -995,7 +996,6 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         lea         rdx,                srct
 
         movdqa      xmm2,               [rdx]           ; p1        lea         rsi,       [rsi+rcx*8]
-        lea         rdi,                [rsi+rcx]
         movdqa      xmm7,               [rdx+48]        ; q1
         movdqa      xmm6,               [rdx+16]        ; p0
         movdqa      xmm0,               [rdx+32]        ; q0
@@ -1103,27 +1103,27 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
 %endmacro
 
 %macro BV_WRITEBACK 2
-        movd        [rsi+rax*4+2],      %1
+        movd        [rsi+2],            %1
         psrldq      %1,                 4
 
-        movd        [rdi+rax*4+2],      %1
+        movd        [rdi+2],            %1
         psrldq      %1,                 4
 
-        movd        [rsi+rax*2+2],      %1
+        movd        [rsi+2*rax+2],      %1
         psrldq      %1,                 4
 
-        movd        [rdi+rax*2+2],      %1
+        movd        [rdi+2*rax+2],      %1
 
-        movd        [rsi+2],            %2
+        movd        [rsi+4*rax+2],      %2
         psrldq      %2,                 4
 
-        movd        [rdi+2],            %2
+        movd        [rdi+4*rax+2],      %2
         psrldq      %2,                 4
 
-        movd        [rdi+rcx+2],        %2
+        movd        [rsi+2*rcx+2],      %2
         psrldq      %2,                 4
 
-        movd        [rdi+rcx*2+2],      %2
+        movd        [rdi+2*rcx+2],      %2
 %endmacro
 
 
@@ -1156,16 +1156,15 @@ sym(vp8_loop_filter_vertical_edge_sse2):
         mov         rsi,        arg(0)                  ; src_ptr
         movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
 
-        lea         rsi,        [rsi + rax*4 - 4]
+        lea         rsi,        [rsi - 4]
         lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
-        mov         rcx,        rax
-        neg         rax
+        lea         rcx,        [rax*2+rax]
 
         ;transpose 16x8 to 8x16, and store the 8-line result on stack.
         TRANSPOSE_16X8_1
 
-        lea         rsi,        [rsi+rcx*8]
-        lea         rdi,        [rdi+rcx*8]
+        lea         rsi,        [rsi+rax*8]
+        lea         rdi,        [rdi+rax*8]
         lea         rdx,        srct
         TRANSPOSE_16X8_2 1
 
@@ -1180,10 +1179,14 @@ sym(vp8_loop_filter_vertical_edge_sse2):
         ; tranpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE
         ; store 16-line result
+
+        lea         rdx,        [rax]
+        neg         rdx
+
         BV_WRITEBACK xmm1, xmm5
 
-        lea         rsi,        [rsi+rax*8]
-        lea         rdi,        [rsi+rcx]
+        lea         rsi,        [rsi+rdx*8]
+        lea         rdi,        [rdi+rdx*8]
         BV_WRITEBACK xmm2, xmm6
 
     add rsp, 96
@@ -1227,17 +1230,16 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         mov         rsi,        arg(0)                  ; u_ptr
         movsxd      rax,        dword ptr arg(1)        ; src_pixel_step
 
-        lea         rsi,        [rsi + rax*4 - 4]
+        lea         rsi,        [rsi - 4]
         lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
-        mov         rcx,        rax
-        neg         rax
+        lea         rcx,        [rax+2*rax]
 
         ;transpose 16x8 to 8x16, and store the 8-line result on stack.
         TRANSPOSE_16X8_1
 
-        mov         rsi,        arg(5)                   ; v_ptr
-        lea         rsi,        [rsi + rcx*4 - 4]
-        lea         rdi,        [rsi + rcx]              ; rdi points to row +1 for indirect addressing
+        mov         rsi,        arg(5)                  ; v_ptr
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
 
         lea         rdx,        srct
         TRANSPOSE_16X8_2 1
@@ -1252,12 +1254,15 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 
         ; tranpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE
+
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
+
         ; store 16-line result
         BV_WRITEBACK xmm1, xmm5
 
-        mov         rsi,        arg(0)                   ;u_ptr
-        lea         rsi,        [rsi + rcx*4 - 4]
-        lea         rdi,        [rsi + rcx]
+        mov         rsi,        arg(0)                  ; u_ptr
+        lea         rsi,        [rsi - 4]
+        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
         BV_WRITEBACK xmm2, xmm6
 
     add rsp, 96
@@ -1479,28 +1484,30 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 %endmacro
 
 %macro MBV_WRITEBACK_1 0
-        movq        QWORD PTR [rsi+rax*4], xmm0
+        movq        QWORD PTR [rsi],    xmm0
         psrldq      xmm0,               8
 
-        movq        QWORD PTR [rsi+rax*2], xmm6
+        movq        QWORD PTR [rdi],    xmm0
+
+        movq        QWORD PTR [rsi+2*rax], xmm6
         psrldq      xmm6,               8
 
-        movq        QWORD PTR [rdi+rax*4], xmm0
-        movq        QWORD PTR [rsi+rax],   xmm6
+        movq        QWORD PTR [rdi+2*rax], xmm6
 
         movdqa      xmm0,               xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
         punpckldq   xmm0,               xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
 
         punpckhdq   xmm5,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
 
-        movq        QWORD PTR [rsi],    xmm0
+        movq        QWORD PTR [rsi+4*rax], xmm0
         psrldq      xmm0,               8
 
-        movq        QWORD PTR [rsi+rcx*2], xmm5
+        movq        QWORD PTR [rdi+4*rax], xmm0
+
+        movq        QWORD PTR [rsi+2*rcx], xmm5
         psrldq      xmm5,               8
 
-        movq        QWORD PTR [rsi+rcx],   xmm0
-        movq        QWORD PTR [rdi+rcx*2], xmm5
+        movq        QWORD PTR [rdi+2*rcx], xmm5
 
         movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
         punpckhbw   xmm2,               [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
@@ -1518,28 +1525,30 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 %endmacro
 
 %macro MBV_WRITEBACK_2 0
-        movq        QWORD PTR [rsi+rax*4], xmm1
+        movq        QWORD PTR [rsi], xmm1
         psrldq      xmm1,               8
 
-        movq        QWORD PTR [rsi+rax*2], xmm3
+        movq        QWORD PTR [rdi], xmm1
+
+        movq        QWORD PTR [rsi+2*rax], xmm3
         psrldq      xmm3,               8
 
-        movq        QWORD PTR [rdi+rax*4], xmm1
-        movq        QWORD PTR [rsi+rax],   xmm3
+        movq        QWORD PTR [rdi+2*rax], xmm3
 
         movdqa      xmm1,               xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
         punpckldq   xmm1,               xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
 
         punpckhdq   xmm4,               xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
-        movq        QWORD PTR [rsi],    xmm1
+        movq        QWORD PTR [rsi+4*rax], xmm1
 
         psrldq      xmm1,               8
 
-        movq        QWORD PTR [rsi+rcx*2], xmm4
+        movq        QWORD PTR [rdi+4*rax], xmm1
+
+        movq        QWORD PTR [rsi+2*rcx], xmm4
         psrldq      xmm4,               8
 
-        movq        QWORD PTR [rsi+rcx], xmm1
-        movq        QWORD PTR [rdi+rcx*2], xmm4
+        movq        QWORD PTR [rdi+2*rcx], xmm4
 %endmacro
 
 
@@ -1569,20 +1578,19 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
     %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
     %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
 
-        mov         rsi,                arg(0) ;src_ptr
-        movsxd      rax,                dword ptr arg(1)   ;src_pixel_step
+        mov         rsi,                arg(0)              ; src_ptr
+        movsxd      rax,                dword ptr arg(1)    ; src_pixel_step
 
-        lea         rsi,                [rsi + rax*4 - 4]
-        lea         rdi,                [rsi + rax]        ; rdi points to row +1 for indirect addressing
-        mov         rcx,                rax
-        neg         rax
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]         ; rdi points to row +1 for indirect addressing
+        lea         rcx,                [rax*2+rax]
 
         ; Transpose
         TRANSPOSE_16X8_1
 
-        lea         rsi,                [rsi+rcx*8]
-        lea         rdi,                [rdi+rcx*8]
-        lea         rdx,                srct
+        lea         rsi,        [rsi+rax*8]
+        lea         rdi,        [rdi+rax*8]
+        lea         rdx,        srct
         TRANSPOSE_16X8_2 0
 
         ; calculate filter mask
@@ -1590,18 +1598,22 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
         ; calculate high edge variance
         LFV_HEV_MASK
 
+        neg         rax
         ; start work on filters
         MBV_FILTER
 
+        lea         rsi,                [rsi+rax*8]
+        lea         rdi,                [rdi+rax*8]
+
         ; transpose and write back
         MBV_TRANSPOSE
 
-        lea         rsi,                [rsi+rax*8]
-        lea         rdi,                [rdi+rax*8]
+        neg         rax
+
         MBV_WRITEBACK_1
 
-        lea         rsi,                [rsi+rcx*8]
-        lea         rdi,                [rdi+rcx*8]
+        lea         rsi,                [rsi+rax*8]
+        lea         rdi,                [rdi+rax*8]
         MBV_WRITEBACK_2
 
     add rsp, 160
@@ -1642,21 +1654,20 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
     %define t1   [rsp + 16]   ;__declspec(align(16)) char t1[16];
     %define srct [rsp + 32]   ;__declspec(align(16)) char srct[128];
 
-        mov         rsi,                arg(0) ;u_ptr
-        movsxd      rax,                dword ptr arg(1)   ; src_pixel_step
+        mov         rsi,                arg(0)              ; u_ptr
+        movsxd      rax,                dword ptr arg(1)    ; src_pixel_step
 
-        lea         rsi,                [rsi + rax*4 - 4]
-        lea         rdi,                [rsi + rax]        ; rdi points to row +1 for indirect addressing
-        mov         rcx,                rax
-        neg         rax
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]         ; rdi points to row +1 for indirect addressing
+        lea         rcx,                [rax+2*rax]
 
         ; Transpose
         TRANSPOSE_16X8_1
 
         ; XMM3 XMM4 XMM7 in use
-        mov         rsi,                arg(5)             ;v_ptr
-        lea         rsi,                [rsi + rcx*4 - 4]
-        lea         rdi,                [rsi + rcx]
+        mov         rsi,                arg(5)              ; v_ptr
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
         lea         rdx,                srct
         TRANSPOSE_16X8_2 0
 
@@ -1672,12 +1683,12 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
         MBV_TRANSPOSE
 
         mov         rsi,                arg(0)             ;u_ptr
-        lea         rsi,                [rsi + rcx*4 - 4]
-        lea         rdi,                [rsi + rcx]
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
         MBV_WRITEBACK_1
         mov         rsi,                arg(5)             ;v_ptr
-        lea         rsi,                [rsi + rcx*4 - 4]
-        lea         rdi,                [rsi + rcx]
+        lea         rsi,                [rsi - 4]
+        lea         rdi,                [rsi + rax]
         MBV_WRITEBACK_2
 
     add rsp, 160

From 7f1a908b97656580590018ab3b6c34544e8926ac Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 13 Sep 2010 09:00:24 -0400
Subject: [PATCH 159/307] cosmetics: expand tabs in configure

Change-Id: I88ddb0afb56ef2be8184b56fe125ad938ead7a84
---
 build/make/configure.sh | 6 +++---
 configure               | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 73cb60085..e9f8a2b9c 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -495,7 +495,7 @@ setup_gnu_toolchain() {
 
 process_common_toolchain() {
     if [ -z "$toolchain" ]; then
-	gcctarget="$(gcc -dumpmachine 2> /dev/null)"
+        gcctarget="$(gcc -dumpmachine 2> /dev/null)"
 
         # detect tgt_isa
         case "$gcctarget" in
@@ -902,8 +902,8 @@ EOF
 
     # glibc needs these
     if enabled linux; then
-	add_cflags -D_LARGEFILE_SOURCE
-	add_cflags -D_FILE_OFFSET_BITS=64
+        add_cflags -D_LARGEFILE_SOURCE
+        add_cflags -D_FILE_OFFSET_BITS=64
     fi
 }
 
diff --git a/configure b/configure
index ac3d1621d..5d6964e09 100755
--- a/configure
+++ b/configure
@@ -412,7 +412,7 @@ process_detect() {
         # Can only build shared libs on a subset of platforms. Doing this check
         # here rather than at option parse time because the target auto-detect
         # magic happens after the command line has been parsed.
-	enabled linux || die "--enable-shared only supported on ELF for now"
+        enabled linux || die "--enable-shared only supported on ELF for now"
     fi
     if [ -z "$CC" ]; then
         echo "Bypassing toolchain for environment detection."
@@ -514,7 +514,7 @@ process_toolchain() {
     enabled gcc || soft_disable ccache
     if enabled mips; then
         enable dequant_tokens
-	enable dc_recon
+        enable dc_recon
     fi
 
     # Enable the postbuild target if building for visual studio.

From 887d6ef49ae2adfbd10a9beb0e5787ea39937e02 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 13 Sep 2010 09:04:55 -0400
Subject: [PATCH 160/307] configure: support for ppc32-linux-gcc

Fixes issue 89. Thanks to josejx for the patch.

Change-Id: I7e664fed703b49f2fb3af4c5e6ce1173742000c2
---
 build/make/configure.sh | 9 ++++++++-
 configure               | 1 +
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index e9f8a2b9c..e787c5d2c 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -505,6 +505,12 @@ process_common_toolchain() {
             *i[3456]86*)
                 tgt_isa=x86
                 ;;
+            *powerpc64*)
+                tgt_isa=ppc64
+                ;;
+            *powerpc*)
+                tgt_isa=ppc32
+                ;;
         esac
 
         # detect tgt_os
@@ -755,8 +761,8 @@ process_common_toolchain() {
         link_with_cc=gcc
         setup_gnu_toolchain
         add_asflags -force_cpusubtype_ALL -I"\$(dir \$<)darwin"
-        add_cflags -maltivec -faltivec
         soft_enable altivec
+        enabled altivec && add_cflags -maltivec
 
         case "$tgt_os" in
         linux*)
@@ -768,6 +774,7 @@ process_common_toolchain() {
             add_cflags  ${darwin_arch} -m${bits} -fasm-blocks
             add_asflags ${darwin_arch} -force_cpusubtype_ALL -I"\$(dir \$<)darwin"
             add_ldflags ${darwin_arch} -m${bits}
+            enabled altivec && add_cflags -faltivec
         ;;
         esac
     ;;
diff --git a/configure b/configure
index 5d6964e09..f4cd67ce2 100755
--- a/configure
+++ b/configure
@@ -97,6 +97,7 @@ all_platforms="${all_platforms} ppc32-darwin8-gcc"
 all_platforms="${all_platforms} ppc32-darwin9-gcc"
 all_platforms="${all_platforms} ppc64-darwin8-gcc"
 all_platforms="${all_platforms} ppc64-darwin9-gcc"
+all_platforms="${all_platormss} ppc32-linux-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
 all_platforms="${all_platforms} x86-darwin8-icc"

From eeca6b786a0a0ec03a61f214d81496ea666ddd57 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 13 Sep 2010 09:46:51 -0400
Subject: [PATCH 161/307] Remove legacy release.sh script

This script is part of a legacy release process and is unsupported. Most
of this functionality has been moved into 'make dist.'

Change-Id: Id67936302083352b628869e2988876cf56558ca5
---
 release.sh | 210 -----------------------------------------------------
 1 file changed, 210 deletions(-)
 delete mode 100755 release.sh

diff --git a/release.sh b/release.sh
deleted file mode 100755
index 800bdf82f..000000000
--- a/release.sh
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-
-
-
-self=$0
-
-for opt; do
-    case $opt in
-        --clean) clean=yes;;
-        -j*) jopt=$opt;;
-        *) echo "Unsupported option $opt"; exit 1;;
-    esac
-done
-
-TAB="$(printf '\t')"
-cat > release.mk << EOF
-%\$(BUILD_SFX).tar.bz2: %/.done
-${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball"
-${TAB}@cd \$(dir \$<); tar -cf - \$(subst .tar.bz2,,\$@) | bzip2 > ../\$@
-
-%\$(BUILD_SFX).zip: %/.done
-${TAB}@echo "\$(subst .zip,,\$@): zip"
-${TAB}@rm -f \$@; cd \$(dir \$<); zip -rq ../\$@ \$(subst .zip,,\$@)
-
-logs/%\$(BUILD_SFX).log.bz2: %/.done
-${TAB}@echo "\$(subst .log.bz2,,\$(notdir \$@)): tarlog"
-${TAB}@mkdir -p logs
-${TAB}@cat \$< | bzip2 > \$@
-
-%/.done:
-${TAB}@mkdir -p \$(dir \$@)
-${TAB}@echo "\$(dir \$@): configure \$(CONFIG_OPTS) \$(EXTRA_PATH)"
-${TAB}@cd \$(dir \$@); export PATH=\$\$PATH\$(EXTRA_PATH); ../\$(SRC_ROOT)/configure \$(CONFIG_OPTS) >makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): make"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): test install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install >>makelog.txt 2>&1
-${TAB}@cd \$(dir \$@)/dist/build; PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install DIST_DIR=\$(TGT) >>makelog.txt 2>&1
-${TAB}@touch \$@
-
-#include release-deps.mk
-EOF
-
-#[ -f release-deps.mk ] || \
-#    find ${self%/*} -name .git -prune -o -type f -print0 \
-#    | xargs -0 -n1 echo \
-#    | sed -e 's; ;\\ ;g' | awk '{print "$(TGT)/.done: "$0}' > release-deps.mk
-
-build_config_list() {
-    for codec in $CODEC_LIST; do
-        for arch in $ARCH_LIST; do
-            if [ -n "$OS_LIST" ]; then
-                for os in $OS_LIST; do
-                    CONFIGS="$CONFIGS vpx-${codec}-${arch}-${os}"
-                done
-            else
-                CONFIGS="$CONFIGS vpx-${codec}-${arch}"
-            fi
-        done
-    done
-}
-
-CODEC_LIST="vp8 vp8cx vp8dx"
-case `uname` in
-    Linux*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="linux"
-        build_config_list
-        ARCH_LIST="armv5te armv6 armv7"
-        OS_LIST="linux-gcc"
-
-        ;;
-    CYGWIN*)
-        TAR_SFX=.zip
-        for vs in vs7 vs8; do
-            for arch in x86-win32 x86_64-win64; do
-                for msvcrt in md mt; do
-                    case $vs,$arch in
-                        vs7,x86_64-win64) continue ;;
-                    esac
-                    ARCH_LIST="$ARCH_LIST ${arch}${msvcrt}-${vs}"
-                done
-            done
-        done
-        ;;
-    Darwin*)
-        ARCH_LIST="universal"
-        OS_LIST="darwin8 darwin9"
-        ;;
-    sun_os*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="solaris"
-        ;;
-esac
-build_config_list
-
-TAR_SFX=${TAR_SFX:-.tar.bz2}
-ARM_TOOLCHAIN=/usr/local/google/csl-2009q3-67
-for cfg in $CONFIGS; do
-    full_cfg=$cfg
-    cfg=${cfg#vpx-}
-    opts=
-    rm -f makelog.txt
-
-    case $cfg in
-        src-*)  opts="$opts --enable-codec-srcs"
-                cfg=${cfg#src-}
-                ;;
-        eval-*) opts="$opts --enable-eval-limit"
-                cfg=${cfg#src-}
-                ;;
-    esac
-
-    case $cfg in
-        #
-        # Linux
-        #
-        *x86-linux)
-            opts="$opts --target=x86-linux-gcc" ;;
-        *x86_64-linux)
-            opts="$opts --target=x86_64-linux-gcc" ;;
-        *arm*-linux-gcc)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-gcc" ;;
-        *arm*-linux-rvct)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-rvct"
-            opts="$opts --libc=${ARM_TOOLCHAIN}/arm-none-linux-gnueabi/libc" ;;
-
-
-        #
-        # Windows
-        #
-        # need --enable-debug-libs for now until we're smarter about
-        # building the debug/release from the customer installed
-        # environment
-        *-x86-win32*-vs*)
-            opts="$opts --target=x86-win32-vs${cfg##*-vs} --enable-debug-libs";;
-        *-x86_64-win64*-vs8)
-            opts="$opts --target=x86_64-win64-vs8 --enable-debug-libs" ;;
-
-        #
-        # Darwin
-        #
-        *-universal-darwin*)
-            opts="$opts --target=universal-darwin${cfg##*-darwin}-gcc" ;;
-
-        #
-        # Solaris
-        #
-        *x86-solaris)
-            opts="$opts --target=x86-solaris-gcc" ;;
-        *x86_64-solaris)
-            opts="$opts --target=x86_64-solaris-gcc" ;;
-    esac
-
-    case $cfg in
-        *x86-linux | *x86-solaris) opts="$opts --enable-pic" ;;
-    esac
-
-    case $cfg in
-        *-win[36][24]mt*)  opts="$opts --enable-static-msvcrt" ;;
-        *-win[36][24]md*)  opts="$opts --disable-static-msvcrt" ;;
-    esac
-
-    opts="$opts --disable-codecs"
-    case $cfg in
-        vp8*) opts="$opts --enable-vp8" ;;
-    esac
-    case $cfg in
-        *cx-*) opts="${opts}-encoder" ;;
-        *dx-*) opts="${opts}-decoder" ;;
-    esac
-    opts="$opts --enable-postproc"
-
-    [ "x${clean}" = "xyes" ] \
-        && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \
-        && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2
-
-    TGT=${full_cfg}${BUILD_SFX}
-    BUILD_TARGETS="logs/${TGT}.log.bz2 ${TGT}${TAR_SFX}"
-    echo "${BUILD_TARGETS}: CONFIG_OPTS=$opts" >>release.mk
-    echo "${BUILD_TARGETS}: TGT=${TGT}" >>release.mk
-    case $cfg in
-        *-arm*-linux-*)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:${ARM_TOOLCHAIN}/bin/" >>release.mk ;;
-        *-vs7)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ .NET\ 2003/Common7/IDE" >>release.mk ;;
-        *-vs8)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ 8/Common7/IDE" >>release.mk ;;
-    esac
-    MAKE_TGTS="$MAKE_TGTS ${TGT}${TAR_SFX} logs/${TGT}.log.bz2"
-done
-
-
-${MAKE:-make} ${jopt:--j3} -f release.mk  \
-    SRC_ROOT=${self%/*} BUILD_SFX=${BUILD_SFX} ${MAKE_TGTS}

From 769f2424ccce47c491913c38b06581aa777a53c0 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Mon, 13 Sep 2010 18:34:34 -0700
Subject: [PATCH 162/307] Removed unnecessary pxor.

There is no need to make sure that the lower byte of the
register is 0 because the downshift by 11 overwrites that byte.

Change-Id: I89cbf004b2ff532a2c68e0dc399c45a49cdad5a1
---
 vp8/common/x86/loopfilter_sse2.asm | 102 ++++++++++++-----------------
 1 file changed, 41 insertions(+), 61 deletions(-)

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 985d5a09d..57276b661 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -196,12 +196,12 @@
         pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
 
         psubsb      xmm2,                   xmm7              ; p1 - q1
-        pand        xmm2,                   xmm4              ; high var mask (hvm)(p1 - q1)
         pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
 
+        pand        xmm2,                   xmm4              ; high var mask (hvm)(p1 - q1)
         pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
-        movdqa      xmm3,                   xmm0              ; q0
 
+        movdqa      xmm3,                   xmm0              ; q0
         psubsb      xmm0,                   xmm6              ; q0 - p0
         paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + hvm(p1 - q1)
         paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0) + hvm(p1 - q1)
@@ -211,29 +211,28 @@
         paddsb      xmm1,                   [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
         paddsb      xmm2,                   [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
 
-        pxor        xmm0,                   xmm0
-        pxor        xmm5,                   xmm5
-        punpcklbw   xmm0,                   xmm2
-        punpckhbw   xmm5,                   xmm2
-        psraw       xmm0,                   11
-        psraw       xmm5,                   11
-        packsswb    xmm0,                   xmm5
-        movdqa      xmm2,                   xmm0              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+        punpckhbw   xmm5,                   xmm2              ; axbxcxdx
+        punpcklbw   xmm2,                   xmm2              ; exfxgxhx
+
+        psraw       xmm5,                   11                ; sign extended shift right by 3
+        psraw       xmm2,                   11                ; sign extended shift right by 3
+        packsswb    xmm2,                   xmm5              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+
+        punpcklbw   xmm0,                   xmm1              ; exfxgxhx
+        punpckhbw   xmm1,                   xmm1              ; axbxcxdx
 
-        pxor        xmm0,                   xmm0              ; 0
-        movdqa      xmm5,                   xmm1              ; abcdefgh
-        punpcklbw   xmm0,                   xmm1              ; e0f0g0h0
         psraw       xmm0,                   11                ; sign extended shift right by 3
-        pxor        xmm1,                   xmm1              ; 0
-        punpckhbw   xmm1,                   xmm5              ; a0b0c0d0
         psraw       xmm1,                   11                ; sign extended shift right by 3
-        movdqa      xmm5,                   xmm0              ; save results
 
+        movdqa      xmm5,                   xmm0              ; save results
         packsswb    xmm0,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
+
         paddsw      xmm5,                   [ones GLOBAL]
         paddsw      xmm1,                   [ones GLOBAL]
+
         psraw       xmm5,                   1                 ; partial shifted one more time for 2nd tap
         psraw       xmm1,                   1                 ; partial shifted one more time for 2nd tap
+
         packsswb    xmm5,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
         pandn       xmm4,                   xmm5              ; high edge variance additive
 %endmacro
@@ -433,29 +432,27 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         pand        xmm2,                   xmm4;             ; Filter2 = vp8_filter & hev
 
         movdqa      xmm5,                   xmm2
-        paddsb      xmm5,                   [t3 GLOBAL]
+        paddsb      xmm5,                   [t3 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 3)
+
+        punpckhbw   xmm7,                   xmm5              ; axbxcxdx
+        punpcklbw   xmm5,                   xmm5              ; exfxgxhx
 
-        pxor        xmm0,                   xmm0              ; 0
-        pxor        xmm7,                   xmm7              ; 0
-        punpcklbw   xmm0,                   xmm5              ; e0f0g0h0
-        psraw       xmm0,                   11                ; sign extended shift right by 3
-        punpckhbw   xmm7,                   xmm5              ; a0b0c0d0
         psraw       xmm7,                   11                ; sign extended shift right by 3
-        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
-        movdqa      xmm5,                   xmm0              ; Filter2
-        paddsb      xmm2,                   [t4 GLOBAL]      ; vp8_signed_char_clamp(Filter2 + 4)
+        psraw       xmm5,                   11                ; sign extended shift right by 3
+
+        packsswb    xmm5,                   xmm7              ; Filter2 >>=3;
+        paddsb      xmm2,                   [t4 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 4)
+
+        punpckhbw   xmm7,                   xmm2              ; axbxcxdx
+        punpcklbw   xmm0,                   xmm2              ; exfxgxhx
 
-        pxor        xmm0,                   xmm0              ; 0
-        pxor        xmm7,                   xmm7              ; 0
-        punpcklbw   xmm0,                   xmm2              ; e0f0g0h0
-        psraw       xmm0,                   11                ; sign extended shift right by 3
-        punpckhbw   xmm7,                   xmm2              ; a0b0c0d0
         psraw       xmm7,                   11                ; sign extended shift right by 3
-        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
+        psraw       xmm0,                   11                ; sign extended shift right by 3
 
-        psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
+        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
         paddsb      xmm6,                   xmm5              ; ps0 =ps0 + Fitler2
 
+        psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
         pandn       xmm4,                   xmm1              ; vp8_filter&=~hev
 %endmacro
 
@@ -465,7 +462,6 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         ; *oq0 = s^0x80;
         ; s = vp8_signed_char_clamp(ps0 + u);
         ; *op0 = s^0x80;
-        pxor        xmm0,                   xmm0
         pxor        xmm1,                   xmm1
 
         pxor        xmm2,                   xmm2
@@ -1022,28 +1018,19 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         paddsb      xmm1,               [t4 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 4
 
         paddsb      xmm2,               [t3 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-        pxor        xmm0,               xmm0
-
-        pxor        xmm5,               xmm5
-        punpcklbw   xmm0,               xmm2
 
         punpckhbw   xmm5,               xmm2
-        psraw       xmm0,               11
+        punpcklbw   xmm2,               xmm2
 
         psraw       xmm5,               11
-        packsswb    xmm0,               xmm5
+        psraw       xmm2,               11
 
-        movdqa      xmm2,               xmm0            ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+        packsswb    xmm2,               xmm5            ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
+        punpcklbw   xmm0,               xmm1            ; exfxgxhx
 
-        pxor        xmm0,               xmm0            ; 0
-        movdqa      xmm5,               xmm1            ; abcdefgh
-
-        punpcklbw   xmm0,               xmm1            ; e0f0g0h0
+        punpckhbw   xmm1,               xmm1            ; axbxcxdx
         psraw       xmm0,               11              ; sign extended shift right by 3
 
-        pxor        xmm1,               xmm1            ; 0
-        punpckhbw   xmm1,               xmm5            ; a0b0c0d0
-
         psraw       xmm1,               11              ; sign extended shift right by 3
         movdqa      xmm5,               xmm0            ; save results
 
@@ -1308,28 +1295,22 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         movdqa      xmm5,               xmm2
         paddsb      xmm5,               [t3 GLOBAL]
 
-        pxor        xmm0,               xmm0            ; 0
-        pxor        xmm7,               xmm7            ; 0
+        punpckhbw   xmm7,               xmm5            ; axbxcxdx
+        punpcklbw   xmm5,               xmm5            ; exfxgxhx
 
-        punpcklbw   xmm0,               xmm5            ; e0f0g0h0
-        psraw       xmm0,               11              ; sign extended shift right by 3
-
-        punpckhbw   xmm7,               xmm5            ; a0b0c0d0
         psraw       xmm7,               11              ; sign extended shift right by 3
+        psraw       xmm5,               11              ; sign extended shift right by 3
 
-        packsswb    xmm0,               xmm7            ; Filter2 >>=3;
-        movdqa      xmm5,               xmm0            ; Filter2
+        packsswb    xmm5,               xmm7            ; Filter2 >>=3;
 
         paddsb      xmm2,               [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
-        pxor        xmm0,               xmm0            ; 0
 
-        pxor        xmm7,               xmm7            ; 0
-        punpcklbw   xmm0,               xmm2            ; e0f0g0h0
+        punpcklbw   xmm0,               xmm2            ; exfxgxhx
+        punpckhbw   xmm7,               xmm2            ; axbxcxdx
 
         psraw       xmm0,               11              ; sign extended shift right by 3
-        punpckhbw   xmm7,               xmm2            ; a0b0c0d0
-
         psraw       xmm7,               11              ; sign extended shift right by 3
+
         packsswb    xmm0,               xmm7            ; Filter2 >>=3;
 
         psubsb      xmm3,               xmm0            ; qs0 =qs0 - filter1
@@ -1344,7 +1325,6 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         ; *oq0 = s^0x80;
         ; s = vp8_signed_char_clamp(ps0 + u);
         ; *op0 = s^0x80;
-        pxor        xmm0,               xmm0
         pxor        xmm1,               xmm1
 
         pxor        xmm2,               xmm2

From 746439ef6c1dd2fedbe0c24ddb76d40cb9d26357 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 14 Sep 2010 15:46:37 -0700
Subject: [PATCH 163/307] Modify GET_GOT macro for performance.

GET_GOT was producing a zero length call.  This resulted in
pipeline flushes occuring when returing from the assembly
functions.  Masked on out of order cores, but evident on
Atom cores.

Change-Id: I8c375af313e8a169c77adbaf956693c0cfeb5ccd
---
 vpx_ports/x86_abi_support.asm | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 5d85d8e2e..dc9e2d92c 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -90,7 +90,7 @@
 %macro ALIGN_STACK 2
     mov         %2, rsp
     and         rsp, -%1
-    sub         rsp, %1 - REG_SZ_BYTES
+    lea         rsp, [rsp - (%1 - REG_SZ_BYTES)]
     push        %2
 %endmacro
 
@@ -105,7 +105,6 @@
 %idefine XMMWORD
 %idefine MMWORD
 
-
 ; PIC macros
 ;
 %if ABI_IS_32BIT
@@ -116,9 +115,13 @@
       extern _GLOBAL_OFFSET_TABLE_
       push %1
       call %%get_got
+      %%sub_offset:
+      jmp %%exitGG
       %%get_got:
-      pop %1
-      add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%get_got wrt ..gotpc
+      mov %1, [esp]
+      add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
+      ret
+      %%exitGG:
       %undef GLOBAL
       %define GLOBAL + %1 wrt ..gotoff
       %undef RESTORE_GOT
@@ -128,9 +131,13 @@
     %macro GET_GOT 1
       push %1
       call %%get_got
+      %%sub_offset:
+      jmp  %%exitGG
       %%get_got:
-      pop %1
-      add %1, fake_got - %%get_got
+      mov  %1, [esp]
+      add %1, fake_got - %%sub_offset
+      ret
+      %%exitGG:
       %undef GLOBAL
       %define GLOBAL + %1 - fake_got
       %undef RESTORE_GOT

From 147b125b1596df9bd0c8141b9d09229ab65d3e0f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 16 Sep 2010 10:00:04 -0400
Subject: [PATCH 164/307] Reduce size of tokenizer tables

This patch reduces the size of the global tables maintained by the
tokenizer to 16k from 80k-96k. See issue #177.

Change-Id: If0275d5f28389af11ac83c5d929d1157cde90fbe
---
 vp8/encoder/tokenize.c | 6 +++---
 vp8/encoder/tokenize.h | 8 +++++++-
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index d9b8d36fd..0e86f28df 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -26,8 +26,8 @@ _int64 context_counters[BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef
 void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) ;
 void vp8_fix_contexts(MACROBLOCKD *x);
 
-TOKENEXTRA vp8_dct_value_tokens[DCT_MAX_VALUE*2];
-const TOKENEXTRA *vp8_dct_value_tokens_ptr;
+TOKENVALUE vp8_dct_value_tokens[DCT_MAX_VALUE*2];
+const TOKENVALUE *vp8_dct_value_tokens_ptr;
 int vp8_dct_value_cost[DCT_MAX_VALUE*2];
 const int *vp8_dct_value_cost_ptr;
 #if 0
@@ -37,7 +37,7 @@ int skip_false_count = 0;
 static void fill_value_tokens()
 {
 
-    TOKENEXTRA *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
+    TOKENVALUE *const t = vp8_dct_value_tokens + DCT_MAX_VALUE;
     vp8_extra_bit_struct *const e = vp8_extra_bits;
 
     int i = -DCT_MAX_VALUE;
diff --git a/vp8/encoder/tokenize.h b/vp8/encoder/tokenize.h
index 7b9fc9eaa..01e8ec6d7 100644
--- a/vp8/encoder/tokenize.h
+++ b/vp8/encoder/tokenize.h
@@ -17,6 +17,12 @@
 
 void vp8_tokenize_initialize();
 
+typedef struct
+{
+    short Token;
+    short Extra;
+} TOKENVALUE;
+
 typedef struct
 {
     int Token;
@@ -40,6 +46,6 @@ extern const int *vp8_dct_value_cost_ptr;
  *  improve cache locality, since it's needed for costing when the rest of the
  *  fields are not.
  */
-extern const TOKENEXTRA *vp8_dct_value_tokens_ptr;
+extern const TOKENVALUE *vp8_dct_value_tokens_ptr;
 
 #endif  /* tokenize_h */

From 9100073e8d99f2cf1b0b2d2288687d193295addf Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 16 Sep 2010 13:13:31 -0400
Subject: [PATCH 165/307] cleanup: remove unused xprintf

These files aren't currently used, and we can get them back if we
need them.

Change-Id: I62aa3bff828e491a80c80eeb84a7c44903df29b5
---
 vp8/decoder/xprintf.c | 164 ------------------------------------------
 vp8/decoder/xprintf.h |  33 ---------
 2 files changed, 197 deletions(-)
 delete mode 100644 vp8/decoder/xprintf.c
 delete mode 100644 vp8/decoder/xprintf.h

diff --git a/vp8/decoder/xprintf.c b/vp8/decoder/xprintf.c
deleted file mode 100644
index e3b953ef3..000000000
--- a/vp8/decoder/xprintf.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-*   Module Title :     xprintf.cpp
-*
-*   Description  :     Display a printf style message on the current video frame.
-*
-****************************************************************************/
-
-/****************************************************************************
-*  Header Files
-****************************************************************************/
-
-#include <stdio.h>
-#include <stdarg.h>
-#ifdef _WIN32_WCE
-#include <windows.h>
-#endif
-#include "xprintf.h"
-
-/****************************************************************************
- *
- *  ROUTINE       : xprintf
- *
- *  INPUTS        : const PB_INSTANCE *ppbi : Pointer to decoder instance.
- *                  long n_pixel             : Offset into buffer to write text.
- *                  const char *format      : Format string for print.
- *                  ...                     : Variable length argument list.
- *
- *  OUTPUTS       : None.
- *
- *  RETURNS       : int: Size (in bytes) of the formatted text.
- *
- *  FUNCTION      : Display a printf style message on the current video frame.
- *
- *  SPECIAL NOTES : None.
- *
- ****************************************************************************/
-int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...)
-{
-    BOOL b_rc;
-    va_list arglist;
-    HFONT hfont, hfonto;
-
-    int rc = 0;
-    char sz_formatted[256] = "";
-    unsigned char *p_dest = &ppbuffer[n_pixel];
-
-#ifdef _WIN32_WCE
-    //  Set up temporary bitmap
-    HDC hdc_memory   = NULL;
-    HBITMAP hbm_temp = NULL;
-    HBITMAP hbm_orig = NULL;
-
-    RECT rect;
-
-    //  Copy bitmap to video frame
-    long x;
-    long y;
-
-    //  Format text
-    va_start(arglist, format);
-    _vsnprintf(sz_formatted, sizeof(sz_formatted), format, arglist);
-    va_end(arglist);
-
-    rect.left   = 0;
-    rect.top    = 0;
-    rect.right  = 8 * strlen(sz_formatted);
-    rect.bottom = 8;
-
-    hdc_memory = create_compatible_dc(NULL);
-
-    if (hdc_memory == NULL)
-        goto Exit;
-
-    hbm_temp = create_bitmap(rect.right, rect.bottom, 1, 1, NULL);
-
-    if (hbm_temp == NULL)
-        goto Exit;
-
-    hbm_orig = (HBITMAP)(select_object(hdc_memory, hbm_temp));
-
-    if (!hbm_orig)
-        goto Exit;
-
-    //  Write text into bitmap
-    //  font?
-    hfont = create_font(8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, VARIABLE_PITCH | FF_SWISS, "");
-
-    if (hfont == NULL)
-        goto Exit;
-
-    hfonto = (HFONT)(select_object(hdc_memory, hbm_temp));
-
-    if (!hfonto)
-        goto Exit;
-
-    select_object(hdc_memory, hfont);
-    set_text_color(hdc_memory, 1);
-    set_bk_color(hdc_memory, 0);
-    set_bk_mode(hdc_memory, TRANSPARENT);
-
-    b_rc = bit_blt(hdc_memory, rect.left, rect.top, rect.right, rect.bottom, hdc_memory, rect.left, rect.top, BLACKNESS);
-
-    if (!b_rc)
-        goto Exit;
-
-    b_rc = ext_text_out(hdc_memory, 0, 0, ETO_CLIPPED, &rect, sz_formatted, strlen(sz_formatted), NULL);
-
-    if (!b_rc)
-        goto Exit;
-
-    for (y = rect.top; y < rect.bottom; ++y)
-    {
-        for (x = rect.left; x < rect.right; ++x)
-        {
-            if (get_pixel(hdc_memory, x, rect.bottom - 1 - y))
-                p_dest[x] = 255;
-        }
-
-        p_dest += n_stride;
-    }
-
-    rc = strlen(sz_formatted);
-
-Exit:
-
-    if (hbm_temp != NULL)
-    {
-        if (hbm_orig != NULL)
-        {
-            select_object(hdc_memory, hbm_orig);
-        }
-
-        delete_object(hbm_temp);
-    }
-
-    if (hfont != NULL)
-    {
-        if (hfonto != NULL)
-            select_object(hdc_memory, hfonto);
-
-        delete_object(hfont);
-    }
-
-    if (hdc_memory != NULL)
-        delete_dc(hdc_memory);
-
-    hdc_memory = 0;
-
-#endif
-
-    return rc;
-}
diff --git a/vp8/decoder/xprintf.h b/vp8/decoder/xprintf.h
deleted file mode 100644
index f83dd39c6..000000000
--- a/vp8/decoder/xprintf.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/****************************************************************************
-*
-*   Module Title :     xprintf.h
-*
-*   Description  :     Debug print interface header file.
-*
-****************************************************************************/
-#ifndef __INC_XPRINTF_H
-#define __INC_XPRINTF_H
-
-/****************************************************************************
-*  Header Files
-****************************************************************************/
-
-/****************************************************************************
-*  Functions
-****************************************************************************/
-
-// Display a printf style message on the current video frame
-extern int onyx_xprintf(unsigned char *ppbuffer, long n_pixel, long n_size, long n_stride, const char *format, ...);
-
-#endif

From f857a85088eaf515f599a1040098528863d2f657 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 16 Sep 2010 14:08:52 -0400
Subject: [PATCH 166/307] Restructure multi-threaded decoder

On each MB, loopfiltering is done right after MB decoding. This
combines two loops in multi-threaded code into one, which reduces
number of synchronizations to half.

The above-row/left-col data are saved in temp buffers for
next-row/next MB decoding.

Tests on 4-core gLucid machine showed 10% decoder performance
gain with threads=4 (tulip clip). Testing on other platforms
isn't done yet.

Change-Id: Id18ea7c1e84965dabea65d4c01ca5bc056ddeac9
---
 vp8/common/reconintra_mt.c     | 980 +++++++++++++++++++++++++++++++++
 vp8/common/reconintra_mt.h     |  26 +
 vp8/decoder/decoderthreading.h |  12 +-
 vp8/decoder/decodframe.c       |  38 +-
 vp8/decoder/onyxd_if.c         |  84 ++-
 vp8/decoder/onyxd_int.h        |  26 +-
 vp8/decoder/threading.c        | 930 ++++++++++++++++++-------------
 vp8/vp8_common.mk              |   2 +
 8 files changed, 1624 insertions(+), 474 deletions(-)
 create mode 100644 vp8/common/reconintra_mt.c
 create mode 100644 vp8/common/reconintra_mt.h

diff --git a/vp8/common/reconintra_mt.c b/vp8/common/reconintra_mt.c
new file mode 100644
index 000000000..4d395629d
--- /dev/null
+++ b/vp8/common/reconintra_mt.c
@@ -0,0 +1,980 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "recon.h"
+#include "reconintra.h"
+#include "vpx_mem/vpx_mem.h"
+#include "onyxd_int.h"
+
+// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+
+void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yleft_col;
+    unsigned char yleft_buf[16];
+    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char *ypred_ptr = x->predictor;
+    int r, c, i;
+
+    if (pbi->common.filter_level)
+    {
+        yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32;
+        yleft_col = pbi->mt_yleft_col[mb_row];
+    } else
+    {
+        yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+        for (i = 0; i < 16; i++)
+            yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
+        yleft_col = yleft_buf;
+    }
+
+    ytop_left = yabove_row[-1];
+
+    // for Y
+    switch (x->mode_info_context->mbmi.mode)
+    {
+    case DC_PRED:
+    {
+        int expected_dc;
+        int i;
+        int shift;
+        int average = 0;
+
+
+        if (x->up_available || x->left_available)
+        {
+            if (x->up_available)
+            {
+                for (i = 0; i < 16; i++)
+                {
+                    average += yabove_row[i];
+                }
+            }
+
+            if (x->left_available)
+            {
+
+                for (i = 0; i < 16; i++)
+                {
+                    average += yleft_col[i];
+                }
+
+            }
+
+
+
+            shift = 3 + x->up_available + x->left_available;
+            expected_dc = (average + (1 << (shift - 1))) >> shift;
+        }
+        else
+        {
+            expected_dc = 128;
+        }
+
+        vpx_memset(ypred_ptr, expected_dc, 256);
+    }
+    break;
+    case V_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+
+            ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
+            ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
+            ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
+            ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
+            ypred_ptr += 16;
+        }
+    }
+    break;
+    case H_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+
+            vpx_memset(ypred_ptr, yleft_col[r], 16);
+            ypred_ptr += 16;
+        }
+
+    }
+    break;
+    case TM_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+            for (c = 0; c < 16; c++)
+            {
+                int pred =  yleft_col[r] + yabove_row[ c] - ytop_left;
+
+                if (pred < 0)
+                    pred = 0;
+
+                if (pred > 255)
+                    pred = 255;
+
+                ypred_ptr[c] = pred;
+            }
+
+            ypred_ptr += 16;
+        }
+
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yleft_col;
+    unsigned char yleft_buf[16];
+    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char *ypred_ptr = x->predictor;
+    int r, c, i;
+
+    int y_stride = x->dst.y_stride;
+    ypred_ptr = x->dst.y_buffer; //x->predictor;
+
+    if (pbi->common.filter_level)
+    {
+        yabove_row = pbi->mt_yabove_row[mb_row] + mb_col*16 +32;
+        yleft_col = pbi->mt_yleft_col[mb_row];
+    } else
+    {
+        yabove_row = x->dst.y_buffer - x->dst.y_stride;
+
+        for (i = 0; i < 16; i++)
+            yleft_buf[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
+        yleft_col = yleft_buf;
+    }
+
+    ytop_left = yabove_row[-1];
+
+    // for Y
+    switch (x->mode_info_context->mbmi.mode)
+    {
+    case DC_PRED:
+    {
+        int expected_dc;
+        int i;
+        int shift;
+        int average = 0;
+
+
+        if (x->up_available || x->left_available)
+        {
+            if (x->up_available)
+            {
+                for (i = 0; i < 16; i++)
+                {
+                    average += yabove_row[i];
+                }
+            }
+
+            if (x->left_available)
+            {
+
+                for (i = 0; i < 16; i++)
+                {
+                    average += yleft_col[i];
+                }
+
+            }
+
+
+
+            shift = 3 + x->up_available + x->left_available;
+            expected_dc = (average + (1 << (shift - 1))) >> shift;
+        }
+        else
+        {
+            expected_dc = 128;
+        }
+
+        //vpx_memset(ypred_ptr, expected_dc, 256);
+        for (r = 0; r < 16; r++)
+        {
+            vpx_memset(ypred_ptr, expected_dc, 16);
+            ypred_ptr += y_stride; //16;
+        }
+    }
+    break;
+    case V_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+
+            ((int *)ypred_ptr)[0] = ((int *)yabove_row)[0];
+            ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
+            ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
+            ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
+            ypred_ptr += y_stride; //16;
+        }
+    }
+    break;
+    case H_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+
+            vpx_memset(ypred_ptr, yleft_col[r], 16);
+            ypred_ptr += y_stride;  //16;
+        }
+
+    }
+    break;
+    case TM_PRED:
+    {
+
+        for (r = 0; r < 16; r++)
+        {
+            for (c = 0; c < 16; c++)
+            {
+                int pred =  yleft_col[r] + yabove_row[ c] - ytop_left;
+
+                if (pred < 0)
+                    pred = 0;
+
+                if (pred > 255)
+                    pred = 255;
+
+                ypred_ptr[c] = pred;
+            }
+
+            ypred_ptr += y_stride;  //16;
+        }
+
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *uabove_row;   // = x->dst.u_buffer - x->dst.uv_stride;
+    unsigned char *uleft_col;    //[16];
+    unsigned char uleft_buf[8];
+    unsigned char utop_left;     // = uabove_row[-1];
+    unsigned char *vabove_row;   // = x->dst.v_buffer - x->dst.uv_stride;
+    unsigned char *vleft_col;    //[20];
+    unsigned char vleft_buf[8];
+    unsigned char vtop_left;     // = vabove_row[-1];
+    unsigned char *upred_ptr = &x->predictor[256];
+    unsigned char *vpred_ptr = &x->predictor[320];
+    int i, j;
+
+    if (pbi->common.filter_level)
+    {
+        uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16;
+        vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16;
+        uleft_col = pbi->mt_uleft_col[mb_row];
+        vleft_col = pbi->mt_vleft_col[mb_row];
+    } else
+    {
+        uabove_row = x->dst.u_buffer - x->dst.uv_stride;
+        vabove_row = x->dst.v_buffer - x->dst.uv_stride;
+
+        for (i = 0; i < 8; i++)
+        {
+            uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
+            vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
+        }
+        uleft_col = uleft_buf;
+        vleft_col = vleft_buf;
+    }
+    utop_left = uabove_row[-1];
+    vtop_left = vabove_row[-1];
+
+    switch (x->mode_info_context->mbmi.uv_mode)
+    {
+    case DC_PRED:
+    {
+        int expected_udc;
+        int expected_vdc;
+        int i;
+        int shift;
+        int Uaverage = 0;
+        int Vaverage = 0;
+
+        if (x->up_available)
+        {
+            for (i = 0; i < 8; i++)
+            {
+                Uaverage += uabove_row[i];
+                Vaverage += vabove_row[i];
+            }
+        }
+
+        if (x->left_available)
+        {
+            for (i = 0; i < 8; i++)
+            {
+                Uaverage += uleft_col[i];
+                Vaverage += vleft_col[i];
+            }
+        }
+
+        if (!x->up_available && !x->left_available)
+        {
+            expected_udc = 128;
+            expected_vdc = 128;
+        }
+        else
+        {
+            shift = 2 + x->up_available + x->left_available;
+            expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
+            expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
+        }
+
+
+        vpx_memset(upred_ptr, expected_udc, 64);
+        vpx_memset(vpred_ptr, expected_vdc, 64);
+
+
+    }
+    break;
+    case V_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            vpx_memcpy(upred_ptr, uabove_row, 8);
+            vpx_memcpy(vpred_ptr, vabove_row, 8);
+            upred_ptr += 8;
+            vpred_ptr += 8;
+        }
+
+    }
+    break;
+    case H_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            vpx_memset(upred_ptr, uleft_col[i], 8);
+            vpx_memset(vpred_ptr, vleft_col[i], 8);
+            upred_ptr += 8;
+            vpred_ptr += 8;
+        }
+    }
+
+    break;
+    case TM_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            for (j = 0; j < 8; j++)
+            {
+                int predu = uleft_col[i] + uabove_row[j] - utop_left;
+                int predv = vleft_col[i] + vabove_row[j] - vtop_left;
+
+                if (predu < 0)
+                    predu = 0;
+
+                if (predu > 255)
+                    predu = 255;
+
+                if (predv < 0)
+                    predv = 0;
+
+                if (predv > 255)
+                    predv = 255;
+
+                upred_ptr[j] = predu;
+                vpred_ptr[j] = predv;
+            }
+
+            upred_ptr += 8;
+            vpred_ptr += 8;
+        }
+
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *uabove_row;  // = x->dst.u_buffer - x->dst.uv_stride;
+    unsigned char *uleft_col;   //[16];
+    unsigned char uleft_buf[8];
+    unsigned char utop_left;    // = uabove_row[-1];
+    unsigned char *vabove_row;  // = x->dst.v_buffer - x->dst.uv_stride;
+    unsigned char *vleft_col;   //[20];
+    unsigned char vleft_buf[8];
+    unsigned char vtop_left;    // = vabove_row[-1];
+    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
+    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    int uv_stride = x->dst.uv_stride;
+    int i, j;
+
+    if (pbi->common.filter_level)
+    {
+        uabove_row = pbi->mt_uabove_row[mb_row] + mb_col*8 +16;
+        vabove_row = pbi->mt_vabove_row[mb_row] + mb_col*8 +16;
+        uleft_col = pbi->mt_uleft_col[mb_row];
+        vleft_col = pbi->mt_vleft_col[mb_row];
+    } else
+    {
+        uabove_row = x->dst.u_buffer - x->dst.uv_stride;
+        vabove_row = x->dst.v_buffer - x->dst.uv_stride;
+
+        for (i = 0; i < 8; i++)
+        {
+            uleft_buf[i] = x->dst.u_buffer [i* x->dst.uv_stride -1];
+            vleft_buf[i] = x->dst.v_buffer [i* x->dst.uv_stride -1];
+        }
+        uleft_col = uleft_buf;
+        vleft_col = vleft_buf;
+    }
+    utop_left = uabove_row[-1];
+    vtop_left = vabove_row[-1];
+
+    switch (x->mode_info_context->mbmi.uv_mode)
+    {
+    case DC_PRED:
+    {
+        int expected_udc;
+        int expected_vdc;
+        int i;
+        int shift;
+        int Uaverage = 0;
+        int Vaverage = 0;
+
+        if (x->up_available)
+        {
+            for (i = 0; i < 8; i++)
+            {
+                Uaverage += uabove_row[i];
+                Vaverage += vabove_row[i];
+            }
+        }
+
+        if (x->left_available)
+        {
+            for (i = 0; i < 8; i++)
+            {
+                Uaverage += uleft_col[i];
+                Vaverage += vleft_col[i];
+            }
+        }
+
+        if (!x->up_available && !x->left_available)
+        {
+            expected_udc = 128;
+            expected_vdc = 128;
+        }
+        else
+        {
+            shift = 2 + x->up_available + x->left_available;
+            expected_udc = (Uaverage + (1 << (shift - 1))) >> shift;
+            expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift;
+        }
+
+
+        //vpx_memset(upred_ptr,expected_udc,64);
+        //vpx_memset(vpred_ptr,expected_vdc,64);
+        for (i = 0; i < 8; i++)
+        {
+            vpx_memset(upred_ptr, expected_udc, 8);
+            vpx_memset(vpred_ptr, expected_vdc, 8);
+            upred_ptr += uv_stride; //8;
+            vpred_ptr += uv_stride; //8;
+        }
+    }
+    break;
+    case V_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            vpx_memcpy(upred_ptr, uabove_row, 8);
+            vpx_memcpy(vpred_ptr, vabove_row, 8);
+            upred_ptr += uv_stride; //8;
+            vpred_ptr += uv_stride; //8;
+        }
+
+    }
+    break;
+    case H_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            vpx_memset(upred_ptr, uleft_col[i], 8);
+            vpx_memset(vpred_ptr, vleft_col[i], 8);
+            upred_ptr += uv_stride; //8;
+            vpred_ptr += uv_stride; //8;
+        }
+    }
+
+    break;
+    case TM_PRED:
+    {
+        int i;
+
+        for (i = 0; i < 8; i++)
+        {
+            for (j = 0; j < 8; j++)
+            {
+                int predu = uleft_col[i] + uabove_row[j] - utop_left;
+                int predv = vleft_col[i] + vabove_row[j] - vtop_left;
+
+                if (predu < 0)
+                    predu = 0;
+
+                if (predu > 255)
+                    predu = 255;
+
+                if (predv < 0)
+                    predv = 0;
+
+                if (predv > 255)
+                    predv = 255;
+
+                upred_ptr[j] = predu;
+                vpred_ptr[j] = predv;
+            }
+
+            upred_ptr += uv_stride; //8;
+            vpred_ptr += uv_stride; //8;
+        }
+
+    }
+    break;
+    case B_PRED:
+    case NEARESTMV:
+    case NEARMV:
+    case ZEROMV:
+    case NEWMV:
+    case SPLITMV:
+    case MB_MODE_COUNT:
+        break;
+    }
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+
+void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
+                          MACROBLOCKD *xd,
+                          int b_mode,
+                          unsigned char *predictor,
+                          int mb_row,
+                          int mb_col,
+                          int num)
+{
+#if CONFIG_MULTITHREAD
+    int i, r, c;
+
+    unsigned char *Above;   // = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char Left[4];
+    unsigned char top_left; // = Above[-1];
+
+    BLOCKD *x = &xd->block[num];
+
+    //Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).
+    if (num < 4 && pbi->common.filter_level)
+        Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32;
+    else
+        Above = *(x->base_dst) + x->dst - x->dst_stride;
+
+    if (num%4==0 && pbi->common.filter_level)
+    {
+        for (i=0; i<4; i++)
+            Left[i] = pbi->mt_yleft_col[mb_row][num + i];
+    }else
+    {
+        Left[0] = (*(x->base_dst))[x->dst - 1];
+        Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
+        Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
+        Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
+    }
+
+    if ((num==4 || num==8 || num==12) && pbi->common.filter_level)
+        top_left = pbi->mt_yleft_col[mb_row][num-1];
+    else
+        top_left = Above[-1];
+
+     switch (b_mode)
+    {
+    case B_DC_PRED:
+    {
+        int expected_dc = 0;
+
+        for (i = 0; i < 4; i++)
+        {
+            expected_dc += Above[i];
+            expected_dc += Left[i];
+        }
+
+        expected_dc = (expected_dc + 4) >> 3;
+
+        for (r = 0; r < 4; r++)
+        {
+            for (c = 0; c < 4; c++)
+            {
+                predictor[c] = expected_dc;
+            }
+
+            predictor += 16;
+        }
+    }
+    break;
+    case B_TM_PRED:
+    {
+        // prediction similar to true_motion prediction
+        for (r = 0; r < 4; r++)
+        {
+            for (c = 0; c < 4; c++)
+            {
+                int pred = Above[c] - top_left + Left[r];
+
+                if (pred < 0)
+                    pred = 0;
+
+                if (pred > 255)
+                    pred = 255;
+
+                predictor[c] = pred;
+            }
+
+            predictor += 16;
+        }
+    }
+    break;
+
+    case B_VE_PRED:
+    {
+
+        unsigned int ap[4];
+        ap[0] = (top_left  + 2 * Above[0] + Above[1] + 2) >> 2;
+        ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
+        ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
+        ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
+
+        for (r = 0; r < 4; r++)
+        {
+            for (c = 0; c < 4; c++)
+            {
+
+                predictor[c] = ap[c];
+            }
+
+            predictor += 16;
+        }
+
+    }
+    break;
+
+
+    case B_HE_PRED:
+    {
+
+        unsigned int lp[4];
+        lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
+        lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
+        lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
+        lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
+
+        for (r = 0; r < 4; r++)
+        {
+            for (c = 0; c < 4; c++)
+            {
+                predictor[c] = lp[r];
+            }
+
+            predictor += 16;
+        }
+    }
+    break;
+    case B_LD_PRED:
+    {
+        unsigned char *ptr = Above;
+        predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
+        predictor[0 * 16 + 1] =
+            predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
+        predictor[0 * 16 + 2] =
+            predictor[1 * 16 + 1] =
+                predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
+        predictor[0 * 16 + 3] =
+            predictor[1 * 16 + 2] =
+                predictor[2 * 16 + 1] =
+                    predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
+        predictor[1 * 16 + 3] =
+            predictor[2 * 16 + 2] =
+                predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
+        predictor[2 * 16 + 3] =
+            predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
+        predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
+
+    }
+    break;
+    case B_RD_PRED:
+    {
+
+        unsigned char pp[9];
+
+        pp[0] = Left[3];
+        pp[1] = Left[2];
+        pp[2] = Left[1];
+        pp[3] = Left[0];
+        pp[4] = top_left;
+        pp[5] = Above[0];
+        pp[6] = Above[1];
+        pp[7] = Above[2];
+        pp[8] = Above[3];
+
+        predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+        predictor[3 * 16 + 1] =
+            predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+        predictor[3 * 16 + 2] =
+            predictor[2 * 16 + 1] =
+                predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+        predictor[3 * 16 + 3] =
+            predictor[2 * 16 + 2] =
+                predictor[1 * 16 + 1] =
+                    predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+        predictor[2 * 16 + 3] =
+            predictor[1 * 16 + 2] =
+                predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+        predictor[1 * 16 + 3] =
+            predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+        predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+
+    }
+    break;
+    case B_VR_PRED:
+    {
+
+        unsigned char pp[9];
+
+        pp[0] = Left[3];
+        pp[1] = Left[2];
+        pp[2] = Left[1];
+        pp[3] = Left[0];
+        pp[4] = top_left;
+        pp[5] = Above[0];
+        pp[6] = Above[1];
+        pp[7] = Above[2];
+        pp[8] = Above[3];
+
+
+        predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+        predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+        predictor[3 * 16 + 1] =
+            predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+        predictor[2 * 16 + 1] =
+            predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
+        predictor[3 * 16 + 2] =
+            predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+        predictor[2 * 16 + 2] =
+            predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
+        predictor[3 * 16 + 3] =
+            predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+        predictor[2 * 16 + 3] =
+            predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
+        predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
+        predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
+
+    }
+    break;
+    case B_VL_PRED:
+    {
+
+        unsigned char *pp = Above;
+
+        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+        predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+        predictor[2 * 16 + 0] =
+            predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
+        predictor[1 * 16 + 1] =
+            predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+        predictor[2 * 16 + 1] =
+            predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
+        predictor[3 * 16 + 1] =
+            predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+        predictor[0 * 16 + 3] =
+            predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
+        predictor[1 * 16 + 3] =
+            predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+        predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+        predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+    }
+    break;
+
+    case B_HD_PRED:
+    {
+        unsigned char pp[9];
+        pp[0] = Left[3];
+        pp[1] = Left[2];
+        pp[2] = Left[1];
+        pp[3] = Left[0];
+        pp[4] = top_left;
+        pp[5] = Above[0];
+        pp[6] = Above[1];
+        pp[7] = Above[2];
+        pp[8] = Above[3];
+
+
+        predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+        predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+        predictor[2 * 16 + 0] =
+            predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
+        predictor[2 * 16 + 1] =
+            predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+        predictor[2 * 16 + 2] =
+            predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+        predictor[2 * 16 + 3] =
+            predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
+        predictor[1 * 16 + 2] =
+            predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
+        predictor[1 * 16 + 3] =
+            predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
+        predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
+        predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
+    }
+    break;
+
+
+    case B_HU_PRED:
+    {
+        unsigned char *pp = Left;
+        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
+        predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
+        predictor[0 * 16 + 2] =
+            predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
+        predictor[0 * 16 + 3] =
+            predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
+        predictor[1 * 16 + 2] =
+            predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
+        predictor[1 * 16 + 3] =
+            predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
+        predictor[2 * 16 + 2] =
+            predictor[2 * 16 + 3] =
+                predictor[3 * 16 + 0] =
+                    predictor[3 * 16 + 1] =
+                        predictor[3 * 16 + 2] =
+                            predictor[3 * 16 + 3] = pp[3];
+    }
+    break;
+
+
+    }
+#else
+    (void) pbi;
+    (void) xd;
+    (void) b_mode;
+    (void) predictor;
+    (void) mb_row;
+    (void) mb_col;
+    (void) num;
+#endif
+}
+
+// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+// to the right prediction have filled in pixels to use.
+void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    unsigned char *above_right;   // = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+    unsigned int *src_ptr;
+    unsigned int *dst_ptr0;
+    unsigned int *dst_ptr1;
+    unsigned int *dst_ptr2;
+
+    if (pbi->common.filter_level)
+        above_right = pbi->mt_yabove_row[mb_row] + mb_col*16 + 32 +16;
+    else
+        above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+
+    src_ptr = (unsigned int *)above_right;
+    //dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
+    //dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
+    //dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
+    dst_ptr0 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 3 * x->block[0].dst_stride);
+    dst_ptr1 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 7 * x->block[0].dst_stride);
+    dst_ptr2 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 11 * x->block[0].dst_stride);
+    *dst_ptr0 = *src_ptr;
+    *dst_ptr1 = *src_ptr;
+    *dst_ptr2 = *src_ptr;
+#else
+    (void) pbi;
+    (void) x;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
diff --git a/vp8/common/reconintra_mt.h b/vp8/common/reconintra_mt.h
new file mode 100644
index 000000000..5c42f819f
--- /dev/null
+++ b/vp8/common/reconintra_mt.h
@@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_RECONINTRA_MT_H
+#define __INC_RECONINTRA_MT_H
+
+// reconintra functions used in multi-threaded decoder
+#if CONFIG_MULTITHREAD
+extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+extern void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+
+extern void vp8mt_predict_intra4x4(VP8D_COMP *pbi, MACROBLOCKD *x, int b_mode, unsigned char *predictor, int mb_row, int mb_col, int num);
+extern void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
+#endif
+
+#endif
diff --git a/vp8/decoder/decoderthreading.h b/vp8/decoder/decoderthreading.h
index c8e3f02f9..25dee8fe8 100644
--- a/vp8/decoder/decoderthreading.h
+++ b/vp8/decoder/decoderthreading.h
@@ -15,12 +15,12 @@
 #ifndef _DECODER_THREADING_H
 #define _DECODER_THREADING_H
 
-
-extern void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
-                                 MACROBLOCKD *xd);
-extern void vp8_mt_loop_filter_frame(VP8D_COMP *pbi);
-extern void vp8_stop_lfthread(VP8D_COMP *pbi);
-extern void vp8_start_lfthread(VP8D_COMP *pbi);
+#if CONFIG_MULTITHREAD
+extern void vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd);
 extern void vp8_decoder_remove_threads(VP8D_COMP *pbi);
 extern void vp8_decoder_create_threads(VP8D_COMP *pbi);
+extern int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows);
+extern void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows);
+#endif
+
 #endif
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 4f5b7c7a2..efe0ad8e2 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -21,7 +21,8 @@
 #include "alloccommon.h"
 #include "entropymode.h"
 #include "quant_common.h"
-
+#include "vpx_scale/vpxscale.h"
+#include "vpx_scale/yv12extend.h"
 #include "setupintrarecon.h"
 
 #include "decodemv.h"
@@ -64,7 +65,7 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
     }
 }
 
-static void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
+void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     int i;
     int QIndex;
@@ -158,7 +159,7 @@ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd)
     mv->row = (2*mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row;
 }
 
-static void clamp_mvs(MACROBLOCKD *xd)
+void clamp_mvs(MACROBLOCKD *xd)
 {
     if (xd->mode_info_context->mbmi.mode == SPLITMV)
     {
@@ -294,6 +295,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
                      xd->dst.uv_stride, xd->eobs+16);
 }
 
+
 static int get_delta_q(vp8_reader *bc, int prev, int *q_update)
 {
     int ret_val = 0;
@@ -398,7 +400,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
         xd->above_context++;
 
-        pbi->current_mb_col_main = mb_col;
     }
 
     // adjust to the next row of mbs
@@ -408,8 +409,6 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     );
 
     ++xd->mode_info_context;      /* skip prediction column */
-
-    pbi->last_mb_row_decoded = mb_row;
 }
 
 
@@ -601,6 +600,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
         if (Width != pc->Width  ||  Height != pc->Height)
         {
+            int prev_mb_rows = pc->mb_rows;
+
             if (pc->Width <= 0)
             {
                 pc->Width = Width;
@@ -618,6 +619,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
             if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height))
                 vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                                    "Failed to allocate frame buffers");
+
+#if CONFIG_MULTITHREAD
+            if (pbi->b_multithreaded_rd)
+                vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows);
+#endif
         }
     }
 
@@ -822,7 +828,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
 
     // set up frame new frame for intra coded blocks
-    vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
+    if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
+        vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
 
     vp8_setup_block_dptrs(xd);
 
@@ -841,13 +848,18 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     vpx_memcpy(&xd->block[0].bmi, &xd->mode_info_context->bmi[0], sizeof(B_MODE_INFO));
 
-
-    if (pbi->b_multithreaded_lf && pc->filter_level != 0)
-        vp8_start_lfthread(pbi);
-
     if (pbi->b_multithreaded_rd && pc->multi_token_partition != ONE_PARTITION)
     {
-        vp8_mtdecode_mb_rows(pbi, xd);
+        vp8mt_decode_mb_rows(pbi, xd);
+        if(pbi->common.filter_level)
+        {
+            //vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
+
+            pc->last_frame_type = pc->frame_type;
+            pc->last_filter_type = pc->filter_type;
+            pc->last_sharpness_level = pc->sharpness_level;
+        }
+        vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]);    //cm->frame_to_show);
     }
     else
     {
@@ -869,8 +881,6 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
             vp8_decode_mb_row(pbi, pc, mb_row, xd);
         }
-
-        pbi->last_mb_row_decoded = mb_row;
     }
 
 
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 1651784cf..7d716b0a0 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -142,6 +142,10 @@ void vp8dx_remove_decompressor(VP8D_PTR ptr)
     if (!pbi)
         return;
 
+#if CONFIG_MULTITHREAD
+    if (pbi->b_multithreaded_rd)
+        vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows);
+#endif
     vp8_decoder_remove_threads(pbi);
     vp8_remove_common(&pbi->common);
     vpx_free(pbi);
@@ -355,66 +359,40 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         return retcode;
     }
 
-    if (pbi->b_multithreaded_lf && pbi->common.filter_level != 0)
-        vp8_stop_lfthread(pbi);
-
-    if (swap_frame_buffers (cm))
+    if (pbi->b_multithreaded_rd && cm->multi_token_partition != ONE_PARTITION)
     {
-        pbi->common.error.error_code = VPX_CODEC_ERROR;
-        pbi->common.error.setjmp = 0;
-        return -1;
-    }
-
-/*
-    if (!pbi->b_multithreaded_lf)
+        if (swap_frame_buffers (cm))
+        {
+            pbi->common.error.error_code = VPX_CODEC_ERROR;
+            pbi->common.error.setjmp = 0;
+            return -1;
+        }
+    } else
     {
-        struct vpx_usec_timer lpftimer;
-        vpx_usec_timer_start(&lpftimer);
-        // Apply the loop filter if appropriate.
+        if (swap_frame_buffers (cm))
+        {
+            pbi->common.error.error_code = VPX_CODEC_ERROR;
+            pbi->common.error.setjmp = 0;
+            return -1;
+        }
+
+        if(pbi->common.filter_level)
+        {
+            struct vpx_usec_timer lpftimer;
+            vpx_usec_timer_start(&lpftimer);
+            // Apply the loop filter if appropriate.
 
-        if (cm->filter_level > 0)
             vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
 
-        vpx_usec_timer_mark(&lpftimer);
-        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-    }else{
-      struct vpx_usec_timer lpftimer;
-      vpx_usec_timer_start(&lpftimer);
-      // Apply the loop filter if appropriate.
+            vpx_usec_timer_mark(&lpftimer);
+            pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
 
-      if (cm->filter_level > 0)
-          vp8_mt_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-
-      vpx_usec_timer_mark(&lpftimer);
-      pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
+            cm->last_frame_type = cm->frame_type;
+            cm->last_filter_type = cm->filter_type;
+            cm->last_sharpness_level = cm->sharpness_level;
+        }
+        vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
     }
-    if (cm->filter_level > 0) {
-        cm->last_frame_type = cm->frame_type;
-        cm->last_filter_type = cm->filter_type;
-        cm->last_sharpness_level = cm->sharpness_level;
-    }
-*/
-
-    if(pbi->common.filter_level)
-    {
-        struct vpx_usec_timer lpftimer;
-        vpx_usec_timer_start(&lpftimer);
-        // Apply the loop filter if appropriate.
-
-        if (pbi->b_multithreaded_lf && cm->multi_token_partition != ONE_PARTITION)
-            vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
-        else
-            vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
-
-        vpx_usec_timer_mark(&lpftimer);
-        pbi->time_loop_filtering += vpx_usec_timer_elapsed(&lpftimer);
-
-        cm->last_frame_type = cm->frame_type;
-        cm->last_filter_type = cm->filter_type;
-        cm->last_sharpness_level = cm->sharpness_level;
-    }
-
-    vp8_yv12_extend_frame_borders_ptr(cm->frame_to_show);
 
 #if 0
     // DEBUG code
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index ad21ae3f5..ab926e61d 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -88,30 +88,32 @@ typedef struct VP8Decompressor
     unsigned int time_loop_filtering;
 
     volatile int b_multithreaded_rd;
-    volatile int b_multithreaded_lf;
     int max_threads;
-    int last_mb_row_decoded;
     int current_mb_col_main;
     int decoding_thread_count;
     int allocated_decoding_thread_count;
-    int *current_mb_col;                  //Each row remembers its already decoded column.
-    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
 
     // variable for threading
-    DECLARE_ALIGNED(16, MACROBLOCKD, lpfmb);
 #if CONFIG_MULTITHREAD
-    //pthread_t           h_thread_lpf;         // thread for postprocessing
-    sem_t               h_event_end_lpf;          // Event for post_proc completed
-    sem_t               *h_event_start_lpf;
-#endif
+    int mt_baseline_filter_level[MAX_MB_SEGMENTS];
+    int *mt_current_mb_col;                  // Each row remembers its already decoded column.
+
+    unsigned char **mt_yabove_row;           // mb_rows x width
+    unsigned char **mt_uabove_row;
+    unsigned char **mt_vabove_row;
+    unsigned char **mt_yleft_col;            // mb_rows x 16
+    unsigned char **mt_uleft_col;            // mb_rows x 8
+    unsigned char **mt_vleft_col;            // mb_rows x 8
+
     MB_ROW_DEC           *mb_row_di;
-    DECODETHREAD_DATA   *de_thread_data;
-#if CONFIG_MULTITHREAD
+    DECODETHREAD_DATA    *de_thread_data;
+
     pthread_t           *h_decoding_thread;
     sem_t               *h_event_start_decoding;
-    sem_t               h_event_end_decoding;
+    sem_t                h_event_end_decoding;
     // end of threading data
 #endif
+
     vp8_reader *mbc;
     INT64 last_time_stamp;
     int   ready_for_new_data;
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index a77552c3c..93bc69e72 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -22,18 +22,19 @@
 #include "loopfilter.h"
 #include "extend.h"
 #include "vpx_ports/vpx_timer.h"
+#include "detokenize.h"
+#include "reconinter.h"
+#include "reconintra_mt.h"
 
-#define MAX_ROWS 256
-
-extern void vp8_decode_mb_row(VP8D_COMP *pbi,
-                              VP8_COMMON *pc,
-                              int mb_row,
-                              MACROBLOCKD *xd);
-
+extern void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd);
+extern void clamp_mvs(MACROBLOCKD *xd);
 extern void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel);
-extern void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd);
 
-void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread);
+#if CONFIG_RUNTIME_CPU_DETECT
+#define RTCD_VTABLE(x) (&(pbi)->common.rtcd.x)
+#else
+#define RTCD_VTABLE(x) NULL
+#endif
 
 void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
 {
@@ -68,58 +69,6 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        mbd->current_bc = &pbi->bc2;
-
-        for (j = 0; j < 25; j++)
-        {
-            mbd->block[j].dequant = xd->block[j].dequant;
-        }
-    }
-
-    for (i=0; i< pc->mb_rows; i++)
-        pbi->current_mb_col[i]=-1;
-#else
-    (void) pbi;
-    (void) xd;
-    (void) mbrd;
-    (void) count;
-#endif
-}
-
-void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count)
-{
-#if CONFIG_MULTITHREAD
-    VP8_COMMON *const pc = & pbi->common;
-    int i, j;
-
-    for (i = 0; i < count; i++)
-    {
-        MACROBLOCKD *mbd = &mbrd[i].mbd;
-//#if CONFIG_RUNTIME_CPU_DETECT
-//        mbd->rtcd = xd->rtcd;
-//#endif
-
-        //mbd->subpixel_predict        = xd->subpixel_predict;
-        //mbd->subpixel_predict8x4     = xd->subpixel_predict8x4;
-        //mbd->subpixel_predict8x8     = xd->subpixel_predict8x8;
-        //mbd->subpixel_predict16x16   = xd->subpixel_predict16x16;
-
-        mbd->mode_info_context = pc->mi   + pc->mode_info_stride * (i + 1);
-        mbd->mode_info_stride  = pc->mode_info_stride;
-
-        //mbd->frame_type = pc->frame_type;
-        //mbd->frames_since_golden      = pc->frames_since_golden;
-        //mbd->frames_till_alt_ref_frame  = pc->frames_till_alt_ref_frame;
-
-        //mbd->pre = pc->yv12_fb[pc->lst_fb_idx];
-        //mbd->dst = pc->yv12_fb[pc->new_fb_idx];
-
-        //vp8_setup_block_dptrs(mbd);
-        //vp8_build_block_doffsets(mbd);
-        mbd->segmentation_enabled    = xd->segmentation_enabled;  //
-        mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;  //
-        vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));   //
-
         //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
         //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
@@ -129,18 +78,16 @@ void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
 
-        //mbd->mbmi.mode = DC_PRED;
-        //mbd->mbmi.uv_mode = DC_PRED;
-        //mbd->current_bc = &pbi->bc2;
+        mbd->current_bc = &pbi->bc2;
 
-        //for (j = 0; j < 25; j++)
-        //{
-        //    mbd->block[j].dequant = xd->block[j].dequant;
-        //}
+        for (j = 0; j < 25; j++)
+        {
+            mbd->block[j].dequant = xd->block[j].dequant;
+        }
     }
 
     for (i=0; i< pc->mb_rows; i++)
-        pbi->current_mb_col[i]=-1;
+        pbi->mt_current_mb_col[i]=-1;
 #else
     (void) pbi;
     (void) xd;
@@ -149,6 +96,141 @@ void vp8_setup_loop_filter_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_D
 #endif
 }
 
+
+void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb_col)
+{
+#if CONFIG_MULTITHREAD
+    int eobtotal = 0;
+    int i, do_clamp = xd->mode_info_context->mbmi.need_to_clamp_mvs;
+    VP8_COMMON *pc = &pbi->common;
+
+    if (xd->mode_info_context->mbmi.mb_skip_coeff)
+    {
+        vp8_reset_mb_tokens_context(xd);
+    }
+    else
+    {
+        eobtotal = vp8_decode_mb_tokens(pbi, xd);
+    }
+
+    // Perform temporary clamping of the MV to be used for prediction
+    if (do_clamp)
+    {
+        clamp_mvs(xd);
+    }
+
+    xd->mode_info_context->mbmi.dc_diff = 1;
+
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && eobtotal == 0)
+    {
+        xd->mode_info_context->mbmi.dc_diff = 0;
+
+        //mt_skip_recon_mb(pbi, xd, mb_row, mb_col);
+        if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+        {
+            vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
+            vp8mt_build_intra_predictors_mby_s(pbi, xd, mb_row, mb_col);
+        }
+        else
+        {
+            vp8_build_inter_predictors_mb_s(xd);
+        }
+        return;
+    }
+
+    if (xd->segmentation_enabled)
+        mb_init_dequantizer(pbi, xd);
+
+    // do prediction
+    if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
+    {
+        vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
+
+        if (xd->mode_info_context->mbmi.mode != B_PRED)
+        {
+            vp8mt_build_intra_predictors_mby(pbi, xd, mb_row, mb_col);
+        } else {
+            vp8mt_intra_prediction_down_copy(pbi, xd, mb_row, mb_col);
+        }
+    }
+    else
+    {
+        vp8_build_inter_predictors_mb(xd);
+    }
+
+    // dequantization and idct
+    if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
+    {
+        BLOCKD *b = &xd->block[24];
+        DEQUANT_INVOKE(&pbi->dequant, block)(b);
+
+        // do 2nd order transform on the dc block
+        if (xd->eobs[24] > 1)
+        {
+            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
+            ((int *)b->qcoeff)[0] = 0;
+            ((int *)b->qcoeff)[1] = 0;
+            ((int *)b->qcoeff)[2] = 0;
+            ((int *)b->qcoeff)[3] = 0;
+            ((int *)b->qcoeff)[4] = 0;
+            ((int *)b->qcoeff)[5] = 0;
+            ((int *)b->qcoeff)[6] = 0;
+            ((int *)b->qcoeff)[7] = 0;
+        }
+        else
+        {
+            IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh1)(&b->dqcoeff[0], b->diff);
+            ((int *)b->qcoeff)[0] = 0;
+        }
+
+        DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs, xd->block[24].diff);
+    }
+    else if ((xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && xd->mode_info_context->mbmi.mode == B_PRED)
+    {
+        for (i = 0; i < 16; i++)
+        {
+            BLOCKD *b = &xd->block[i];
+            vp8mt_predict_intra4x4(pbi, xd, b->bmi.mode, b->predictor, mb_row, mb_col, i);
+
+            if (xd->eobs[i] > 1)
+            {
+                DEQUANT_INVOKE(&pbi->dequant, idct_add)
+                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
+            }
+            else
+            {
+                IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
+                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    *(b->base_dst) + b->dst, 16, b->dst_stride);
+                ((int *)b->qcoeff)[0] = 0;
+            }
+        }
+    }
+    else
+    {
+        DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
+                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                         xd->predictor, xd->dst.y_buffer,
+                         xd->dst.y_stride, xd->eobs);
+    }
+
+    DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
+                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                     xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
+                     xd->dst.uv_stride, xd->eobs+16);
+#else
+    (void) pbi;
+    (void) xd;
+    (void) mb_row;
+    (void) mb_col;
+#endif
+}
+
+
 THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 {
 #if CONFIG_MULTITHREAD
@@ -159,8 +241,6 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
     while (1)
     {
-        int current_filter_level = 0;
-
         if (pbi->b_multithreaded_rd == 0)
             break;
 
@@ -188,10 +268,15 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                     int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
                     int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
+                    int filter_level;
+                    loop_filter_info *lfi = pc->lf_info;
+                    int alt_flt_enabled = xd->segmentation_enabled;
+                    int Segment;
+
                     pbi->mb_row_di[ithread].mb_row = mb_row;
                     pbi->mb_row_di[ithread].mbd.current_bc =  &pbi->mbc[mb_row%num_part];
 
-                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+                    last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
 
                     recon_yoffset = mb_row * recon_y_stride * 16;
                     recon_uvoffset = mb_row * recon_uv_stride * 8;
@@ -225,6 +310,17 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                             }
                         }
 
+                        if(pbi->common.filter_level)
+                        {
+                            //update loopfilter info
+                            Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+                            filter_level = pbi->mt_baseline_filter_level[Segment];
+                            // Distance of Mb to the various image edges.
+                            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                            // Apply any context driven MB level adjustment
+                            vp8_adjust_mb_lf_value(xd, &filter_level);
+                        }
+
                         // Distance of Mb to the various image edges.
                         // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
@@ -249,8 +345,52 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                         xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
                         vp8_build_uvmvs(xd, pc->full_pixel);
+                        vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
 
-                        vp8_decode_macroblock(pbi, xd);
+                        if (pbi->common.filter_level)
+                        {
+                            if( mb_row != pc->mb_rows-1 )
+                            {
+                                //Save decoded MB last row data for next-row decoding
+                                vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+                                vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+                                vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+                            }
+
+                            //save left_col for next MB decoding
+                            if(mb_col != pc->mb_cols-1)
+                            {
+                                MODE_INFO *next = xd->mode_info_context +1;
+
+                                if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
+                                {
+                                    for (i = 0; i < 16; i++)
+                                        pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
+                                    for (i = 0; i < 8; i++)
+                                    {
+                                        pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
+                                        pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
+                                    }
+                                }
+                            }
+
+                          // loopfilter on this macroblock.
+                            if (filter_level)
+                            {
+                                if (mb_col > 0)
+                                    pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                                if (xd->mode_info_context->mbmi.dc_diff > 0)
+                                    pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                                // don't apply across umv border
+                                if (mb_row > 0)
+                                    pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                                if (xd->mode_info_context->mbmi.dc_diff > 0)
+                                    pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+                            }
+                        }
 
                         recon_yoffset += 16;
                         recon_uvoffset += 8;
@@ -260,40 +400,40 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                         xd->above_context++;
 
                         //pbi->mb_row_di[ithread].current_mb_col = mb_col;
-                        pbi->current_mb_col[mb_row] = mb_col;
+                        pbi->mt_current_mb_col[mb_row] = mb_col;
                     }
 
                     // adjust to the next row of mbs
-                    vp8_extend_mb_row(
-                    &pc->yv12_fb[dst_fb_idx],
-                    xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-                    );
+                    if (pbi->common.filter_level)
+                    {
+                        if(mb_row != pc->mb_rows-1)
+                        {
+                            int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+                            int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
+
+                            for (i = 0; i < 4; i++)
+                            {
+                                pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
+                                pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
+                                pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
+                            }
+                        }
+                    } else
+                        vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
 
                     ++xd->mode_info_context;      /* skip prediction column */
 
                     // since we have multithread
                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
-
-                    pbi->last_mb_row_decoded = mb_row;
-
                 }
             }
         }
-
-        // If |pbi->common.filter_level| is 0 the value can change in-between
-        // the sem_post and the check to call vp8_thread_loop_filter.
-        current_filter_level = pbi->common.filter_level;
-
         //  add this to each frame
         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
         {
             //SetEvent(pbi->h_event_end_decoding);
             sem_post(&pbi->h_event_end_decoding);
         }
-
-        if ((pbi->b_multithreaded_lf) && (current_filter_level))
-            vp8_thread_loop_filter(pbi, mbrd, ithread);
-
     }
 #else
     (void) p_data;
@@ -303,123 +443,20 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 }
 
 
-void vp8_thread_loop_filter(VP8D_COMP *pbi, MB_ROW_DEC *mbrd, int ithread)
-{
-#if CONFIG_MULTITHREAD
-
-        if (sem_wait(&pbi->h_event_start_lpf[ithread]) == 0)
-        {
-           // if (pbi->b_multithreaded_lf == 0) // we're shutting down      ????
-           //     break;
-           // else
-            {
-                VP8_COMMON *cm  = &pbi->common;
-                MACROBLOCKD *mbd = &mbrd->mbd;
-                int default_filt_lvl = pbi->common.filter_level;
-
-                YV12_BUFFER_CONFIG *post = cm->frame_to_show;
-                loop_filter_info *lfi = cm->lf_info;
-                //int frame_type = cm->frame_type;
-
-                int mb_row;
-                int mb_col;
-
-                int filter_level;
-                int alt_flt_enabled = mbd->segmentation_enabled;
-
-                int i;
-                unsigned char *y_ptr, *u_ptr, *v_ptr;
-
-                volatile int *last_row_current_mb_col;
-
-                // Set up the buffer pointers
-                y_ptr = post->y_buffer + post->y_stride  * 16 * (ithread +1);
-                u_ptr = post->u_buffer + post->uv_stride *  8 * (ithread +1);
-                v_ptr = post->v_buffer + post->uv_stride *  8 * (ithread +1);
-
-                // vp8_filter each macro block
-                for (mb_row = ithread+1; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
-                {
-                    last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
-
-                    for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
-                    {
-                        int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
-
-                        if ((mb_col & 7) == 0)
-                        {
-                            while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
-                            {
-                                x86_pause_hint();
-                                thread_sleep(0);
-                            }
-                        }
-
-                        filter_level = pbi->mt_baseline_filter_level[Segment];
-
-                        // Apply any context driven MB level adjustment
-                        vp8_adjust_mb_lf_value(mbd, &filter_level);
-
-                        if (filter_level)
-                        {
-                            if (mb_col > 0)
-                                cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
-                                cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                            // don't apply across umv border
-                            if (mb_row > 0)
-                                cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                            if (mbd->mode_info_context->mbmi.dc_diff > 0)
-                                cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-                        }
-
-                        y_ptr += 16;
-                        u_ptr += 8;
-                        v_ptr += 8;
-
-                        mbd->mode_info_context++;     // step to next MB
-                        pbi->current_mb_col[mb_row] = mb_col;
-                    }
-
-                    mbd->mode_info_context++;         // Skip border mb
-
-                    y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
-                    u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
-                    v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
-
-                    mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;         // Skip border mb
-                }
-            }
-        }
-
-        //  add this to each frame
-        if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
-        {
-          sem_post(&pbi->h_event_end_lpf);
-        }
-#else
-    (void) pbi;
-#endif
-}
-
 void vp8_decoder_create_threads(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
     int core_count = 0;
     int ithread;
+    int i;
 
     pbi->b_multithreaded_rd = 0;
-    pbi->b_multithreaded_lf = 0;
     pbi->allocated_decoding_thread_count = 0;
     core_count = (pbi->max_threads > 16) ? 16 : pbi->max_threads;
 
     if (core_count > 1)
     {
         pbi->b_multithreaded_rd = 1;
-        pbi->b_multithreaded_lf = 1;  // this can be merged with pbi->b_multithreaded_rd ?
         pbi->decoding_thread_count = core_count -1;
 
         CHECK_MEM_ERROR(pbi->h_decoding_thread, vpx_malloc(sizeof(pthread_t) * pbi->decoding_thread_count));
@@ -428,13 +465,9 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
         vpx_memset(pbi->mb_row_di, 0, sizeof(MB_ROW_DEC) * pbi->decoding_thread_count);
         CHECK_MEM_ERROR(pbi->de_thread_data, vpx_malloc(sizeof(DECODETHREAD_DATA) * pbi->decoding_thread_count));
 
-        CHECK_MEM_ERROR(pbi->current_mb_col, vpx_malloc(sizeof(int) * MAX_ROWS));  // pc->mb_rows));
-        CHECK_MEM_ERROR(pbi->h_event_start_lpf, vpx_malloc(sizeof(sem_t) * pbi->decoding_thread_count));
-
         for (ithread = 0; ithread < pbi->decoding_thread_count; ithread++)
         {
             sem_init(&pbi->h_event_start_decoding[ithread], 0, 0);
-            sem_init(&pbi->h_event_start_lpf[ithread], 0, 0);
 
             pbi->de_thread_data[ithread].ithread  = ithread;
             pbi->de_thread_data[ithread].ptr1     = (void *)pbi;
@@ -444,7 +477,6 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
         }
 
         sem_init(&pbi->h_event_end_decoding, 0, 0);
-        sem_init(&pbi->h_event_end_lpf, 0, 0);
 
         pbi->allocated_decoding_thread_count = pbi->decoding_thread_count;
     }
@@ -454,21 +486,171 @@ void vp8_decoder_create_threads(VP8D_COMP *pbi)
 #endif
 }
 
+
+void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *const pc = & pbi->common;
+    int i;
+
+    if (pbi->b_multithreaded_rd)
+    {
+        if (pbi->mt_current_mb_col)
+        {
+            vpx_free(pbi->mt_current_mb_col);
+            pbi->mt_current_mb_col = NULL ;
+        }
+
+        // Free above_row buffers.
+        if (pbi->mt_yabove_row)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_yabove_row[i])
+                {
+                    vpx_free(pbi->mt_yabove_row[i]);
+                    pbi->mt_yabove_row[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_yabove_row);
+            pbi->mt_yabove_row = NULL ;
+        }
+
+        if (pbi->mt_uabove_row)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_uabove_row[i])
+                {
+                    vpx_free(pbi->mt_uabove_row[i]);
+                    pbi->mt_uabove_row[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_uabove_row);
+            pbi->mt_uabove_row = NULL ;
+        }
+
+        if (pbi->mt_vabove_row)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_vabove_row[i])
+                {
+                    vpx_free(pbi->mt_vabove_row[i]);
+                    pbi->mt_vabove_row[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_vabove_row);
+            pbi->mt_vabove_row = NULL ;
+        }
+
+        // Free left_col buffers.
+        if (pbi->mt_yleft_col)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_yleft_col[i])
+                {
+                    vpx_free(pbi->mt_yleft_col[i]);
+                    pbi->mt_yleft_col[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_yleft_col);
+            pbi->mt_yleft_col = NULL ;
+        }
+
+        if (pbi->mt_uleft_col)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_uleft_col[i])
+                {
+                    vpx_free(pbi->mt_uleft_col[i]);
+                    pbi->mt_uleft_col[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_uleft_col);
+            pbi->mt_uleft_col = NULL ;
+        }
+
+        if (pbi->mt_vleft_col)
+        {
+            for (i=0; i< mb_rows; i++)
+            {
+                if (pbi->mt_vleft_col[i])
+                {
+                    vpx_free(pbi->mt_vleft_col[i]);
+                    pbi->mt_vleft_col[i] = NULL ;
+                }
+            }
+            vpx_free(pbi->mt_vleft_col);
+            pbi->mt_vleft_col = NULL ;
+        }
+    }
+#else
+    (void) pbi;
+#endif
+}
+
+
+int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
+{
+#if CONFIG_MULTITHREAD
+    VP8_COMMON *const pc = & pbi->common;
+    int i;
+    int uv_width;
+
+    if (pbi->b_multithreaded_rd)
+    {
+        vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
+
+        // our internal buffers are always multiples of 16
+        if ((width & 0xf) != 0)
+            width += 16 - (width & 0xf);
+
+        uv_width = width >>1;
+
+        // Allocate an int for each mb row.
+        CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
+
+        // Allocate memory for above_row buffers.
+        CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
+
+        CHECK_MEM_ERROR(pbi->mt_uabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+        CHECK_MEM_ERROR(pbi->mt_vabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
+
+        // Allocate memory for left_col buffers.
+        CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
+
+        CHECK_MEM_ERROR(pbi->mt_uleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+
+        CHECK_MEM_ERROR(pbi->mt_vleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
+        for (i=0; i< pc->mb_rows; i++)
+            CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1));
+    }
+    return 0;
+#else
+    (void) pbi;
+    (void) width;
+#endif
+}
+
+
 void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
 
-    if (pbi->b_multithreaded_lf)
-    {
-        int i;
-        pbi->b_multithreaded_lf = 0;
-
-        for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
-            sem_destroy(&pbi->h_event_start_lpf[i]);
-
-        sem_destroy(&pbi->h_event_end_lpf);
-    }
-
     //shutdown MB Decoding thread;
     if (pbi->b_multithreaded_rd)
     {
@@ -502,12 +684,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
             pbi->h_event_start_decoding = NULL;
         }
 
-        if (pbi->h_event_start_lpf)
-        {
-            vpx_free(pbi->h_event_start_lpf);
-            pbi->h_event_start_lpf = NULL;
-        }
-
         if (pbi->mb_row_di)
         {
             vpx_free(pbi->mb_row_di);
@@ -519,12 +695,6 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
             vpx_free(pbi->de_thread_data);
             pbi->de_thread_data = NULL;
         }
-
-        if (pbi->current_mb_col)
-        {
-            vpx_free(pbi->current_mb_col);
-            pbi->current_mb_col = NULL ;
-        }
     }
 #else
     (void) pbi;
@@ -532,42 +702,59 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 }
 
 
-void vp8_start_lfthread(VP8D_COMP *pbi)
+void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
 {
 #if CONFIG_MULTITHREAD
-  /*
-    memcpy(&pbi->lpfmb, &pbi->mb, sizeof(pbi->mb));
-    pbi->last_mb_row_decoded = 0;
-    sem_post(&pbi->h_event_start_lpf);
-    */
-    (void) pbi;
-#else
-    (void) pbi;
-#endif
-}
-
-void vp8_stop_lfthread(VP8D_COMP *pbi)
-{
-#if CONFIG_MULTITHREAD
-  /*
-    struct vpx_usec_timer timer;
-
-    vpx_usec_timer_start(&timer);
-
-    sem_wait(&pbi->h_event_end_lpf);
-
-    vpx_usec_timer_mark(&timer);
-    pbi->time_loop_filtering += vpx_usec_timer_elapsed(&timer);
-    */
-    (void) pbi;
+    VP8_COMMON *cm  = &pbi->common;
+    MACROBLOCKD *mbd = &pbi->mb;
+    //YV12_BUFFER_CONFIG *post = &cm->new_frame;  //frame_to_show;
+    loop_filter_info *lfi = cm->lf_info;
+    int frame_type = cm->frame_type;
+
+    //int mb_row;
+    //int mb_col;
+    //int baseline_filter_level[MAX_MB_SEGMENTS];
+    int filter_level;
+    int alt_flt_enabled = mbd->segmentation_enabled;
+
+    int i;
+    //unsigned char *y_ptr, *u_ptr, *v_ptr;
+
+    // Note the baseline filter values for each segment
+    if (alt_flt_enabled)
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+        {
+            // Abs value
+            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
+                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+            // Delta Value
+            else
+            {
+                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
+                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+            }
+        }
+    }
+    else
+    {
+        for (i = 0; i < MAX_MB_SEGMENTS; i++)
+            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
+    }
+
+    // Initialize the loop filter for this frame.
+    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
+        vp8_init_loop_filter(cm);
+    else if (frame_type != cm->last_frame_type)
+        vp8_frame_init_loop_filter(lfi, frame_type);
 #else
     (void) pbi;
+    (void) default_filt_lvl;
 #endif
 }
 
 
-void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
-                          MACROBLOCKD *xd)
+void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
 #if CONFIG_MULTITHREAD
     int mb_row;
@@ -575,9 +762,38 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
     int ibc = 0;
     int num_part = 1 << pbi->common.multi_token_partition;
-    int i;
+    int i, j;
     volatile int *last_row_current_mb_col = NULL;
 
+    int filter_level;
+    loop_filter_info *lfi = pc->lf_info;
+    int alt_flt_enabled = xd->segmentation_enabled;
+    int Segment;
+
+    if(pbi->common.filter_level)
+    {
+        //Set above_row buffer to 127 for decoding first MB row
+        vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
+        vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+        vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
+
+        for (i=1; i<pc->mb_rows; i++)
+        {
+            vpx_memset(pbi->mt_yabove_row[i] + VP8BORDERINPIXELS-1, (unsigned char)129, 1);
+            vpx_memset(pbi->mt_uabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+            vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
+        }
+
+        //Set left_col to 129 initially
+        for (i=0; i<pc->mb_rows; i++)
+        {
+            vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
+            vpx_memset(pbi->mt_uleft_col[i], (unsigned char)129, 8);
+            vpx_memset(pbi->mt_vleft_col[i], (unsigned char)129, 8);
+        }
+        vp8mt_lpf_init(pbi, pc->filter_level);
+    }
+
     vp8_setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count);
 
     for (i = 0; i < pbi->decoding_thread_count; i++)
@@ -601,7 +817,7 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
            // volatile int *last_row_current_mb_col = NULL;
             if (mb_row > 0)
-                last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
+                last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
 
             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
             recon_yoffset = mb_row * recon_y_stride * 16;
@@ -633,6 +849,17 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                     }
                 }
 
+                if(pbi->common.filter_level)
+                {
+                    //update loopfilter info
+                    Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
+                    filter_level = pbi->mt_baseline_filter_level[Segment];
+                    // Distance of Mb to the various image edges.
+                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                    // Apply any context driven MB level adjustment
+                    vp8_adjust_mb_lf_value(xd, &filter_level);
+                }
+
                 // Distance of Mb to the various image edges.
                 // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
@@ -657,8 +884,52 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
                 xd->pre.v_buffer = pc->yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset;
 
                 vp8_build_uvmvs(xd, pc->full_pixel);
+                vp8mt_decode_macroblock(pbi, xd, mb_row, mb_col);
 
-                vp8_decode_macroblock(pbi, xd);
+                if (pbi->common.filter_level)
+                {
+                    //Save decoded MB last row data for next-row decoding
+                    if(mb_row != pc->mb_rows-1)
+                    {
+                        vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
+                        vpx_memcpy((pbi->mt_uabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
+                        vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
+                    }
+
+                    //save left_col for next MB decoding
+                    if(mb_col != pc->mb_cols-1)
+                    {
+                        MODE_INFO *next = xd->mode_info_context +1;
+
+                        if (xd->frame_type == KEY_FRAME  ||  next->mbmi.ref_frame == INTRA_FRAME)
+                        {
+                            for (i = 0; i < 16; i++)
+                                pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer [i* recon_y_stride + 15];
+                            for (i = 0; i < 8; i++)
+                            {
+                                pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer [i* recon_uv_stride + 7];
+                                pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer [i* recon_uv_stride + 7];
+                            }
+                        }
+                    }
+
+                    // loopfilter on this macroblock.
+                    if (filter_level)
+                    {
+                        if (mb_col > 0)
+                            pc->lf_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                        if (xd->mode_info_context->mbmi.dc_diff > 0)
+                            pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                        // don't apply across umv border
+                        if (mb_row > 0)
+                            pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+
+                        if (xd->mode_info_context->mbmi.dc_diff > 0)
+                            pc->lf_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
+                    }
+                }
 
                 recon_yoffset += 16;
                 recon_uvoffset += 8;
@@ -667,19 +938,28 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
 
                 xd->above_context++;
 
-                //pbi->current_mb_col_main = mb_col;
-                pbi->current_mb_col[mb_row] = mb_col;
+                pbi->mt_current_mb_col[mb_row] = mb_col;
             }
 
             // adjust to the next row of mbs
-            vp8_extend_mb_row(
-                &pc->yv12_fb[dst_fb_idx],
-                xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
-            );
+            if (pbi->common.filter_level)
+            {
+                if(mb_row != pc->mb_rows-1)
+                {
+                    int lasty = pc->yv12_fb[ref_fb_idx].y_width + VP8BORDERINPIXELS;
+                    int lastuv = (pc->yv12_fb[ref_fb_idx].y_width>>1) + (VP8BORDERINPIXELS>>1);
+
+                    for (i = 0; i < 4; i++)
+                    {
+                        pbi->mt_yabove_row[mb_row +1][lasty + i] = pbi->mt_yabove_row[mb_row +1][lasty -1];
+                        pbi->mt_uabove_row[mb_row +1][lastuv + i] = pbi->mt_uabove_row[mb_row +1][lastuv -1];
+                        pbi->mt_vabove_row[mb_row +1][lastuv + i] = pbi->mt_vabove_row[mb_row +1][lastuv -1];
+                    }
+                }
+            }else
+                vp8_extend_mb_row(&pc->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8);
 
             ++xd->mode_info_context;      /* skip prediction column */
-
-            pbi->last_mb_row_decoded = mb_row;
         }
         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
     }
@@ -690,131 +970,3 @@ void vp8_mtdecode_mb_rows(VP8D_COMP *pbi,
     (void) xd;
 #endif
 }
-
-
-void vp8_mt_loop_filter_frame( VP8D_COMP *pbi)
-{
-#if CONFIG_MULTITHREAD
-    VP8_COMMON *cm  = &pbi->common;
-    MACROBLOCKD *mbd = &pbi->mb;
-    int default_filt_lvl = pbi->common.filter_level;
-
-    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
-    loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
-
-    int mb_row;
-    int mb_col;
-
-    int filter_level;
-    int alt_flt_enabled = mbd->segmentation_enabled;
-
-    int i;
-    unsigned char *y_ptr, *u_ptr, *v_ptr;
-
-    volatile int *last_row_current_mb_col=NULL;
-
-    vp8_setup_loop_filter_thread_data(pbi, mbd, pbi->mb_row_di, pbi->decoding_thread_count);
-
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
-
-    // Note the baseline filter values for each segment
-    if (alt_flt_enabled)
-    {
-        for (i = 0; i < MAX_MB_SEGMENTS; i++)
-        {
-            // Abs value
-            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
-                pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
-            else
-            {
-                pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
-            }
-        }
-    }
-    else
-    {
-        for (i = 0; i < MAX_MB_SEGMENTS; i++)
-            pbi->mt_baseline_filter_level[i] = default_filt_lvl;
-    }
-
-    // Initialize the loop filter for this frame.
-    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
-        vp8_init_loop_filter(cm);
-    else if (frame_type != cm->last_frame_type)
-        vp8_frame_init_loop_filter(lfi, frame_type);
-
-    for (i = 0; i < pbi->decoding_thread_count; i++)
-        sem_post(&pbi->h_event_start_lpf[i]);
-        // sem_post(&pbi->h_event_start_lpf);
-
-    // Set up the buffer pointers
-    y_ptr = post->y_buffer;
-    u_ptr = post->u_buffer;
-    v_ptr = post->v_buffer;
-
-    // vp8_filter each macro block
-    for (mb_row = 0; mb_row < cm->mb_rows; mb_row+= (pbi->decoding_thread_count + 1))
-    {
-        if (mb_row > 0)
-            last_row_current_mb_col = &pbi->current_mb_col[mb_row -1];
-
-        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
-        {
-            int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
-
-            if ( mb_row > 0 && (mb_col & 7) == 0){
-            // if ( mb_row > 0 ){
-                while (mb_col > (*last_row_current_mb_col-8) && *last_row_current_mb_col != cm->mb_cols - 1)
-                {
-                    x86_pause_hint();
-                    thread_sleep(0);
-                }
-            }
-
-            filter_level = pbi->mt_baseline_filter_level[Segment];
-
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-            // Apply any context driven MB level adjustment
-            vp8_adjust_mb_lf_value(mbd, &filter_level);
-
-            if (filter_level)
-            {
-                if (mb_col > 0)
-                    cm->lf_mbv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                if (mbd->mode_info_context->mbmi.dc_diff > 0)
-                    cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                // don't apply across umv border
-                if (mb_row > 0)
-                    cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-
-                if (mbd->mode_info_context->mbmi.dc_diff > 0)
-                    cm->lf_bh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
-            }
-
-            y_ptr += 16;
-            u_ptr += 8;
-            v_ptr += 8;
-
-            mbd->mode_info_context++;     // step to next MB
-            pbi->current_mb_col[mb_row] = mb_col;
-        }
-        mbd->mode_info_context++;         // Skip border mb
-
-        //update for multi-thread
-        y_ptr += post->y_stride  * 16 * (pbi->decoding_thread_count + 1) - post->y_width;
-        u_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
-        v_ptr += post->uv_stride *  8 * (pbi->decoding_thread_count + 1) - post->uv_width;
-        mbd->mode_info_context += pbi->decoding_thread_count * mbd->mode_info_stride;
-    }
-
-    sem_wait(&pbi->h_event_end_lpf);
-#else
-    (void) pbi;
-#endif
-}
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index ecca18a0a..b357b28bc 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -81,6 +81,8 @@ VP8_COMMON_SRCS-yes += common/recon.c
 VP8_COMMON_SRCS-yes += common/reconinter.c
 VP8_COMMON_SRCS-yes += common/reconintra.c
 VP8_COMMON_SRCS-yes += common/reconintra4x4.c
+VP8_COMMON_SRCS-yes += common/reconintra_mt.h
+VP8_COMMON_SRCS-yes += common/reconintra_mt.c
 VP8_COMMON_SRCS-yes += common/setupintrarecon.c
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
 VP8_COMMON_SRCS-yes += common/textblit.c

From 022323bf856084e812354464a0302f5cb710ee78 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 9 Sep 2010 15:55:19 -0400
Subject: [PATCH 167/307] reorder data to use wider instructions

the previous commit laid the groundwork by doing two sets of idcts
together. this moved that further by grouping the interesting data
(q[0], q+16[0]) together to allow using wider instructions. also
managed to drop a few instructions by recognizing that the constant
for sinpi8sqrt2 could be downshifted all the time which avoided a
dowshift as well as workarounds for a function which only accepted
signed data

looks like a modest gain for performance: at qcif, went from ~180
fps to ~183
Change-Id: I842673f3080b8239e026cc9b50346dbccbab4adf
---
 vp8/decoder/arm/neon/idct_blk_neon.c          |   2 -
 .../arm/neon/idct_dequant_dc_full_2x_neon.asm | 256 ++++++++++--------
 .../arm/neon/idct_dequant_full_2x_neon.asm    | 249 +++++++++--------
 3 files changed, 268 insertions(+), 239 deletions(-)

diff --git a/vp8/decoder/arm/neon/idct_blk_neon.c b/vp8/decoder/arm/neon/idct_blk_neon.c
index 103cce74e..fe4f2e0d4 100644
--- a/vp8/decoder/arm/neon/idct_blk_neon.c
+++ b/vp8/decoder/arm/neon/idct_blk_neon.c
@@ -82,8 +82,6 @@ void vp8_dequant_idct_add_uv_block_neon
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dstu, unsigned char *dstv, int stride, char *eobs)
 {
-    int i;
-
     if (((short *)eobs)[0] & 0xfefe)
         idct_dequant_full_2x_neon (q, dq, pre, dstu, 8, stride);
     else
diff --git a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
index babfb91ad..ad4364adc 100644
--- a/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
+++ b/vp8/decoder/arm/neon/idct_dequant_dc_full_2x_neon.asm
@@ -24,138 +24,151 @@
 ; sp    stride
 ; sp+4  *dc
 |idct_dequant_dc_full_2x_neon| PROC
-    vld1.16         {q3, q4}, [r0]          ; lo input
-    vld1.16         {q5, q6}, [r1]          ; use the same dq for both
+    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
+    vld1.16         {q2, q3}, [r0]          ; l q
     mov             r1, #16                 ; pitch
     add             r0, r0, #32
-    vld1.16         {q10, q11}, [r0]        ; hi input
+    vld1.16         {q4, q5}, [r0]          ; r q
     add             r12, r2, #4
-    vld1.32         {d14[0]}, [r2], r1      ; lo pred
-    vld1.32         {d14[1]}, [r2], r1
-    vld1.32         {d15[0]}, [r2], r1
-    vld1.32         {d15[1]}, [r2]
-    vld1.32         {d28[0]}, [r12], r1     ; hi pred
-    vld1.32         {d28[1]}, [r12], r1
-    vld1.32         {d29[0]}, [r12], r1
-    ldr             r1, [sp, #4]            ; dc
-    vld1.32         {d29[1]}, [r12]
+    ; interleave the predictors
+    vld1.32         {d28[0]}, [r2], r1      ; l pre
+    vld1.32         {d28[1]}, [r12], r1     ; r pre
+    vld1.32         {d29[0]}, [r2], r1
+    vld1.32         {d29[1]}, [r12], r1
+    vld1.32         {d30[0]}, [r2], r1
+    vld1.32         {d30[1]}, [r12], r1
+    vld1.32         {d31[0]}, [r2]
+    ldr             r1, [sp, #4]
+    vld1.32         {d31[1]}, [r12]
 
     ldr             r2, _CONSTANTS_
 
     ldrh            r12, [r1], #2           ; lo *dc
     ldrh            r1, [r1]                ; hi *dc
 
-    vmul.i16        q1, q3, q5              ; lo input * dq
-    vmul.i16        q2, q4, q6
-    vmul.i16        q8, q10, q5             ; hi input * dq
-    vmul.i16        q9, q11, q6
+    ; dequant: q[i] = q[i] * dq[i]
+    vmul.i16        q2, q2, q0
+    vmul.i16        q3, q3, q1
+    vmul.i16        q4, q4, q0
+    vmul.i16        q5, q5, q1
 
-    vmov.16         d2[0], r12              ; move lo dc up to neon, overwrite first element
-    vmov.16         d16[0], r1              ; move hi dc up to neon, overwrite first element
-
-    ldr             r1, [sp]                ; stride
+    ; move dc up to neon and overwrite first element
+    vmov.16         d4[0], r12
+    vmov.16         d8[0], r1
 
     vld1.16         {d0}, [r2]
-    vswp            d3, d4                  ; lo q2(vp[4] vp[12])
-    vswp            d17, d18                ; hi q2(vp[4] vp[12])
 
-    vqdmulh.s16     q3, q2, d0[2]           ; lo * constants
-    vqdmulh.s16     q4, q2, d0[0]
-    vqdmulh.s16     q10, q9, d0[2]          ; hi * constants
-    vqdmulh.s16     q11, q9, d0[0]
+    ; q2: l0r0  q3: l8r8
+    ; q4: l4r4  q5: l12r12
+    vswp            d5, d8
+    vswp            d7, d10
 
-    vqadd.s16       d12, d2, d3             ; lo a1
-    vqsub.s16       d13, d2, d3             ; lo b1
-    vqadd.s16       d26, d16, d17           ; hi a1
-    vqsub.s16       d27, d16, d17           ; hi b1
+    ; _CONSTANTS_ * 4,12 >> 16
+    ; q6:  4 * sinpi : c1/temp1
+    ; q7: 12 * sinpi : d1/temp2
+    ; q8:  4 * cospi
+    ; q9: 12 * cospi
+    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q7, q5, d0[2]
+    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
+    vqdmulh.s16     q9, q5, d0[0]
 
-    vshr.s16        q3, q3, #1              ; lo
-    vshr.s16        q4, q4, #1
-    vshr.s16        q10, q10, #1            ; hi
+    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
+    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8
+
+    ; vqdmulh only accepts signed values. this was a problem because
+    ; our constant had the high bit set, and was treated as a negative value.
+    ; vqdmulh also doubles the value before it shifts by 16. we need to
+    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+    ; so we can shift the constant without losing precision. this avoids
+    ; shift again afterward, but also avoids the sign issue. win win!
+    ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+    ; pre-shift it
+    vshr.s16        q8, q8, #1
+    vshr.s16        q9, q9, #1
+
+    ; q4:  4 +  4 * cospi : d1/temp1
+    ; q5: 12 + 12 * cospi : c1/temp2
+    vqadd.s16       q4, q4, q8
+    vqadd.s16       q5, q5, q9
+
+    ; c1 = temp1 - temp2
+    ; d1 = temp1 + temp2
+    vqsub.s16       q2, q6, q5
+    vqadd.s16       q3, q4, q7
+
+    ; [0]: a1+d1
+    ; [1]: b1+c1
+    ; [2]: b1-c1
+    ; [3]: a1-d1
+    vqadd.s16       q4, q10, q3
+    vqadd.s16       q5, q11, q2
+    vqsub.s16       q6, q11, q2
+    vqsub.s16       q7, q10, q3
+
+    ; rotate
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+    ; idct loop 2
+    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+    ; q6: l 2, 6,10,14 r 2, 6,10,14
+    ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+    ; q8:  1 * sinpi : c1/temp1
+    ; q9:  3 * sinpi : d1/temp2
+    ; q10: 1 * cospi
+    ; q11: 3 * cospi
+    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q9, q7, d0[2]
+    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
+    vqdmulh.s16     q11, q7, d0[0]
+
+    vqadd.s16       q2, q4, q6             ; a1 = 0 + 2
+    vqsub.s16       q3, q4, q6             ; b1 = 0 - 2
+
+    ; see note on shifting above
+    vshr.s16        q10, q10, #1
     vshr.s16        q11, q11, #1
 
-    vqadd.s16       q3, q3, q2              ; lo
-    vqadd.s16       q4, q4, q2
-    vqadd.s16       q10, q10, q9            ; hi
-    vqadd.s16       q11, q11, q9
+    ; q10: 1 + 1 * cospi : d1/temp1
+    ; q11: 3 + 3 * cospi : c1/temp2
+    vqadd.s16       q10, q5, q10
+    vqadd.s16       q11, q7, q11
 
-    vqsub.s16       d10, d6, d9             ; lo c1
-    vqadd.s16       d11, d7, d8             ; lo d1
-    vqsub.s16       d24, d20, d23           ; hi c1
-    vqadd.s16       d25, d21, d22           ; hi d1
+    ; q8: c1 = temp1 - temp2
+    ; q9: d1 = temp1 + temp2
+    vqsub.s16       q8, q8, q11
+    vqadd.s16       q9, q10, q9
 
-    vqadd.s16       d2, d12, d11            ; lo
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-    vqadd.s16       d16, d26, d25           ; hi
-    vqadd.s16       d17, d27, d24
-    vqsub.s16       d18, d27, d24
-    vqsub.s16       d19, d26, d25
+    ; a1+d1
+    ; b1+c1
+    ; b1-c1
+    ; a1-d1
+    vqadd.s16       q4, q2, q9
+    vqadd.s16       q5, q3, q8
+    vqsub.s16       q6, q3, q8
+    vqsub.s16       q7, q2, q9
 
-    vtrn.32         d2, d4                  ; lo
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-    vtrn.32         d16, d18                ; hi
-    vtrn.32         d17, d19
-    vtrn.16         d16, d17
-    vtrn.16         d18, d19
+    ; +4 >> 3 (rounding)
+    vrshr.s16       q4, q4, #3              ; lo
+    vrshr.s16       q5, q5, #3
+    vrshr.s16       q6, q6, #3              ; hi
+    vrshr.s16       q7, q7, #3
 
-    vswp            d3, d4                  ; lo
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-    vswp            d17, d18                ; hi
-    vqdmulh.s16     q10, q9, d0[2]
-    vqdmulh.s16     q11, q9, d0[0]
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
 
-    vqadd.s16       d12, d2, d3             ; lo a1
-    vqsub.s16       d13, d2, d3             ; lo b1
-    vqadd.s16       d26, d16, d17           ; hi a1
-    vqsub.s16       d27, d16, d17           ; hi b1
-
-    vshr.s16        q3, q3, #1              ; lo
-    vshr.s16        q4, q4, #1
-    vshr.s16        q10, q10, #1            ; hi
-    vshr.s16        q11, q11, #1
-
-    vqadd.s16       q3, q3, q2              ; lo
-    vqadd.s16       q4, q4, q2
-    vqadd.s16       q10, q10, q9            ; hi
-    vqadd.s16       q11, q11, q9
-
-    vqsub.s16       d10, d6, d9             ; lo c1
-    vqadd.s16       d11, d7, d8             ; lo d1
-    vqsub.s16       d24, d20, d23           ; hi c1
-    vqadd.s16       d25, d21, d22           ; hi d1
-
-    vqadd.s16       d2, d12, d11            ; lo
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-    vqadd.s16       d16, d26, d25           ; hi
-    vqadd.s16       d17, d27, d24
-    vqsub.s16       d18, d27, d24
-    vqsub.s16       d19, d26, d25
-
-    vrshr.s16       q1, q1, #3              ; lo
-    vrshr.s16       q2, q2, #3
-    vrshr.s16       q8, q8, #3              ; hi
-    vrshr.s16       q9, q9, #3
-
-    vtrn.32         d2, d4                  ; lo
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-    vtrn.32         d16, d18                ; hi
-    vtrn.32         d17, d19
-    vtrn.16         d16, d17
-    vtrn.16         d18, d19
-
-    vaddw.u8        q1, q1, d14             ; lo
-    vaddw.u8        q2, q2, d15
-    vaddw.u8        q8, q8, d28             ; hi
-    vaddw.u8        q9, q9, d29
+    ; adding pre
+    ; input is still packed. pre was read interleaved
+    vaddw.u8        q4, q4, d28
+    vaddw.u8        q5, q5, d29
+    vaddw.u8        q6, q6, d30
+    vaddw.u8        q7, q7, d31
 
     vmov.i16        q14, #0
     vmov            q15, q14
@@ -163,19 +176,21 @@
     sub             r0, r0, #32
     vst1.16         {q14, q15}, [r0]        ; write over low input
 
-    vqmovun.s16     d0, q1                  ; lo
-    vqmovun.s16     d1, q2
-    vqmovun.s16     d2, q8                  ; hi
-    vqmovun.s16     d3, q9
+    ;saturate and narrow
+    vqmovun.s16     d0, q4                  ; lo
+    vqmovun.s16     d1, q5
+    vqmovun.s16     d2, q6                  ; hi
+    vqmovun.s16     d3, q7
 
+    ldr             r1, [sp]                ; stride
     add             r2, r3, #4              ; hi
     vst1.32         {d0[0]}, [r3], r1       ; lo
-    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d0[1]}, [r2], r1       ; hi
     vst1.32         {d1[0]}, [r3], r1
-    vst1.32         {d1[1]}, [r3]
-    vst1.32         {d2[0]}, [r2], r1       ; hi
+    vst1.32         {d1[1]}, [r2], r1
+    vst1.32         {d2[0]}, [r3], r1
     vst1.32         {d2[1]}, [r2], r1
-    vst1.32         {d3[0]}, [r2], r1
+    vst1.32         {d3[0]}, [r3]
     vst1.32         {d3[1]}, [r2]
 
     bx             lr
@@ -184,7 +199,8 @@
 
 ; Constant Pool
 _CONSTANTS_       DCD cospi8sqrt2minus1
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2       DCD 0x8a8c8a8c
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this
+sinpi8sqrt2       DCD 0x4546
 
     END
diff --git a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
index 14960f99c..85fff11b3 100644
--- a/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
+++ b/vp8/decoder/arm/neon/idct_dequant_full_2x_neon.asm
@@ -24,131 +24,143 @@
 ; sp    pitch
 ; sp+4  stride
 |idct_dequant_full_2x_neon| PROC
-    vld1.16         {q3, q4}, [r0]          ; lo input
-    vld1.16         {q5, q6}, [r1]          ; use the same dq for both
+    vld1.16         {q0, q1}, [r1]          ; dq (same l/r)
+    vld1.16         {q2, q3}, [r0]          ; l q
     ldr             r1, [sp]                ; pitch
     add             r0, r0, #32
-    vld1.16         {q10, q11}, [r0]        ; hi input
+    vld1.16         {q4, q5}, [r0]          ; r q
     add             r12, r2, #4
-    vld1.32         {d14[0]}, [r2], r1      ; lo pred
-    vld1.32         {d14[1]}, [r2], r1
-    vld1.32         {d15[0]}, [r2], r1
-    vld1.32         {d15[1]}, [r2]
-    vld1.32         {d28[0]}, [r12], r1     ; hi pred
-    vld1.32         {d28[1]}, [r12], r1
-    vld1.32         {d29[0]}, [r12], r1
-    vld1.32         {d29[1]}, [r12]
+    ; interleave the predictors
+    vld1.32         {d28[0]}, [r2], r1      ; l pre
+    vld1.32         {d28[1]}, [r12], r1     ; r pre
+    vld1.32         {d29[0]}, [r2], r1
+    vld1.32         {d29[1]}, [r12], r1
+    vld1.32         {d30[0]}, [r2], r1
+    vld1.32         {d30[1]}, [r12], r1
+    vld1.32         {d31[0]}, [r2]
+    vld1.32         {d31[1]}, [r12]
 
     ldr             r2, _CONSTANTS_
 
-    vmul.i16        q1, q3, q5              ; lo input * dq
-    vmul.i16        q2, q4, q6
-    vmul.i16        q8, q10, q5             ; hi input * dq
-    vmul.i16        q9, q11, q6
-
-    ldr             r1, [sp, #4]            ; stride
+    ; dequant: q[i] = q[i] * dq[i]
+    vmul.i16        q2, q2, q0
+    vmul.i16        q3, q3, q1
+    vmul.i16        q4, q4, q0
+    vmul.i16        q5, q5, q1
 
     vld1.16         {d0}, [r2]
-    vswp            d3, d4                  ; lo q2(vp[4] vp[12])
-    vswp            d17, d18                ; hi q2(vp[4] vp[12])
 
-    vqdmulh.s16     q3, q2, d0[2]           ; lo * constants
-    vqdmulh.s16     q4, q2, d0[0]
-    vqdmulh.s16     q10, q9, d0[2]          ; hi * constants
-    vqdmulh.s16     q11, q9, d0[0]
+    ; q2: l0r0  q3: l8r8
+    ; q4: l4r4  q5: l12r12
+    vswp            d5, d8
+    vswp            d7, d10
 
-    vqadd.s16       d12, d2, d3             ; lo a1
-    vqsub.s16       d13, d2, d3             ; lo b1
-    vqadd.s16       d26, d16, d17           ; hi a1
-    vqsub.s16       d27, d16, d17           ; hi b1
+    ; _CONSTANTS_ * 4,12 >> 16
+    ; q6:  4 * sinpi : c1/temp1
+    ; q7: 12 * sinpi : d1/temp2
+    ; q8:  4 * cospi
+    ; q9: 12 * cospi
+    vqdmulh.s16     q6, q4, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q7, q5, d0[2]
+    vqdmulh.s16     q8, q4, d0[0]           ; cospi8sqrt2minus1
+    vqdmulh.s16     q9, q5, d0[0]
 
-    vshr.s16        q3, q3, #1              ; lo
-    vshr.s16        q4, q4, #1
-    vshr.s16        q10, q10, #1            ; hi
+    vqadd.s16       q10, q2, q3             ; a1 = 0 + 8
+    vqsub.s16       q11, q2, q3             ; b1 = 0 - 8
+
+    ; vqdmulh only accepts signed values. this was a problem because
+    ; our constant had the high bit set, and was treated as a negative value.
+    ; vqdmulh also doubles the value before it shifts by 16. we need to
+    ; compensate for this. in the case of sinpi8sqrt2, the lowest bit is 0,
+    ; so we can shift the constant without losing precision. this avoids
+    ; shift again afterward, but also avoids the sign issue. win win!
+    ; for cospi8sqrt2minus1 the lowest bit is 1, so we lose precision if we
+    ; pre-shift it
+    vshr.s16        q8, q8, #1
+    vshr.s16        q9, q9, #1
+
+    ; q4:  4 +  4 * cospi : d1/temp1
+    ; q5: 12 + 12 * cospi : c1/temp2
+    vqadd.s16       q4, q4, q8
+    vqadd.s16       q5, q5, q9
+
+    ; c1 = temp1 - temp2
+    ; d1 = temp1 + temp2
+    vqsub.s16       q2, q6, q5
+    vqadd.s16       q3, q4, q7
+
+    ; [0]: a1+d1
+    ; [1]: b1+c1
+    ; [2]: b1-c1
+    ; [3]: a1-d1
+    vqadd.s16       q4, q10, q3
+    vqadd.s16       q5, q11, q2
+    vqsub.s16       q6, q11, q2
+    vqsub.s16       q7, q10, q3
+
+    ; rotate
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
+    ; idct loop 2
+    ; q4: l 0, 4, 8,12 r 0, 4, 8,12
+    ; q5: l 1, 5, 9,13 r 1, 5, 9,13
+    ; q6: l 2, 6,10,14 r 2, 6,10,14
+    ; q7: l 3, 7,11,15 r 3, 7,11,15
+
+    ; q8:  1 * sinpi : c1/temp1
+    ; q9:  3 * sinpi : d1/temp2
+    ; q10: 1 * cospi
+    ; q11: 3 * cospi
+    vqdmulh.s16     q8, q5, d0[2]           ; sinpi8sqrt2
+    vqdmulh.s16     q9, q7, d0[2]
+    vqdmulh.s16     q10, q5, d0[0]          ; cospi8sqrt2minus1
+    vqdmulh.s16     q11, q7, d0[0]
+
+    vqadd.s16       q2, q4, q6             ; a1 = 0 + 2
+    vqsub.s16       q3, q4, q6             ; b1 = 0 - 2
+
+    ; see note on shifting above
+    vshr.s16        q10, q10, #1
     vshr.s16        q11, q11, #1
 
-    vqadd.s16       q3, q3, q2              ; lo
-    vqadd.s16       q4, q4, q2
-    vqadd.s16       q10, q10, q9            ; hi
-    vqadd.s16       q11, q11, q9
+    ; q10: 1 + 1 * cospi : d1/temp1
+    ; q11: 3 + 3 * cospi : c1/temp2
+    vqadd.s16       q10, q5, q10
+    vqadd.s16       q11, q7, q11
 
-    vqsub.s16       d10, d6, d9             ; lo c1
-    vqadd.s16       d11, d7, d8             ; lo d1
-    vqsub.s16       d24, d20, d23           ; hi c1
-    vqadd.s16       d25, d21, d22           ; hi d1
+    ; q8: c1 = temp1 - temp2
+    ; q9: d1 = temp1 + temp2
+    vqsub.s16       q8, q8, q11
+    vqadd.s16       q9, q10, q9
 
-    vqadd.s16       d2, d12, d11            ; lo
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-    vqadd.s16       d16, d26, d25           ; hi
-    vqadd.s16       d17, d27, d24
-    vqsub.s16       d18, d27, d24
-    vqsub.s16       d19, d26, d25
+    ; a1+d1
+    ; b1+c1
+    ; b1-c1
+    ; a1-d1
+    vqadd.s16       q4, q2, q9
+    vqadd.s16       q5, q3, q8
+    vqsub.s16       q6, q3, q8
+    vqsub.s16       q7, q2, q9
 
-    vtrn.32         d2, d4                  ; lo
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-    vtrn.32         d16, d18                ; hi
-    vtrn.32         d17, d19
-    vtrn.16         d16, d17
-    vtrn.16         d18, d19
+    ; +4 >> 3 (rounding)
+    vrshr.s16       q4, q4, #3              ; lo
+    vrshr.s16       q5, q5, #3
+    vrshr.s16       q6, q6, #3              ; hi
+    vrshr.s16       q7, q7, #3
 
-    vswp            d3, d4                  ; lo
-    vqdmulh.s16     q3, q2, d0[2]
-    vqdmulh.s16     q4, q2, d0[0]
-    vswp            d17, d18                ; hi
-    vqdmulh.s16     q10, q9, d0[2]
-    vqdmulh.s16     q11, q9, d0[0]
+    vtrn.32         q4, q6
+    vtrn.32         q5, q7
+    vtrn.16         q4, q5
+    vtrn.16         q6, q7
 
-    vqadd.s16       d12, d2, d3             ; lo a1
-    vqsub.s16       d13, d2, d3             ; lo b1
-    vqadd.s16       d26, d16, d17           ; hi a1
-    vqsub.s16       d27, d16, d17           ; hi b1
-
-    vshr.s16        q3, q3, #1              ; lo
-    vshr.s16        q4, q4, #1
-    vshr.s16        q10, q10, #1            ; hi
-    vshr.s16        q11, q11, #1
-
-    vqadd.s16       q3, q3, q2              ; lo
-    vqadd.s16       q4, q4, q2
-    vqadd.s16       q10, q10, q9            ; hi
-    vqadd.s16       q11, q11, q9
-
-    vqsub.s16       d10, d6, d9             ; lo c1
-    vqadd.s16       d11, d7, d8             ; lo d1
-    vqsub.s16       d24, d20, d23           ; hi c1
-    vqadd.s16       d25, d21, d22           ; hi d1
-
-    vqadd.s16       d2, d12, d11            ; lo
-    vqadd.s16       d3, d13, d10
-    vqsub.s16       d4, d13, d10
-    vqsub.s16       d5, d12, d11
-    vqadd.s16       d16, d26, d25           ; hi
-    vqadd.s16       d17, d27, d24
-    vqsub.s16       d18, d27, d24
-    vqsub.s16       d19, d26, d25
-
-    vrshr.s16       q1, q1, #3              ; lo
-    vrshr.s16       q2, q2, #3
-    vrshr.s16       q8, q8, #3              ; hi
-    vrshr.s16       q9, q9, #3
-
-    vtrn.32         d2, d4                  ; lo
-    vtrn.32         d3, d5
-    vtrn.16         d2, d3
-    vtrn.16         d4, d5
-    vtrn.32         d16, d18                ; hi
-    vtrn.32         d17, d19
-    vtrn.16         d16, d17
-    vtrn.16         d18, d19
-
-    vaddw.u8        q1, q1, d14             ; lo
-    vaddw.u8        q2, q2, d15
-    vaddw.u8        q8, q8, d28             ; hi
-    vaddw.u8        q9, q9, d29
+    ; adding pre
+    ; input is still packed. pre was read interleaved
+    vaddw.u8        q4, q4, d28
+    vaddw.u8        q5, q5, d29
+    vaddw.u8        q6, q6, d30
+    vaddw.u8        q7, q7, d31
 
     vmov.i16        q14, #0
     vmov            q15, q14
@@ -156,19 +168,21 @@
     sub             r0, r0, #32
     vst1.16         {q14, q15}, [r0]        ; write over low input
 
-    vqmovun.s16     d0, q1                  ; lo
-    vqmovun.s16     d1, q2
-    vqmovun.s16     d2, q8                  ; hi
-    vqmovun.s16     d3, q9
+    ;saturate and narrow
+    vqmovun.s16     d0, q4                  ; lo
+    vqmovun.s16     d1, q5
+    vqmovun.s16     d2, q6                  ; hi
+    vqmovun.s16     d3, q7
 
+    ldr             r1, [sp, #4]            ; stride
     add             r2, r3, #4              ; hi
     vst1.32         {d0[0]}, [r3], r1       ; lo
-    vst1.32         {d0[1]}, [r3], r1
+    vst1.32         {d0[1]}, [r2], r1       ; hi
     vst1.32         {d1[0]}, [r3], r1
-    vst1.32         {d1[1]}, [r3]
-    vst1.32         {d2[0]}, [r2], r1       ; hi
+    vst1.32         {d1[1]}, [r2], r1
+    vst1.32         {d2[0]}, [r3], r1
     vst1.32         {d2[1]}, [r2], r1
-    vst1.32         {d3[0]}, [r2], r1
+    vst1.32         {d3[0]}, [r3]
     vst1.32         {d3[1]}, [r2]
 
     bx             lr
@@ -177,7 +191,8 @@
 
 ; Constant Pool
 _CONSTANTS_       DCD cospi8sqrt2minus1
-cospi8sqrt2minus1 DCD 0x4e7b4e7b
-sinpi8sqrt2       DCD 0x8a8c8a8c
+cospi8sqrt2minus1 DCD 0x4e7b
+; because the lowest bit in 0x8a8c is 0, we can pre-shift this
+sinpi8sqrt2       DCD 0x4546
 
     END

From 236906863a043f63e2e92c91b5f6d73939aaa6df Mon Sep 17 00:00:00 2001
From: Guillermo Ballester Valor <gbvalor@gmail.com>
Date: Fri, 11 Jun 2010 14:33:49 -0400
Subject: [PATCH 168/307] Add high limit check for unsigned parameters

The patch related with issue #55 (5a72620) fixed some warnings, but the
fix was not optimal. It actually was a trick to confuse compiler rather
than a fix.

This patch fixes it by creating a new macro used when needed just a high
limit check for an unsigned.

Change-Id: I94b322e0f7fb07604b3b1df1f9321185f48cfcb5
---
 vp8/vp8_cx_iface.c | 37 +++++++++++++++++++++----------------
 1 file changed, 21 insertions(+), 16 deletions(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 8845368fb..e3f99c08d 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -113,6 +113,11 @@ update_error_state(vpx_codec_alg_priv_t                 *ctx,
             ERROR(#memb " out of range ["#lo".."#hi"]");\
     } while(0)
 
+#define RANGE_CHECK_HI(p,memb,hi) do {\
+        if(!((p)->memb <= (hi))) \
+            ERROR(#memb " out of range [.."#hi"]");\
+    } while(0)
+
 #define RANGE_CHECK_LO(p,memb,lo) do {\
         if(!((p)->memb >= (lo))) \
             ERROR(#memb " out of range ["#lo"..]");\
@@ -130,24 +135,24 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
     RANGE_CHECK(cfg, g_h,                   2, 16384);
     RANGE_CHECK(cfg, g_timebase.den,        1, 1000000000);
     RANGE_CHECK(cfg, g_timebase.num,        1, cfg->g_timebase.den);
-    RANGE_CHECK(cfg, g_profile,             0, 3);
-    RANGE_CHECK(cfg, rc_min_quantizer,      0, 63);
-    RANGE_CHECK(cfg, rc_max_quantizer,      0, 63);
-    RANGE_CHECK(cfg, g_threads,             0, 64);
+    RANGE_CHECK_HI(cfg, g_profile,          3);
+    RANGE_CHECK_HI(cfg, rc_min_quantizer,   63);
+    RANGE_CHECK_HI(cfg, rc_max_quantizer,   63);
+    RANGE_CHECK_HI(cfg, g_threads,          64);
 #if !(CONFIG_REALTIME_ONLY)
-    RANGE_CHECK(cfg, g_lag_in_frames,       0, 25);
+    RANGE_CHECK_HI(cfg, g_lag_in_frames,    25);
 #else
-    RANGE_CHECK(cfg, g_lag_in_frames,       0, 0);
+    RANGE_CHECK_HI(cfg, g_lag_in_frames,    0);
 #endif
     RANGE_CHECK(cfg, rc_end_usage,          VPX_VBR, VPX_CBR);
-    RANGE_CHECK(cfg, rc_undershoot_pct,     0, 100);
-    RANGE_CHECK(cfg, rc_2pass_vbr_bias_pct, 0, 100);
+    RANGE_CHECK_HI(cfg, rc_undershoot_pct,  100);
+    RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
     RANGE_CHECK(cfg, kf_mode,               VPX_KF_DISABLED, VPX_KF_AUTO);
     //RANGE_CHECK_BOOL(cfg,                 g_delete_firstpassfile);
     RANGE_CHECK_BOOL(cfg,                   rc_resize_allowed);
-    RANGE_CHECK(cfg, rc_dropframe_thresh,   0, 100);
-    RANGE_CHECK(cfg, rc_resize_up_thresh,   0, 100);
-    RANGE_CHECK(cfg, rc_resize_down_thresh, 0, 100);
+    RANGE_CHECK_HI(cfg, rc_dropframe_thresh,   100);
+    RANGE_CHECK_HI(cfg, rc_resize_up_thresh,   100);
+    RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100);
 #if !(CONFIG_REALTIME_ONLY)
     RANGE_CHECK(cfg,        g_pass,         VPX_RC_ONE_PASS, VPX_RC_LAST_PASS);
 #else
@@ -166,7 +171,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 #if !(CONFIG_REALTIME_ONLY)
     RANGE_CHECK(vp8_cfg, encoding_mode,      VP8_BEST_QUALITY_ENCODING, VP8_REAL_TIME_ENCODING);
     RANGE_CHECK(vp8_cfg, cpu_used,           -16, 16);
-    RANGE_CHECK(vp8_cfg, noise_sensitivity,  0, 6);
+    RANGE_CHECK_HI(vp8_cfg, noise_sensitivity,  6);
 #else
     RANGE_CHECK(vp8_cfg, encoding_mode,      VP8_REAL_TIME_ENCODING, VP8_REAL_TIME_ENCODING);
 
@@ -177,10 +182,10 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 #endif
 
     RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
-    RANGE_CHECK(vp8_cfg, Sharpness,         0, 7);
-    RANGE_CHECK(vp8_cfg, arnr_max_frames,    0, 15);
-    RANGE_CHECK(vp8_cfg, arnr_strength,     0, 6);
-    RANGE_CHECK(vp8_cfg, arnr_type,         0, 0xffffffff);
+    RANGE_CHECK_HI(vp8_cfg, Sharpness,       7);
+    RANGE_CHECK_HI(vp8_cfg, arnr_max_frames, 15);
+    RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
+    RANGE_CHECK_HI(vp8_cfg, arnr_type,       0xffffffff);
 
     if (cfg->g_pass == VPX_RC_LAST_PASS)
     {

From 8eae7fe7e872874a997018acb9a2ff7c49be2632 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 15 Sep 2010 14:07:32 -0700
Subject: [PATCH 169/307] Better choice of instruction filter mask comparision.

Use pmaxub instead of a combination of psubusb/por to
determine if any comparisons go over the limit.

Change-Id: I3f0bd7d2aabe5fee9ba6620508e2b60605abcb82
---
 vp8/common/x86/loopfilter_sse2.asm | 70 ++++++++++++------------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 57276b661..de801df52 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -11,6 +11,8 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
+; Use of pmaxub instead of psubusb to compute filter mask was seen
+; in ffvp8
 
 %macro LFH_FILTER_MASK 1
 %if %1
@@ -33,8 +35,6 @@
         psubusb     xmm2,                   xmm6              ; q3-=q2
         por         xmm1,                   xmm2              ; abs(q3-q2)
 
-        psubusb     xmm1,                   xmm7
-
 %if %1
         movdqa      xmm4,                   [rsi+rax]         ; q1
 %else
@@ -49,9 +49,7 @@
         psubusb     xmm4,                   xmm6              ; q1-=q2
         psubusb     xmm6,                   xmm3              ; q2-=q1
         por         xmm4,                   xmm6              ; abs(q2-q1)
-        psubusb     xmm4,                   xmm7
-
-        por         xmm1,                   xmm4
+        pmaxub      xmm1,                   xmm4
 
 %if %1
         movdqa      xmm4,                   [rsi]             ; q0
@@ -67,9 +65,7 @@
         psubusb     xmm3,                   xmm0              ; q1-=q0
         por         xmm4,                   xmm3              ; abs(q0-q1)
         movdqa      t0,                     xmm4              ; save to t0
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
+        pmaxub      xmm1,                   xmm4
 
 %if %1
         neg         rax                     ; negate pitch to deal with above border
@@ -95,9 +91,7 @@
         psubusb     xmm4,                   xmm2              ; p2-=p3
         psubusb     xmm2,                   xmm5              ; p3-=p2
         por         xmm4,                   xmm2              ; abs(p3 - p2)
-
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
+        pmaxub      xmm1,                   xmm4
 
 %if %1
         movdqa      xmm4,                   [rsi+2*rax]       ; p1
@@ -113,9 +107,8 @@
         psubusb     xmm4,                   xmm5              ; p1-=p2
         psubusb     xmm5,                   xmm3              ; p2-=p1
         por         xmm4,                   xmm5              ; abs(p2 - p1)
-        psubusb     xmm4,                   xmm7
+        pmaxub      xmm1,                   xmm4
 
-        por         xmm1,                   xmm4
         movdqa      xmm2,                   xmm3              ; p1
 
 %if %1
@@ -133,8 +126,8 @@
         por         xmm4,                   xmm3              ; abs(p1 - p0)
         movdqa        t1,                   xmm4              ; save to t1
 
-        psubusb     xmm4,                   xmm7
-        por         xmm1,                   xmm4
+        pmaxub      xmm1,                   xmm4
+        psubusb     xmm1,                   xmm7
 
 %if %1
         movdqa      xmm3,                   [rdi]             ; q1
@@ -872,19 +865,18 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         psubusb     xmm0,               xmm7            ; q2-q3
 
         psubusb     xmm7,               xmm6            ; q3-q2
-        por         xmm7,               xmm0            ; abs (q3-q2)
-
         movdqa      xmm4,               xmm5            ; q1
+
+        por         xmm7,               xmm0            ; abs (q3-q2)
         psubusb     xmm4,               xmm6            ; q1-q2
 
-        psubusb     xmm6,               xmm5            ; q2-q1
-        por         xmm6,               xmm4            ; abs (q2-q1)
-
         movdqa      xmm0,               xmm1
+        psubusb     xmm6,               xmm5            ; q2-q1
 
+        por         xmm6,               xmm4            ; abs (q2-q1)
         psubusb     xmm0,               xmm2            ; p2 - p3;
-        psubusb     xmm2,               xmm1            ; p3 - p2;
 
+        psubusb     xmm2,               xmm1            ; p3 - p2;
         por         xmm0,               xmm2            ; abs(p2-p3)
 %if %1
         movdqa      xmm2,               [rdx]           ; p1
@@ -892,39 +884,28 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm2,               [rdx+32]        ; p1
 %endif
         movdqa      xmm5,               xmm2            ; p1
+        pmaxub      xmm0,               xmm7
 
         psubusb     xmm5,               xmm1            ; p1-p2
         psubusb     xmm1,               xmm2            ; p2-p1
 
-        por         xmm1,               xmm5            ; abs(p2-p1)
-
-        mov         rdx,                arg(3)          ; limit
-        movdqa      xmm4,               [rdx]           ; limit
-
-        psubusb     xmm7,               xmm4
-
-        psubusb     xmm0,               xmm4            ; abs(p3-p2) > limit
-        psubusb     xmm1,               xmm4            ; abs(p2-p1) > limit
-
-        psubusb     xmm6,               xmm4            ; abs(q2-q1) > limit
-        por         xmm7,               xmm6            ; or
-
-        por         xmm0,               xmm1
-        por         xmm0,               xmm7            ; abs(q3-q2) > limit || abs(p3-p2) > limit ||abs(p2-p1) > limit || abs(q2-q1) > limit
-
-        movdqa      xmm1,               xmm2            ; p1
-
         movdqa      xmm7,               xmm3            ; p0
         psubusb     xmm7,               xmm2            ; p0-p1
 
+        por         xmm1,               xmm5            ; abs(p2-p1)
+        pmaxub      xmm0,               xmm6
+
+        pmaxub      xmm0,               xmm1
+        movdqa      xmm1,               xmm2            ; p1
+
         psubusb     xmm2,               xmm3            ; p1-p0
         por         xmm2,               xmm7            ; abs(p1-p0)
 
         movdqa      t0,                 xmm2            ; save abs(p1-p0)
         lea         rdx,                srct
 
-        psubusb     xmm2,               xmm4            ; abs(p1-p0)>limit
-        por         xmm0,               xmm2            ; mask
+        pmaxub      xmm0,               xmm2
+
 %if %1
         movdqa      xmm5,               [rdx+32]        ; q0
         movdqa      xmm7,               [rdx+48]        ; q1
@@ -940,9 +921,12 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         por         xmm7,               xmm5            ; abs(q1-q0)
 
         movdqa      t1,                 xmm7            ; save abs(q1-q0)
-        psubusb     xmm7,               xmm4            ; abs(q1-q0)> limit
 
-        por         xmm0,               xmm7            ; mask
+        mov         rdx,                arg(3)          ; limit
+        movdqa      xmm4,               [rdx]           ; limit
+
+        pmaxub      xmm0,               xmm7
+        psubusb     xmm0,               xmm4
 
         movdqa      xmm5,               xmm2            ; q1
         psubusb     xmm5,               xmm1            ; q1-=p1

From b7dc9398f2b5b749a1369312ed4a4666e4e326cf Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Mon, 20 Sep 2010 09:30:49 -0700
Subject: [PATCH 170/307] Use movq instead of movdqu.

Movdqu is more expensive (throughput, uops) than movq.  Minimal
impact for newer big cores, but ~2.25% gain on Atom.

Change-Id: I62c80bb1cc01d8a91c350c4c7719462809a4ef7f
---
 vp8/common/x86/subpixel_ssse3.asm | 278 +++++++++++++++++-------------
 1 file changed, 162 insertions(+), 116 deletions(-)

diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index f7209ccc0..fe921c019 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -70,27 +70,35 @@ sym(vp8_filter_block1d8_h6_ssse3):
     sub         rdi, rdx
 ;xmm3 free
 filter_block1d8_h6_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5
 
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
+    movq        xmm2,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10
 
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pmaddubsw   xmm0, xmm4
-    pmaddubsw   xmm1, xmm5
+    punpcklbw   xmm0,   xmm2                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10
 
-    pshufb      xmm2, [shuf3b GLOBAL]
-    add         rdi, rdx
-    pmaddubsw   xmm2, xmm6
+    movdqa      xmm1,   xmm0
+    pmaddubsw   xmm0,   xmm4
+
+    movdqa      xmm2,   xmm1
+    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
+
+    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+    pmaddubsw   xmm1,   xmm5
+
+    lea         rdi,    [rdi + rdx]
+    pmaddubsw   xmm2,   xmm6
 
     lea         rsi,    [rsi + rax]
     dec         rcx
-    paddsw      xmm0, xmm1
-    paddsw      xmm0, xmm7
-    paddsw      xmm0, xmm2
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
+
+    paddsw      xmm0,   xmm1
+    paddsw      xmm2,   xmm7
+
+    paddsw      xmm0,   xmm2
+
+    psraw       xmm0,   7
+
+    packuswb    xmm0,   xmm0
 
     movq        MMWORD Ptr [rdi], xmm0
     jnz         filter_block1d8_h6_rowloop_ssse3
@@ -107,8 +115,8 @@ vp8_filter_block1d8_h4_ssse3:
     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
 
-    movdqa      xmm3, XMMWORD PTR [shuf2b GLOBAL]
-    movdqa      xmm4, XMMWORD PTR [shuf3b GLOBAL]
+    movdqa      xmm3, XMMWORD PTR [shuf2bfrom1 GLOBAL]
+    movdqa      xmm4, XMMWORD PTR [shuf3bfrom1 GLOBAL]
 
     mov         rsi, arg(0)             ;src_ptr
 
@@ -118,24 +126,33 @@ vp8_filter_block1d8_h4_ssse3:
     movsxd      rdx, dword ptr arg(3)   ;output_pitch
 
     sub         rdi, rdx
-;xmm3 free
+
 filter_block1d8_h4_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5
 
-    movdqa      xmm2, xmm0
-    pshufb      xmm0, xmm3 ;[shuf2b GLOBAL]
-    pshufb      xmm2, xmm4 ;[shuf3b GLOBAL]
+    movq        xmm1,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10
 
-    pmaddubsw   xmm0, xmm5
-    add         rdi, rdx
-    pmaddubsw   xmm2, xmm6
+    punpcklbw   xmm0,   xmm1                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10
+
+    movdqa      xmm2,   xmm0
+    pshufb      xmm0,   xmm3
+
+    pshufb      xmm2,   xmm4
+    pmaddubsw   xmm0,   xmm5
+
+    lea         rdi,    [rdi + rdx]
+    pmaddubsw   xmm2,   xmm6
 
     lea         rsi,    [rsi + rax]
     dec         rcx
-    paddsw      xmm0, xmm7
-    paddsw      xmm0, xmm2
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
+
+    paddsw      xmm0,   xmm7
+
+    paddsw      xmm0,   xmm2
+
+    psraw       xmm0,   7
+
+    packuswb    xmm0,   xmm0
 
     movq        MMWORD Ptr [rdi], xmm0
 
@@ -168,74 +185,88 @@ sym(vp8_filter_block1d16_h6_ssse3):
     push        rdi
     ; end prolog
 
-    movsxd      rdx, DWORD PTR arg(5)   ;table index
+    movsxd      rdx, DWORD PTR arg(5)           ;table index
     xor         rsi, rsi
     shl         rdx, 4      ;
 
     lea         rax, [k0_k5 GLOBAL]
     add         rax, rdx
 
-    mov         rdi, arg(2)             ;output_ptr
-    movdqa      xmm7, [rd GLOBAL]
+    mov         rdi, arg(2)                     ;output_ptr
 
 ;;
 ;;    cmp         esi, DWORD PTR [rax]
 ;;    je          vp8_filter_block1d16_h4_ssse3
 
-    mov         rsi, arg(0)             ;src_ptr
+    mov         rsi, arg(0)                     ;src_ptr
 
     movdqa      xmm4, XMMWORD PTR [rax]         ;k0_k5
     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
 
-    movsxd      rax, dword ptr arg(1)   ;src_pixels_per_line
-    movsxd      rcx, dword ptr arg(4)   ;output_height
-    movsxd      rdx, dword ptr arg(3)   ;output_pitch
+    movsxd      rax, dword ptr arg(1)           ;src_pixels_per_line
+    movsxd      rcx, dword ptr arg(4)           ;output_height
+    movsxd      rdx, dword ptr arg(3)           ;output_pitch
 
 filter_block1d16_h6_rowloop_ssse3:
-    movdqu      xmm0,   XMMWORD PTR [rsi - 2]
+    movq        xmm0,   MMWORD PTR [rsi - 2]    ; -2 -1  0  1  2  3  4  5
 
-    movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
-    movdqa      xmm2, xmm1
-    pmaddubsw   xmm0, xmm4
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
+    movq        xmm3,   MMWORD PTR [rsi + 3]    ;  3  4  5  6  7  8  9 10
 
-    movdqu      xmm3,   XMMWORD PTR [rsi + 6]
+    punpcklbw   xmm0,   xmm3                    ; -2  3 -1  4  0  5  1  6  2  7  3  8  4  9  5 10
 
-    pmaddubsw   xmm2, xmm6
-    paddsw      xmm0, xmm1
-    movdqa      xmm1, xmm3
-    pshufb      xmm3, [shuf1b GLOBAL]
-    paddsw      xmm0, xmm7
-    pmaddubsw   xmm3, xmm4
-    paddsw      xmm0, xmm2
-    movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
-    pmaddubsw   xmm1, xmm5
-    pmaddubsw   xmm2, xmm6
+    movdqa      xmm1,   xmm0
+    pmaddubsw   xmm0,   xmm4
+
+    movdqa      xmm2,   xmm1
+    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
+
+    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+    movq        xmm3,   MMWORD PTR [rsi +  6]
+
+    pmaddubsw   xmm1,   xmm5
+    movq        xmm7,   MMWORD PTR [rsi + 11]
+
+    pmaddubsw   xmm2,   xmm6
+    punpcklbw   xmm3,   xmm7
+
+    paddsw      xmm0,   xmm1
+    movdqa      xmm1,   xmm3
+
+    pmaddubsw   xmm3,   xmm4
+    paddsw      xmm0,   xmm2
+
+    movdqa      xmm2,   xmm1
+    paddsw      xmm0,   [rd GLOBAL]
+
+    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
+    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+
+    psraw       xmm0,   7
+    pmaddubsw   xmm1,   xmm5
+
+    pmaddubsw   xmm2,   xmm6
+    packuswb    xmm0,   xmm0
 
-    psraw       xmm0, 7
-    packuswb    xmm0, xmm0
     lea         rsi,    [rsi + rax]
-    paddsw      xmm3, xmm1
-    paddsw      xmm3, xmm7
-    paddsw      xmm3, xmm2
-    psraw       xmm3, 7
-    packuswb    xmm3, xmm3
+    paddsw      xmm3,   xmm1
 
-    punpcklqdq  xmm0, xmm3
+    paddsw      xmm3,   xmm2
+
+    paddsw      xmm3,   [rd GLOBAL]
+
+    psraw       xmm3,   7
+
+    packuswb    xmm3,   xmm3
+
+    punpcklqdq  xmm0,   xmm3
 
     movdqa      XMMWORD Ptr [rdi], xmm0
 
-    add         rdi, rdx
+    lea         rdi,    [rdi + rdx]
     dec         rcx
     jnz         filter_block1d16_h6_rowloop_ssse3
 
-
     ; begin epilog
     pop rdi
     pop rsi
@@ -268,7 +299,7 @@ filter_block1d16_h4_rowloop_ssse3:
     pshufb      xmm3, [shuf3b GLOBAL]
     pshufb      xmm0, [shuf2b GLOBAL]
 
-    paddsw      xmm1, xmm7
+    paddsw      xmm1, [rd GLOBAL]
     paddsw      xmm1, xmm2
 
     pmaddubsw   xmm0, xmm5
@@ -278,7 +309,7 @@ filter_block1d16_h4_rowloop_ssse3:
     packuswb    xmm1, xmm1
     lea         rsi,    [rsi + rax]
     paddsw      xmm3, xmm0
-    paddsw      xmm3, xmm7
+    paddsw      xmm3, [rd GLOBAL]
     psraw       xmm3, 7
     packuswb    xmm3, xmm3
 
@@ -939,17 +970,19 @@ sym(vp8_bilinear_predict16x16_ssse3):
 %if ABI_IS_32BIT=0
         movsxd      r8,         dword ptr arg(5)    ; dst_pitch
 %endif
-        movdqu      xmm3,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
-
-        movdqa      xmm4,       xmm3
-
-        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
-        lea         rsi,        [rsi + rdx]         ; next line
+        movq        xmm3,       [rsi]               ; 00 01 02 03 04 05 06 07
+        movq        xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08
 
         punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
+        movq        xmm4,       [rsi+8]             ; 08 09 10 11 12 13 14 15
+
+        movq        xmm5,       [rsi+9]             ; 09 10 11 12 13 14 15 16
+
+        lea         rsi,        [rsi + rdx]         ; next line
+
         pmaddubsw   xmm3,       xmm1                ; 00 02 04 06 08 10 12 14
 
-        punpckhbw   xmm4,       xmm5                ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
+        punpcklbw   xmm4,       xmm5                ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
         pmaddubsw   xmm4,       xmm1                ; 01 03 05 07 09 11 13 15
 
         paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
@@ -962,17 +995,18 @@ sym(vp8_bilinear_predict16x16_ssse3):
         packuswb    xmm7,       xmm4                ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
 
 .next_row:
-        movdqu      xmm6,       [rsi]               ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
-
-        movdqa      xmm4,       xmm6
-
-        movdqu      xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16
-        lea         rsi,        [rsi + rdx]         ; next line
+        movq        xmm6,       [rsi]               ; 00 01 02 03 04 05 06 07
+        movq        xmm5,       [rsi+1]             ; 01 02 03 04 05 06 07 08
 
         punpcklbw   xmm6,       xmm5
+        movq        xmm4,       [rsi+8]             ; 08 09 10 11 12 13 14 15
+
+        movq        xmm5,       [rsi+9]             ; 09 10 11 12 13 14 15 16
+        lea         rsi,        [rsi + rdx]         ; next line
+
         pmaddubsw   xmm6,       xmm1
 
-        punpckhbw   xmm4,       xmm5
+        punpcklbw   xmm4,       xmm5
         pmaddubsw   xmm4,       xmm1
 
         paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
@@ -1027,49 +1061,51 @@ b16x16_sp_only:
         movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
 
         ; get the first horizontal line done
-        movdqu      xmm2,       [rsi]               ; load row 0
+        movq        xmm4,       [rsi]               ; load row 0
+        movq        xmm2,       [rsi + 8]           ; load row 0
 
         lea         rsi,        [rsi + rax]         ; next line
 .next_row:
-        movdqu      xmm3,       [rsi]               ; load row + 1
+        movq        xmm3,       [rsi]               ; load row + 1
+        movq        xmm5,       [rsi + 8]           ; load row + 1
 
-        movdqu      xmm4,       xmm2
         punpcklbw   xmm4,       xmm3
+        punpcklbw   xmm2,       xmm5
 
         pmaddubsw   xmm4,       xmm1
-        movdqu      xmm7,       [rsi + rax]         ; load row + 2
-
-        punpckhbw   xmm2,       xmm3
-        movdqu      xmm6,       xmm3
+        movq        xmm7,       [rsi + rax]         ; load row + 2
 
         pmaddubsw   xmm2,       xmm1
-        punpcklbw   xmm6,       xmm7
+        movq        xmm6,       [rsi + rax + 8]     ; load row + 2
 
+        punpcklbw   xmm3,       xmm7
+        punpcklbw   xmm5,       xmm6
+
+        pmaddubsw   xmm3,       xmm1
         paddw       xmm4,       [rd GLOBAL]
-        pmaddubsw   xmm6,       xmm1
+
+        pmaddubsw   xmm5,       xmm1
+        paddw       xmm2,       [rd GLOBAL]
 
         psraw       xmm4,       VP8_FILTER_SHIFT
-        punpckhbw   xmm3,       xmm7
-
-        paddw       xmm2,       [rd GLOBAL]
-        pmaddubsw   xmm3,       xmm1
-
         psraw       xmm2,       VP8_FILTER_SHIFT
-        paddw       xmm6,       [rd GLOBAL]
 
         packuswb    xmm4,       xmm2
-        psraw       xmm6,       VP8_FILTER_SHIFT
-
-        movdqa      [rdi],      xmm4                ; store row 0
         paddw       xmm3,       [rd GLOBAL]
 
+        movdqa      [rdi],      xmm4                ; store row 0
+        paddw       xmm5,       [rd GLOBAL]
+
         psraw       xmm3,       VP8_FILTER_SHIFT
+        psraw       xmm5,       VP8_FILTER_SHIFT
+
+        packuswb    xmm3,       xmm5
+        movdqa      xmm4,       xmm7
+
+        movdqa      [rdi + rdx],xmm3                ; store row 1
         lea         rsi,        [rsi + 2*rax]
 
-        packuswb    xmm6,       xmm3
-        movdqa      xmm2,       xmm7
-
-        movdqa      [rdi + rdx],xmm6                ; store row 1
+        movdqa      xmm2,       xmm6
         lea         rdi,        [rdi + 2*rdx]
 
         cmp         rdi,        rcx
@@ -1083,32 +1119,35 @@ b16x16_fp_only:
         movsxd      rax,        dword ptr arg(1)    ; src_pixels_per_line
 
 .next_row:
-        movdqu      xmm2,       [rsi]               ; row 0
-        movdqa      xmm3,       xmm2
-
-        movdqu      xmm4,       [rsi + 1]           ; row 0 + 1
-        lea         rsi,        [rsi + rax]         ; next line
+        movq        xmm2,       [rsi]               ; 00 01 02 03 04 05 06 07
+        movq        xmm4,       [rsi+1]             ; 01 02 03 04 05 06 07 08
 
         punpcklbw   xmm2,       xmm4
-        movdqu      xmm5,       [rsi]               ; row 1
+        movq        xmm3,       [rsi+8]             ; 08 09 10 11 12 13 14 15
 
         pmaddubsw   xmm2,       xmm1
-        movdqa      xmm6,       xmm5
+        movq        xmm4,       [rsi+9]             ; 09 10 11 12 13 14 15 16
 
-        punpckhbw   xmm3,       xmm4
-        movdqu      xmm7,       [rsi + 1]           ; row 1 + 1
+        lea         rsi,        [rsi + rax]         ; next line
+        punpcklbw   xmm3,       xmm4
 
         pmaddubsw   xmm3,       xmm1
-        paddw       xmm2,       [rd GLOBAL]
+        movq        xmm5,       [rsi]
 
+        paddw       xmm2,       [rd GLOBAL]
+        movq        xmm7,       [rsi+1]
+
+        movq        xmm6,       [rsi+8]
         psraw       xmm2,       VP8_FILTER_SHIFT
+
         punpcklbw   xmm5,       xmm7
+        movq        xmm7,       [rsi+9]
 
         paddw       xmm3,       [rd GLOBAL]
         pmaddubsw   xmm5,       xmm1
 
         psraw       xmm3,       VP8_FILTER_SHIFT
-        punpckhbw   xmm6,       xmm7
+        punpcklbw   xmm6,       xmm7
 
         packuswb    xmm2,       xmm3
         pmaddubsw   xmm6,       xmm1
@@ -1463,6 +1502,13 @@ shuf2b:
 shuf3b:
     db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10
 
+align 16
+shuf2bfrom1:
+    db  4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13
+align 16
+shuf3bfrom1:
+    db  2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11
+
 align 16
 rd:
     times 8 dw 0x40

From 4d391e8ed281c58bfa36e864050091f2934a7097 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 21 Sep 2010 11:54:36 -0400
Subject: [PATCH 171/307] Don't reset mb clamping state during splitmv decoding

The MV decoding changes in c5fb0eb introduced a bug where the
macroblock clamping state was reset for each partition, so if an
earlier partition needed clamping but a subsequent one didn't,
the MB wouldn't receive clamping. Instead, the state is only
set during splitmv decoding, never cleared.

Change-Id: I224fe258493405ee0f6a04596acdb622c475e845
---
 vp8/decoder/decodemv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index e9281f7ae..106dbde06 100755
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -394,7 +394,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
                     break;
                 }
 
-                mbmi->need_to_clamp_mvs = (mv->col < mb_to_left_edge) ? 1 : 0;
+                mbmi->need_to_clamp_mvs |= (mv->col < mb_to_left_edge) ? 1 : 0;
                 mbmi->need_to_clamp_mvs |= (mv->col > mb_to_right_edge) ? 1 : 0;
                 mbmi->need_to_clamp_mvs |= (mv->row < mb_to_top_edge) ? 1 : 0;
                 mbmi->need_to_clamp_mvs |= (mv->row > mb_to_bottom_edge) ? 1 : 0;

From 0511cbff7a9eb1869ec9b4c534e7a4989ac42e8a Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 21 Sep 2010 14:56:42 -0400
Subject: [PATCH 172/307] Fix typo

Also, move with other ppc32 options

Change-Id: I0b97413c767909c5682afc9bdd954f3d43401f6c
---
 configure | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure b/configure
index f4cd67ce2..33ab74ad1 100755
--- a/configure
+++ b/configure
@@ -95,9 +95,9 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} ppc32-darwin8-gcc"
 all_platforms="${all_platforms} ppc32-darwin9-gcc"
+all_platforms="${all_platforms} ppc32-linux-gcc"
 all_platforms="${all_platforms} ppc64-darwin8-gcc"
 all_platforms="${all_platforms} ppc64-darwin9-gcc"
-all_platforms="${all_platormss} ppc32-linux-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
 all_platforms="${all_platforms} x86-darwin8-icc"

From cdd20666871d0126a8546ccc45327e31bb7703c5 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 21 Sep 2010 19:48:06 -0400
Subject: [PATCH 173/307] unset execute bit on c source

Change-Id: I6625ee41f8872908cb015ce0729e1c7a105b5217
---
 vp8/decoder/decodemv.c | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 mode change 100755 => 100644 vp8/decoder/decodemv.c

diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
old mode 100755
new mode 100644

From 7fed3832e7703628cd5ac595c4cb1f9c0ee5c7ce Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Wed, 22 Sep 2010 11:07:34 -0400
Subject: [PATCH 174/307] Remove dead code

The new loopfilter was originally introduced as an experimental change.
It's permanent now.

Change-Id: I25dbedb6ceff3e9f9c04e18bb29f84c3ecb7e546
---
 vp8/common/loopfilter_filters.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index ea82e2a07..1b92e00aa 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -13,9 +13,6 @@
 #include "loopfilter.h"
 #include "onyxc_int.h"
 
-
-#define NEW_LOOPFILTER_MASK
-
 typedef unsigned char uc;
 
 static __inline signed char vp8_signed_char_clamp(int t)
@@ -37,11 +34,7 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
     mask |= (abs(q1 - q0) > limit) * -1;
     mask |= (abs(q2 - q1) > limit) * -1;
     mask |= (abs(q3 - q2) > limit) * -1;
-#ifndef NEW_LOOPFILTER_MASK
-    mask |= (abs(p0 - q0) > flimit) * -1;
-#else
     mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  > flimit * 2 + limit) * -1;
-#endif
     mask = ~mask;
     return mask;
 }
@@ -286,11 +279,7 @@ static __inline signed char vp8_simple_filter_mask(signed char limit, signed cha
 // Why does this cause problems for win32?
 // error C2143: syntax error : missing ';' before 'type'
 //  (void) limit;
-#ifndef NEW_LOOPFILTER_MASK
-    signed char mask = (abs(p0 - q0) <= flimit) * -1;
-#else
     signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= flimit * 2 + limit) * -1;
-#endif
     return mask;
 }
 

From 8db5da2906a6a75dd5b8aba7147e73214a306d24 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 23 Sep 2010 13:53:09 -0400
Subject: [PATCH 175/307] Adjust multi-thread sync ranges according to image
 sizes

In multi-threaded decoder, set different sync ranges for
different video resolutions.

Change-Id: Iea48fd36f51919e0152c8ed3b1f10e1b723c0ca7
---
 vp8/decoder/onyxd_int.h |  1 +
 vp8/decoder/threading.c | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index ab926e61d..522ac22d2 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -96,6 +96,7 @@ typedef struct VP8Decompressor
     // variable for threading
 #if CONFIG_MULTITHREAD
     int mt_baseline_filter_level[MAX_MB_SEGMENTS];
+    int sync_range;
     int *mt_current_mb_col;                  // Each row remembers its already decoded column.
 
     unsigned char **mt_yabove_row;           // mb_rows x width
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 93bc69e72..56dd5ef8e 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -257,6 +257,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                 int mb_row;
                 int num_part = 1 << pbi->common.multi_token_partition;
                 volatile int *last_row_current_mb_col;
+                int nsync = pbi->sync_range;
 
                 for (mb_row = ithread+1; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1))
                 {
@@ -292,9 +293,9 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
                     {
-                        if ((mb_col & 7) == 0)
+                        if ((mb_col & (nsync-1)) == 0)
                         {
-                            while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                            while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                             {
                                 x86_pause_hint();
                                 thread_sleep(0);
@@ -608,6 +609,11 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
         if ((width & 0xf) != 0)
             width += 16 - (width & 0xf);
 
+        if (width < 640) pbi->sync_range = 1;
+        else if (width <= 1280) pbi->sync_range = 8;
+        else if (width <= 2560) pbi->sync_range =16;
+        else pbi->sync_range = 32;
+
         uv_width = width >>1;
 
         // Allocate an int for each mb row.
@@ -764,6 +770,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
     int num_part = 1 << pbi->common.multi_token_partition;
     int i, j;
     volatile int *last_row_current_mb_col = NULL;
+    int nsync = pbi->sync_range;
 
     int filter_level;
     loop_filter_info *lfi = pc->lf_info;
@@ -832,8 +839,8 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
             for (mb_col = 0; mb_col < pc->mb_cols; mb_col++)
             {
-                if ( mb_row > 0 && (mb_col & 7) == 0){
-                    while (mb_col > (*last_row_current_mb_col - 8) && *last_row_current_mb_col != pc->mb_cols - 1)
+                if ( mb_row > 0 && (mb_col & (nsync-1)) == 0){
+                    while (mb_col > (*last_row_current_mb_col - nsync) && *last_row_current_mb_col != pc->mb_cols - 1)
                     {
                         x86_pause_hint();
                         thread_sleep(0);

From fa7a55bb043a6abfc661a8d06a2611b54372fe1c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 21 Sep 2010 10:35:52 -0400
Subject: [PATCH 176/307] Add getter functions for the interface data symbols

Having these symbols be available as functions rather than data is
occasionally more convenient. Implemented this way rather than a
get-codec-by-id style to avoid creating a link-time dependency
between the encoder and the decoder.

Fixes issue #169

Change-Id: I319f281277033a5e7e3ee3b092b9a87cce2f463d
---
 vp8/exports_dec                   |  1 +
 vp8/exports_enc                   |  1 +
 vp8/vp8_cx_iface.c                |  2 +-
 vp8/vp8_dx_iface.c                |  2 +-
 vpx/internal/vpx_codec_internal.h | 14 ++++++++++++++
 vpx/vp8cx.h                       |  3 ++-
 vpx/vp8dx.h                       |  3 ++-
 7 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/vp8/exports_dec b/vp8/exports_dec
index f9b985c86..100ac5c27 100644
--- a/vp8/exports_dec
+++ b/vp8/exports_dec
@@ -1 +1,2 @@
 data vpx_codec_vp8_dx_algo
+text vpx_codec_vp8_dx
diff --git a/vp8/exports_enc b/vp8/exports_enc
index 996701113..29ff35ef7 100644
--- a/vp8/exports_enc
+++ b/vp8/exports_enc
@@ -1 +1,2 @@
 data vpx_codec_vp8_cx_algo
+text vpx_codec_vp8_cx
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index e3f99c08d..f8a4de85b 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -1079,7 +1079,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
 #ifndef VERSION_STRING
 #define VERSION_STRING
 #endif
-vpx_codec_iface_t vpx_codec_vp8_cx_algo =
+CODEC_INTERFACE(vpx_codec_vp8_cx) =
 {
     "WebM Project VP8 Encoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index e7e535638..f19cb9a30 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -653,7 +653,7 @@ vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
 #ifndef VERSION_STRING
 #define VERSION_STRING
 #endif
-vpx_codec_iface_t vpx_codec_vp8_dx_algo =
+CODEC_INTERFACE(vpx_codec_vp8_dx) =
 {
     "WebM Project VP8 Decoder" VERSION_STRING,
     VPX_CODEC_INTERNAL_ABI_VERSION,
diff --git a/vpx/internal/vpx_codec_internal.h b/vpx/internal/vpx_codec_internal.h
index ab4cad10c..dcb451dca 100644
--- a/vpx/internal/vpx_codec_internal.h
+++ b/vpx/internal/vpx_codec_internal.h
@@ -389,6 +389,20 @@ struct vpx_codec_priv
 #define RECAST(id, x) id##__convert(x)
 
 
+/* CODEC_INTERFACE convenience macro
+ *
+ * By convention, each codec interface is a struct with extern linkage, where
+ * the symbol is suffixed with _algo. A getter function is also defined to
+ * return a pointer to the struct, since in some cases it's easier to work
+ * with text symbols than data symbols (see issue #169). This function has
+ * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE
+ * macro is provided to define this getter function automatically.
+ */
+#define CODEC_INTERFACE(id)\
+vpx_codec_iface_t* id(void) { return &id##_algo; }\
+vpx_codec_iface_t  id##_algo
+
+
 /* Internal Utility Functions
  *
  * The following functions are indended to be used inside algorithms as
diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h
index e1c821144..efd79459a 100644
--- a/vpx/vp8cx.h
+++ b/vpx/vp8cx.h
@@ -29,7 +29,8 @@
  * This interface provides the capability to encode raw VP8 streams, as would
  * be found in AVI files.
  */
-extern vpx_codec_iface_t vpx_codec_vp8_cx_algo;
+extern vpx_codec_iface_t  vpx_codec_vp8_cx_algo;
+extern vpx_codec_iface_t* vpx_codec_vp8_cx(void);
 
 
 /*
diff --git a/vpx/vp8dx.h b/vpx/vp8dx.h
index 4cad838ff..fccd407f3 100644
--- a/vpx/vp8dx.h
+++ b/vpx/vp8dx.h
@@ -29,7 +29,8 @@
  * This interface provides the capability to decode raw VP8 streams, as would
  * be found in AVI files and other non-Flash uses.
  */
-extern vpx_codec_iface_t vpx_codec_vp8_dx_algo;
+extern vpx_codec_iface_t  vpx_codec_vp8_dx_algo;
+extern vpx_codec_iface_t* vpx_codec_vp8_dx(void);
 
 /* Include controls common to both the encoder and decoder */
 #include "vp8.h"

From f9b2ca5b99c9de6fa503daf44f789b983579522d Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 21 Sep 2010 10:06:41 -0400
Subject: [PATCH 177/307] configure: add --enable-small

Build with -O2 rather than -O3, to dissuade the compiler from inlining
so much. See issue #1.

Change-Id: Iacb8ddb59125d3f01c5fea846b45a1c004c9aee0
---
 build/make/configure.sh | 6 +++---
 configure               | 3 +++
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index e787c5d2c..e4a3da6b6 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -880,9 +880,9 @@ process_common_toolchain() {
     enabled gcov &&
         check_add_cflags -fprofile-arcs -ftest-coverage &&
         check_add_ldflags -fprofile-arcs -ftest-coverage
-    enabled optimizations && check_add_cflags -O3
-    if enabled rvct; then
-        enabled optimizations && check_add_cflags -Otime
+    if enabled optimizations; then
+        enabled rvct && check_add_cflags -Otime
+        enabled small && check_add_cflags -O2 || check_add_cflags -O3
     fi
 
     # Position Independant Code (PIC) support, for building relocatable
diff --git a/configure b/configure
index 33ab74ad1..b3cd0c72b 100755
--- a/configure
+++ b/configure
@@ -38,6 +38,7 @@ Advanced options:
   ${toggle_realtime_only}         enable this option while building for real-time encoding
   ${toggle_runtime_cpu_detect}    runtime cpu detection
   ${toggle_shared}                shared library support
+  ${toggle_small}                 favor smaller size over speed
   ${toggle_arm_asm_detok}         assembly version of the detokenizer (ARM platforms only)
 
 Codecs:
@@ -244,6 +245,7 @@ CONFIG_LIST="
     spatial_resampling
     realtime_only
     shared
+    small
     arm_asm_detok
 "
 CMDLINE_SELECT="
@@ -281,6 +283,7 @@ CMDLINE_SELECT="
     spatial_resampling
     realtime_only
     shared
+    small
     arm_asm_detok
 "
 

From e913eb97c9eb2abd325d3416423925e501710098 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 21 Sep 2010 10:34:51 -0400
Subject: [PATCH 178/307] configure: enable PIC for shared libs by default

Shared libs generally require PIC, so this saves a little typing at
configure time.

Change-Id: I357d70cc68434f3283fee78873052d2b7d77c777
---
 build/make/configure.sh | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index e4a3da6b6..86759a97f 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -562,6 +562,9 @@ process_common_toolchain() {
     mips*)        enable mips;;
     esac
 
+    # PIC is probably what we want when building shared libs
+    enabled shared && soft_enable pic
+
     # Handle darwin variants
     case ${toolchain} in
         *-darwin8-gcc)

From 48e76ff4fde05a03a395a31950de87692a579829 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 24 Sep 2010 11:21:35 -0400
Subject: [PATCH 179/307] move reconintra_mt to decoder (for now)

reconintra_mt.c is only required for building the decoder right now.
It could definitely be used for the encoder in the future, but it
currently depends on decoder only data structures. (onyxd_int.h,
VP8D_COMP, etc). Move it from common/ to decoder/ until the
necessary changes to the common multithread code are complete.

This patch is needed to build with --disable-vp8-decoder.

Change-Id: I568c52221a2b309234d269675cba97131ce35c86
---
 vp8/{common => decoder}/reconintra_mt.c | 0
 vp8/vp8_common.mk                       | 2 --
 vp8/vp8dx.mk                            | 2 ++
 3 files changed, 2 insertions(+), 2 deletions(-)
 rename vp8/{common => decoder}/reconintra_mt.c (100%)

diff --git a/vp8/common/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
similarity index 100%
rename from vp8/common/reconintra_mt.c
rename to vp8/decoder/reconintra_mt.c
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index b357b28bc..ecca18a0a 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -81,8 +81,6 @@ VP8_COMMON_SRCS-yes += common/recon.c
 VP8_COMMON_SRCS-yes += common/reconinter.c
 VP8_COMMON_SRCS-yes += common/reconintra.c
 VP8_COMMON_SRCS-yes += common/reconintra4x4.c
-VP8_COMMON_SRCS-yes += common/reconintra_mt.h
-VP8_COMMON_SRCS-yes += common/reconintra_mt.c
 VP8_COMMON_SRCS-yes += common/setupintrarecon.c
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
 VP8_COMMON_SRCS-yes += common/textblit.c
diff --git a/vp8/vp8dx.mk b/vp8/vp8dx.mk
index 941961708..1acd67453 100644
--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -67,6 +67,8 @@ VP8_DX_SRCS-yes += decoder/treereader.h
 VP8_DX_SRCS-yes += decoder/onyxd_if.c
 VP8_DX_SRCS-yes += decoder/threading.c
 VP8_DX_SRCS-yes += decoder/idct_blk.c
+VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.h
+VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/reconintra_mt.c
 
 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
 

From cbdc1298953b03bc18875d346c715fa07d20efa4 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 24 Sep 2010 11:39:27 -0400
Subject: [PATCH 180/307] darwin-icc: build for specific SDKs

Add the missing -isysroot and -mmacosx-version-min flags to ICC builds.
Fixes issue #185.

Change-Id: I2fb37fcaaafef7122a61ced603569f4aa17f8bbc
---
 build/make/configure.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index 86759a97f..cfac5391c 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -567,13 +567,13 @@ process_common_toolchain() {
 
     # Handle darwin variants
     case ${toolchain} in
-        *-darwin8-gcc)
+        *-darwin8-*)
             add_cflags  "-isysroot /Developer/SDKs/MacOSX10.4u.sdk"
             add_cflags  "-mmacosx-version-min=10.4"
             add_ldflags "-isysroot /Developer/SDKs/MacOSX10.4u.sdk"
             add_ldflags "-mmacosx-version-min=10.4"
             ;;
-        *-darwin9-gcc)
+        *-darwin9-*)
             add_cflags  "-isysroot /Developer/SDKs/MacOSX10.5.sdk"
             add_cflags  "-mmacosx-version-min=10.5"
             add_ldflags "-isysroot /Developer/SDKs/MacOSX10.5.sdk"

From 8ca779aba84866f97665705fc30d662ae8bc06c0 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 24 Sep 2010 11:10:25 -0400
Subject: [PATCH 181/307] disable compilation of debugging code

This patch avoids compiling some debugging code in onyx_if.c. The most
significant fix is to avoid generating code for vp8_write_yuv_frame,
which is never called. Some other code was removed by the dead code
elimination performed by the compiler, and this patch does it with the
preprocessor instead. There are advantages both ways.

Change-Id: I044fd43179d2e947553f0d6f2cad5b40907ac458
---
 vp8/encoder/onyx_if.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 56bac0aaa..b024507c7 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2676,6 +2676,8 @@ int vp8_update_entropy(VP8_PTR comp, int update)
     return 0;
 }
 
+
+#if OUTPUT_YUV_SRC
 void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s)
 {
     FILE *yuv_file = fopen(name, "ab");
@@ -2711,6 +2713,8 @@ void vp8_write_yuv_frame(const char *name, YV12_BUFFER_CONFIG *s)
 
     fclose(yuv_file);
 }
+#endif
+
 
 static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 {
@@ -4355,12 +4359,13 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
     if (cm->frame_type == KEY_FRAME)
         cm->refresh_last_frame = 1;
 
-    if (0)
+#if 0
     {
         FILE *f = fopen("gfactive.stt", "a");
         fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame);
         fclose(f);
     }
+#endif
 
     // For inter frames the current default behaviour is that when cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer
     // This is purely an encoder descision at present.
@@ -4604,9 +4609,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
         }
     }
 
-#if CONFIG_PSNR
-
-    if (0)
+#if 0 && CONFIG_PSNR
     {
         FILE *f = fopen("tmp.stt", "a");
 
@@ -4731,7 +4734,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
 
 
 
-    if (0)
+#if 0
     {
         char filename[512];
         FILE *recon_file;
@@ -4741,6 +4744,7 @@ static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsign
                cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file);
         fclose(recon_file);
     }
+#endif
 
     // DEBUG
     //vp8_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show);
@@ -4800,8 +4804,6 @@ void vp8_check_gf_quality(VP8_COMP *cpi)
     }
 
 #if 0
-
-    if (0)
     {
         FILE *f = fopen("gfneeded.stt", "a");
         fprintf(f, "%10d %10d %10d %10d %10ld \n",

From f30e8dd7bd8d91b230586ce18ccc1001a6af1a18 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Fri, 24 Sep 2010 12:03:31 -0400
Subject: [PATCH 182/307] combine max values and compare once

previous implementation compared each set of values to limit and then
&'d them together, requiring a compare and & for each value.

this does the accumulation first, requiring only one compare

Change-Id: Ia5e3a1a50e47699c88470b8c41964f92a0dc1323
---
 .../neon/loopfilterhorizontaledge_uv_neon.asm | 46 +++------------
 .../neon/loopfilterhorizontaledge_y_neon.asm  | 46 +++------------
 .../neon/loopfilterverticaledge_uv_neon.asm   | 47 +++-------------
 .../neon/loopfilterverticaledge_y_neon.asm    | 47 +++-------------
 .../mbloopfilterhorizontaledge_uv_neon.asm    | 56 +++----------------
 .../mbloopfilterhorizontaledge_y_neon.asm     | 54 +++---------------
 .../neon/mbloopfilterverticaledge_uv_neon.asm | 53 +++---------------
 .../neon/mbloopfilterverticaledge_y_neon.asm  | 55 +++---------------
 8 files changed, 72 insertions(+), 332 deletions(-)

diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
index c0c3e337c..23ace0f95 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -55,8 +55,7 @@
     vld1.s8     {d4[], d5[]}, [r12]         ; thresh
 
     ldr         r12, _lfhuv_coeff_
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -65,22 +64,19 @@
     vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
     vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q4, q1, q4                  ; (abs(q3 - q2) > limit)*-1
     vadd.u8     q0, q0, q0                  ; flimit * 2
     vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-
-    vand        q15, q15, q12
-    vand        q10, q10, q11
-    vand        q3, q3, q4
+    vcge.u8     q15, q1, q15                ; (max  > limit) * -1
 
     vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
@@ -90,8 +86,6 @@
 
     vld1.u8     {q0}, [r12]!
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -100,26 +94,20 @@
 ;;;;;;;;;;;;;;
     vld1.u8     {q10}, [r12]!
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q11, d15, d13
 
-    vand        q3, q3, q9
     vmovl.u8    q4, d20
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vmul.i8    q2, q2, q10                 ; 3 * ( qs0 - ps0)
     vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
     vmul.i16    q11, q11, q4
 
     vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
-    ;;
-    ;vld1.u8        {q4}, [r12]!            ;no need 7 any more
+    vand        q15, q15, q9                ; vp8_filter_mask
 
-    ;vqadd.s8   q1, q1, q2
     vaddw.s8    q2, q2, d2
     vaddw.s8    q11, q11, d3
 
@@ -130,21 +118,6 @@
     ;;
 
     vand        q1, q1, q15                 ; vp8_filter &= mask
-    ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q1, q4                  ; s = vp8_filter & 7
-;   vqadd.s8    q1, q1, q9                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
-    ;;;;
-;   vshr.s8     q1, q1, #3                  ; vp8_filter >>= 3
-;   vceq.i8     q2, q2, q9                  ; s = (s==4)*-1
-    ;;
-;   ;calculate output
-;   vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-;   vqadd.s8    q11, q2, q1                 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
     vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
     vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
     vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
@@ -168,7 +141,6 @@
     ;;
 
     vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    ;vqadd.s8   q11, q6, q11                ; u = vp8_signed_char_clamp(ps0 + u)
     vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
     ;
 
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
index a8314cdd7..e1896e4d8 100644
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -39,8 +39,7 @@
     ldr         r12, _lfhy_coeff_
     vld1.u8     {q7}, [r0], r1              ; q0
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vld1.u8     {q8}, [r0], r1              ; q1
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
@@ -52,22 +51,19 @@
     vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
     vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q4, q1, q4                  ; (abs(q3 - q2) > limit)*-1
     vadd.u8     q0, q0, q0                  ; flimit * 2
     vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-
-    vand        q15, q15, q12
-    vand        q10, q10, q11
-    vand        q3, q3, q4
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
@@ -77,8 +73,6 @@
 
     vld1.u8     {q0}, [r12]!
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -87,26 +81,20 @@
 ;;;;;;;;;;;;;;
     vld1.u8     {q10}, [r12]!
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q11, d15, d13
 
-    vand        q3, q3, q9
     vmovl.u8    q4, d20
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vmul.i8    q2, q2, q10                 ; 3 * ( qs0 - ps0)
     vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
     vmul.i16    q11, q11, q4
 
     vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
-    ;;
-    ;vld1.u8        {q4}, [r12]!            ;no need 7 any more
+    vand        q15, q15, q9                ; vp8_filter_mask
 
-    ;vqadd.s8   q1, q1, q2
     vaddw.s8    q2, q2, d2
     vaddw.s8    q11, q11, d3
 
@@ -117,21 +105,6 @@
     ;;
 
     vand        q1, q1, q15                 ; vp8_filter &= mask
-    ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q1, q4                  ; s = vp8_filter & 7
-;   vqadd.s8    q1, q1, q9                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
-    ;;;;
-;   vshr.s8     q1, q1, #3                  ; vp8_filter >>= 3
-;   vceq.i8     q2, q2, q9                  ; s = (s==4)*-1
-    ;;
-;   ;calculate output
-;   vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-;   vqadd.s8    q11, q2, q1                 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
     vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
     vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
     vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
@@ -153,7 +126,6 @@
     add         r2, r1, r0
 
     vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    ;vqadd.s8   q11, q6, q11                ; u = vp8_signed_char_clamp(ps0 + u)
     vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
 
     add         r3, r2, r1
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
index 57913d2bc..a9c2d12f0 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -71,8 +71,7 @@
     vld1.s8     {d4[], d5[]}, [r12]         ; thresh
 
     ldr         r12, _vlfuv_coeff_
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -81,22 +80,19 @@
     vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
     vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q4, q1, q4                  ; (abs(q3 - q2) > limit)*-1
     vadd.u8     q0, q0, q0                  ; flimit * 2
     vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-
-    vand        q15, q15, q12
-    vand        q10, q10, q11
-    vand        q3, q3, q4
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
@@ -106,9 +102,6 @@
 
     vld1.u8     {q0}, [r12]!
 
-    vand        q15, q15, q10
-
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -117,26 +110,20 @@
 ;;;;;;;;;;;;;;
     vld1.u8     {q10}, [r12]!
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q11, d15, d13
 
-    vand        q3, q3, q9
     vmovl.u8    q4, d20
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vmul.i8    q2, q2, q10                 ; 3 * ( qs0 - ps0)
     vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
     vmul.i16    q11, q11, q4
 
     vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
-    ;;
-    ;vld1.u8        {q4}, [r12]!            ;no need 7 any more
+    vand        q15, q15, q9                ; vp8_filter_mask
 
-    ;vqadd.s8   q1, q1, q2
     vaddw.s8    q2, q2, d2
     vaddw.s8    q11, q11, d3
 
@@ -147,21 +134,6 @@
     ;;
 
     vand        q1, q1, q15                 ; vp8_filter &= mask
-    ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q1, q4                  ; s = vp8_filter & 7
-;   vqadd.s8    q1, q1, q9                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
-    ;;;;
-;   vshr.s8     q1, q1, #3                  ; vp8_filter >>= 3
-;   vceq.i8     q2, q2, q9                  ; s = (s==4)*-1
-    ;;
-;   ;calculate output
-;   vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-;   vqadd.s8    q11, q2, q1                 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
     vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
     vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
     vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
@@ -182,7 +154,6 @@
     add         r2, r2, #2
 
     vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    ;vqadd.s8   q11, q6, q11                ; u = vp8_signed_char_clamp(ps0 + u)
     vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
 
     veor        q7, q10, q0                 ; *oq0 = u^0x80
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
index 2eb695ff0..64a49bb41 100644
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -67,8 +67,7 @@
     vtrn.8      q7, q8
     vtrn.8      q9, q10
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -77,22 +76,19 @@
     vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
     vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q4, q1, q4                  ; (abs(q3 - q2) > limit)*-1
     vadd.u8     q0, q0, q0                  ; flimit * 2
     vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-
-    vand        q15, q15, q12
-    vand        q10, q10, q11
-    vand        q3, q3, q4
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
@@ -102,9 +98,6 @@
 
     vld1.u8     {q0}, [r12]!
 
-    vand        q15, q15, q10
-
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -113,26 +106,20 @@
 ;;;;;;;;;;;;;;
     vld1.u8     {q10}, [r12]!
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q11, d15, d13
 
-    vand        q3, q3, q9
     vmovl.u8    q4, d20
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vmul.i8    q2, q2, q10                 ; 3 * ( qs0 - ps0)
     vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
     vmul.i16    q11, q11, q4
 
     vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
-    ;;
-    ;vld1.u8        {q4}, [r12]!            ;no need 7 any more
+    vand        q15, q15, q9                ; vp8_filter_mask
 
-    ;vqadd.s8   q1, q1, q2
     vaddw.s8    q2, q2, d2
     vaddw.s8    q11, q11, d3
 
@@ -143,21 +130,6 @@
     ;;
 
     vand        q1, q1, q15                 ; vp8_filter &= mask
-    ;;
-;;;;;;;;;;;;
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q1, q4                  ; s = vp8_filter & 7
-;   vqadd.s8    q1, q1, q9                  ; vp8_filter = vp8_signed_char_clamp(vp8_filter+4)
-    ;;;;
-;   vshr.s8     q1, q1, #3                  ; vp8_filter >>= 3
-;   vceq.i8     q2, q2, q9                  ; s = (s==4)*-1
-    ;;
-;   ;calculate output
-;   vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - vp8_filter)
-;   vqadd.s8    q11, q2, q1                 ; u = vp8_signed_char_clamp(s + vp8_filter)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; q10=3
     vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
     vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
     vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
@@ -178,7 +150,6 @@
     ;
 
     vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    ;vqadd.s8   q11, q6, q11                ; u = vp8_signed_char_clamp(ps0 + u)
     vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
 
     veor        q7, q10, q0                 ; *oq0 = u^0x80
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
index 4576a6a8f..52ab059db 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -52,8 +52,7 @@
 
     ldr         r12, _mbhlfuv_coeff_
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -61,29 +60,25 @@
     vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
     vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q0, q1, q0                  ; (abs(q3 - q2) > limit)*-1
-
-    vand        q15, q15, q12
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q0
+    vmax.u8     q15, q11, q12
 
     vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
     vld1.s8     {d4[], d5[]}, [r2]          ; flimit
 
-    vand        q10, q10, q11
-    vand        q3, q3, q0
-
     vld1.u8     {q0}, [r12]!
 
     vadd.u8     q2, q2, q2                  ; flimit * 2
     vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
@@ -91,8 +86,6 @@
     vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
     vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -103,29 +96,23 @@
 ;;;;;;;;;;;;;
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q13, d15, d13
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
 
-    ;vadd.s8    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
 
-    vand        q3, q3, q12
-
-    ;vadd.s8    q2, q2, q10
     vadd.s16    q2, q2, q10
     vadd.s16    q13, q13, q11
 
     vld1.u8     {q12}, [r12]!               ;#3
 
-    ;vqadd.s8   q1, q1, q2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q13, q13, d3
 
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
     vld1.u8     {q11}, [r12]!               ;#4
 
     vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
@@ -139,32 +126,7 @@
     vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
 
     vld1.u8     {d7}, [r12]!                ;#9
-    ;
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q13, q12                ; s = Filter2 & 7
-
-;   vqadd.s8    q13, q13, q11               ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-;   vld1.u8     {d6}, [r12]!                ;#18
-
-;   sub         r0, r0, r1, lsl #3
-;   sub         r3, r3, r1, lsl #3
-
-;   vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-;   vceq.i8     q2, q2, q11                 ; s = (s==4)*-1
-
-;   add         r0, r0, r1
-;   add         r3, r3, r1
-
-;   vqsub.s8    q7, q7, q13                 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-;   vqadd.s8    q11, q2, q13                ; u = vp8_signed_char_clamp(s + Filter2)
-
-;   vld1.u8     {d5}, [r12]!                ;#27
-;   vmov        q10, q15
-;   vmov        q12, q15
-
-;   vqadd.s8    q6, q6, q11                 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
     vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
 
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
index 8e85caa45..b0755b05f 100644
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -36,8 +36,7 @@
     ldr         r12, _mbhlfy_coeff_
     vld1.u8     {q6}, [r0], r1              ; p0
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vld1.u8     {q7}, [r0], r1              ; q0
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
@@ -49,29 +48,25 @@
     vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
     vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q0, q1, q0                  ; (abs(q3 - q2) > limit)*-1
-
-    vand        q15, q15, q12
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q0
+    vmax.u8     q15, q11, q12
 
     vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
     vld1.s8     {d4[], d5[]}, [r2]          ; flimit
 
-    vand        q10, q10, q11
-    vand        q3, q3, q0
-
     vld1.u8     {q0}, [r12]!
 
     vadd.u8     q2, q2, q2                  ; flimit * 2
     vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
@@ -79,8 +74,6 @@
     vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
     vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -91,29 +84,23 @@
 ;;;;;;;;;;;;;
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q13, d15, d13
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
 
-    ;vadd.s8    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
 
-    vand        q3, q3, q12
-
-    ;vadd.s8    q2, q2, q10
     vadd.s16    q2, q2, q10
     vadd.s16    q13, q13, q11
 
     vld1.u8     {q12}, [r12]!               ;#3
 
-    ;vqadd.s8   q1, q1, q2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q13, q13, d3
 
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
     vld1.u8     {q11}, [r12]!               ;#4
 
     vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
@@ -129,29 +116,6 @@
     vld1.u8     {d7}, [r12]!                ;#9
     sub         r0, r0, r1, lsl #3
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q13, q12                ; s = Filter2 & 7
-
-;   vqadd.s8    q13, q13, q11               ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-;   vld1.u8     {d6}, [r12]!                ;#18
-
-;   add         r0, r0, r1
-;   add         r2, r0, r1
-
-;   vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-;   vceq.i8     q2, q2, q11                 ; s = (s==4)*-1
-
-;   add         r3, r2, r1
-
-;   vqsub.s8    q7, q7, q13                 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-;   vqadd.s8    q11, q2, q13                ; u = vp8_signed_char_clamp(s + Filter2)
-
-;   vld1.u8     {d5}, [r12]!                ;#27
-;   vmov        q10, q15
-;   vmov        q12, q15
-
-;   vqadd.s8    q6, q6, q11                 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
     vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index d9dbdcfe5..33cd55e1c 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -71,8 +71,7 @@
     ldr         r12, _mbvlfuv_coeff_
     vst1.u8     {q10}, [sp]!
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -80,29 +79,25 @@
     vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
     vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q0, q1, q0                  ; (abs(q3 - q2) > limit)*-1
-
-    vand        q15, q15, q12
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q0
+    vmax.u8     q15, q11, q12
 
     vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
     vld1.s8     {d4[], d5[]}, [r2]          ; flimit
 
-    vand        q10, q10, q11
-    vand        q3, q3, q0
-
     vld1.u8     {q0}, [r12]!
 
     vadd.u8     q2, q2, q2                  ; flimit * 2
     vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
@@ -110,8 +105,6 @@
     vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
     vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -122,29 +115,23 @@
 ;;;;;;;;;;;;;
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q13, d15, d13
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
 
-    ;vadd.s8    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
 
-    vand        q3, q3, q12
-
-    ;vadd.s8    q2, q2, q10
     vadd.s16    q2, q2, q10
     vadd.s16    q13, q13, q11
 
     vld1.u8     {q12}, [r12]!               ;#3
 
-    ;vqadd.s8   q1, q1, q2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q13, q13, d3
 
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
     vld1.u8     {q11}, [r12]!               ;#4
 
     vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
@@ -160,28 +147,6 @@
     vld1.u8     {d7}, [r12]!                ;#9
     ;
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q13, q12                ; s = Filter2 & 7
-
-;   vqadd.s8    q13, q13, q11               ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-;   vld1.u8     {d6}, [r12]!                ;#18
-
-;   sub         r0, r0, r1, lsl #3
-;   sub         r3, r3, r1, lsl #3
-;   sub         sp, sp, #32
-
-;   vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-;   vceq.i8     q2, q2, q11                 ; s = (s==4)*-1
-
-;   vqsub.s8    q7, q7, q13                 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-;   vqadd.s8    q11, q2, q13                ; u = vp8_signed_char_clamp(s + Filter2)
-
-;   vld1.u8     {d5}, [r12]!                ;#27
-;   vmov        q10, q15
-;   vmov        q12, q15
-
-;   vqadd.s8    q6, q6, q11                 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
     vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index bdffc62ee..f51fd0bd4 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -69,8 +69,7 @@
     ldr         r12, _mbvlfy_coeff_
     vst1.u8     {q10}, [sp]!
 
-    ;vp8_filter_mask() function
-    ;vp8_hevmask() function
+    ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
     vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
     vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
@@ -78,29 +77,25 @@
     vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
     vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
 
-    vcge.u8     q15, q1, q11                ; (abs(p3 - p2) > limit)*-1
-    vcge.u8     q12, q1, q12                ; (abs(p2 - p1) > limit)*-1
-    vcge.u8     q10, q1, q13                ; (abs(p1 - p0) > limit)*-1
-    vcge.u8     q11, q1, q14                ; (abs(q1 - q0) > limit)*-1
-    vcge.u8     q3, q1, q3                  ; (abs(q2 - q1) > limit)*-1
-    vcge.u8     q0, q1, q0                  ; (abs(q3 - q2) > limit)*-1
-
-    vand        q15, q15, q12
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q0
+    vmax.u8     q15, q11, q12
 
     vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
 
+    ; vp8_hevmask
     vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
     vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
 
     vld1.s8     {d4[], d5[]}, [r2]          ; flimit
 
-    vand        q10, q10, q11
-    vand        q3, q3, q0
-
     vld1.u8     {q0}, [r12]!
 
     vadd.u8     q2, q2, q2                  ; flimit * 2
     vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15
 
     vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
     vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
@@ -108,8 +103,6 @@
     vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
     vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
 
-    vand        q15, q15, q10
-
     ;vp8_filter() function
     veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
     veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
@@ -120,29 +113,23 @@
 ;;;;;;;;;;;;;
     vorr        q14, q13, q14               ; q14: vp8_hevmask
 
-    ;vqsub.s8   q2, q7, q6                  ; ( qs0 - ps0)
     vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
     vsubl.s8    q13, d15, d13
 
     vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
 
-    ;vadd.s8    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
     vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
 
-    vand        q3, q3, q12
-
-    ;vadd.s8    q2, q2, q10
     vadd.s16    q2, q2, q10
     vadd.s16    q13, q13, q11
 
     vld1.u8     {q12}, [r12]!               ;#3
 
-    ;vqadd.s8   q1, q1, q2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
     vaddw.s8    q13, q13, d3
 
-    vand        q15, q15, q3                ; q15: vp8_filter_mask
     vld1.u8     {q11}, [r12]!               ;#4
 
     vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
@@ -158,30 +145,6 @@
     vld1.u8     {d7}, [r12]!                ;#9
     ;
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;Change for VP8 from VP7
-;   vand        q2, q13, q12                ; s = Filter2 & 7
-
-;   vqadd.s8    q13, q13, q11               ; Filter2 = vp8_signed_char_clamp(Filter2+4)
-;   vld1.u8     {d6}, [r12]!                ;#18
-
-;   sub         r0, r0, r1, lsl #4
-;   sub         sp, sp, #32
-;   add         r2, r0, r1
-
-;   vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-;   vceq.i8     q2, q2, q11                 ; s = (s==4)*-1
-
-;   add         r3, r2, r1
-
-;   vqsub.s8    q7, q7, q13                 ; qs0 = vp8_signed_char_clamp(qs0 - Filter2)
-;   vqadd.s8    q11, q2, q13                ; u = vp8_signed_char_clamp(s + Filter2)
-
-;   vld1.u8     {d5}, [r12]!                ;#27
-;   vmov        q10, q15
-;   vmov        q12, q15
-
-;   vqadd.s8    q6, q6, q11                 ; ps0 = vp8_signed_char_clamp(ps0 + u)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
     vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
     vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
 

From e2795e9978580a21e7399d3ccf919dc2f06f97ca Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Fri, 24 Sep 2010 14:30:13 -0700
Subject: [PATCH 183/307] Fix valgrind errors in vp8_sixtap_predict8x4_armv6().

This function was accessing values below the stack pointer, which
 can be corrupted by signal delivery at any time.

Change-Id: I92945b30817562eb0340f289e74c108da72aeaca
---
 vp8/common/arm/armv6/sixtappredict8x4_v6.asm | 37 ++++++++------------
 1 file changed, 15 insertions(+), 22 deletions(-)

diff --git a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
index 8fb80ef29..8b9939484 100644
--- a/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
+++ b/vp8/common/arm/armv6/sixtappredict8x4_v6.asm
@@ -25,10 +25,10 @@
 ;and the result is stored in transpose.
 |vp8_sixtap_predict8x4_armv6| PROC
     stmdb       sp!, {r4 - r11, lr}
-    sub         sp, sp, #184                ;reserve space on stack for temporary storage: 20x(8+1) +4
+    str         r3, [sp, #-184]!            ;reserve space on stack for temporary storage, store yoffset
 
     cmp         r2, #0                      ;skip first_pass filter if xoffset=0
-    str         r3, [sp], #4                ;store yoffset
+    add         lr, sp, #4                  ;point to temporary buffer
     beq         skip_firstpass_filter
 
 ;first-pass filter
@@ -45,7 +45,6 @@
     mov         r2, #0x90000                ; height=9 is top part of counter
 
     sub         r1, r1, #8
-    mov         lr, #20
 
 |first_pass_hloop_v6|
     ldrb        r6, [r0, #-5]               ; load source data
@@ -83,10 +82,10 @@
     tst         r2, #0xff                   ; test loop counter
     usat        r11, #8, r11, asr #7
     add         r12, r12, #0x40
-    strh        r11, [sp], lr               ; result is transposed and stored, which
+    strh        r11, [lr], #20              ; result is transposed and stored, which
     usat        r12, #8, r12, asr #7
 
-    strh        r12, [sp], lr
+    strh        r12, [lr], #20
 
     movne       r11, r6
     movne       r12, r7
@@ -107,8 +106,7 @@
 
     subs        r2, r2, #0x10000
 
-    mov         r6, #158
-    sub         sp, sp, r6
+    sub         lr, lr, #158
 
     add         r0, r0, r1                  ; move to next input line
 
@@ -116,10 +114,7 @@
 
 ;second pass filter
 secondpass_filter
-    mov         r1, #18
-    sub         sp, sp, r1                  ; 18+4
-
-    ldr         r3, [sp, #-4]               ; load back yoffset
+    ldr         r3, [sp], #4                ; load back yoffset
     ldr         r0, [sp, #216]              ; load dst address from stack 180+36
     ldr         r1, [sp, #220]              ; load dst stride from stack 180+40
 
@@ -192,30 +187,28 @@ skip_firstpass_filter
     sub         r0, r0, r1, lsl #1
     sub         r1, r1, #8
     mov         r2, #9
-    mov         r3, #20
 
 skip_firstpass_hloop
     ldrb        r4, [r0], #1                ; load data
     subs        r2, r2, #1
     ldrb        r5, [r0], #1
-    strh        r4, [sp], r3                ; store it to immediate buffer
+    strh        r4, [lr], #20               ; store it to immediate buffer
     ldrb        r6, [r0], #1                ; load data
-    strh        r5, [sp], r3
+    strh        r5, [lr], #20
     ldrb        r7, [r0], #1
-    strh        r6, [sp], r3
+    strh        r6, [lr], #20
     ldrb        r8, [r0], #1
-    strh        r7, [sp], r3
+    strh        r7, [lr], #20
     ldrb        r9, [r0], #1
-    strh        r8, [sp], r3
+    strh        r8, [lr], #20
     ldrb        r10, [r0], #1
-    strh        r9, [sp], r3
+    strh        r9, [lr], #20
     ldrb        r11, [r0], #1
-    strh        r10, [sp], r3
+    strh        r10, [lr], #20
     add         r0, r0, r1                  ; move to next input line
-    strh        r11, [sp], r3
+    strh        r11, [lr], #20
 
-    mov         r4, #158
-    sub         sp, sp, r4                  ; move over to next column
+    sub         lr, lr, #158                ; move over to next column
     bne         skip_firstpass_hloop
 
     b           secondpass_filter

From 2b521ab551934b38f13a52be371fb071b42e6602 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 27 Sep 2010 12:48:31 -0400
Subject: [PATCH 184/307] move reconintra_mt to decoder (fixup)

Missed the .h file in the move.

Change-Id: Ib408183fbb4d019fd46394b362f89ca6ea9d10bc
---
 vp8/{common => decoder}/reconintra_mt.h | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename vp8/{common => decoder}/reconintra_mt.h (100%)

diff --git a/vp8/common/reconintra_mt.h b/vp8/decoder/reconintra_mt.h
similarity index 100%
rename from vp8/common/reconintra_mt.h
rename to vp8/decoder/reconintra_mt.h

From 305be4e4179214c58796de91e86badadbca29451 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Fri, 24 Sep 2010 17:52:55 +0100
Subject: [PATCH 185/307] Badly placed initialization of rolling rate monitors.

This affects control of the active quantizer range.

Change-Id: I30511fc81ac9f75ff20d9f1372382423d56739da
---
 vp8/encoder/onyx_if.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index b024507c7..ed112b4d7 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1330,11 +1330,6 @@ void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
     cpi->per_frame_bandwidth          = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
     cpi->av_per_frame_bandwidth        = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
     cpi->min_frame_bandwidth          = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
-    cpi->rolling_target_bits          = cpi->av_per_frame_bandwidth;
-    cpi->rolling_actual_bits          = cpi->av_per_frame_bandwidth;
-
-    cpi->long_rolling_target_bits      = cpi->av_per_frame_bandwidth;
-    cpi->long_rolling_actual_bits      = cpi->av_per_frame_bandwidth;
     cpi->max_gf_interval = (int)(cpi->output_frame_rate / 2) + 2;
 
     //cpi->max_gf_interval = (int)(cpi->output_frame_rate * 2 / 3) + 1;
@@ -1580,6 +1575,10 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     cpi->active_best_quality          = cpi->oxcf.best_allowed_q;
     cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
 
+    cpi->rolling_target_bits          = cpi->av_per_frame_bandwidth;
+    cpi->rolling_actual_bits          = cpi->av_per_frame_bandwidth;
+    cpi->long_rolling_target_bits      = cpi->av_per_frame_bandwidth;
+    cpi->long_rolling_actual_bits      = cpi->av_per_frame_bandwidth;
 
     cpi->total_actual_bits            = 0;
     cpi->total_target_vs_actual        = 0;
@@ -1858,6 +1857,10 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     cpi->active_best_quality          = cpi->oxcf.best_allowed_q;
     cpi->buffered_mode = (cpi->oxcf.optimal_buffer_level > 0) ? TRUE : FALSE;
 
+    cpi->rolling_target_bits          = cpi->av_per_frame_bandwidth;
+    cpi->rolling_actual_bits          = cpi->av_per_frame_bandwidth;
+    cpi->long_rolling_target_bits      = cpi->av_per_frame_bandwidth;
+    cpi->long_rolling_actual_bits      = cpi->av_per_frame_bandwidth;
 
     cpi->total_actual_bits            = 0;
     cpi->total_target_vs_actual        = 0;

From 18dc92fd664357db31d7ef43337e2dee3a0f5062 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 27 Sep 2010 17:18:18 -0700
Subject: [PATCH 186/307] Add 4-tap version of 2nd-pass ARMv6 MC filter.

The existing code applied a 6-tap filter with 0's on either end.
We're already paying the branch penalty to avoid computing the two
 extra columns needed as input to this filter.
We might as well save time computing the filter as well.
This reduces the inner loop from 21 instructions to 16, the number
 of loads per iteration from 4 to 1, and the number of multiplies
 from 7 to 4.
The gain in overall decoding performance, however, is small (less
 than 1%).

This change also means we now valgrind clean on ARMv6, which is
 its real purpose.
The errors reported here were valgrind's fault (it does not detect
 that 0 times an uninitialized value is initialized), but Julian
 Seward says it would slow down valgrind considerably to make such
 checks.
Speeding up libvpx rather, even by a small amount, seems a much
 better idea if only to enable proper valgrind checking of the
 rest of the codec.

Change-Id: Ifb376ea195e086b60f61daf1097d8910c4d8ff16
---
 vp8/common/arm/armv6/filter_v6.asm | 59 ++++++++++++++++++++++++++++++
 vp8/common/arm/filter_arm.c        | 33 ++++++++++++++---
 2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/vp8/common/arm/armv6/filter_v6.asm b/vp8/common/arm/armv6/filter_v6.asm
index 8bc6d7735..03b5bccd7 100644
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -11,6 +11,7 @@
 
     EXPORT  |vp8_filter_block2d_first_pass_armv6|
     EXPORT  |vp8_filter_block2d_second_pass_armv6|
+    EXPORT  |vp8_filter4_block2d_second_pass_armv6|
     EXPORT  |vp8_filter_block2d_first_pass_only_armv6|
     EXPORT  |vp8_filter_block2d_second_pass_only_armv6|
 
@@ -192,6 +193,64 @@
 
     ENDP
 
+;---------------------------------
+; r0    short         *src_ptr,
+; r1    unsigned char *output_ptr,
+; r2    unsigned int output_pitch,
+; r3    unsigned int cnt,
+; stack const short *vp8_filter
+;---------------------------------
+|vp8_filter4_block2d_second_pass_armv6| PROC
+    stmdb   sp!, {r4 - r11, lr}
+
+    ldr     r11, [sp, #36]                  ; vp8_filter address
+    mov     r7, r3, lsl #16                 ; height is top part of counter
+
+    ldr     r4, [r11]                       ; load up packed filter coefficients
+    add     lr, r1, r3                      ; save final destination pointer
+    ldr     r5, [r11, #4]
+    ldr     r6, [r11, #8]
+
+    pkhbt   r12, r5, r4                     ; pack the filter differently
+    pkhbt   r11, r6, r5
+    mov     r4, #0x40                       ; rounding factor (for smlad{x})
+
+|height_loop_2nd_4|
+    ldrd    r8, [r0, #-4]                   ; load the data
+    orr     r7, r7, r3, lsr #1              ; loop counter
+
+|width_loop_2nd_4|
+    ldr     r10, [r0, #4]!
+    smladx  r6, r9, r12, r4                 ; apply filter
+    pkhbt   r8, r9, r8
+    smlad   r5, r8, r12, r4
+    pkhbt   r8, r10, r9
+    smladx  r6, r10, r11, r6
+    sub     r7, r7, #1
+    smlad   r5, r8, r11, r5
+
+    mov     r8, r9                          ; shift the data for the next loop
+    mov     r9, r10
+
+    usat    r6, #8, r6, asr #7              ; shift and clamp
+    usat    r5, #8, r5, asr #7
+
+    strb    r5, [r1], r2                    ; the result is transposed back and stored
+    tst     r7, #0xff
+    strb    r6, [r1], r2
+
+    bne     width_loop_2nd_4
+
+    subs    r7, r7, #0x10000
+    add     r0, r0, #16                     ; update src for next loop
+    sub     r1, lr, r7, lsr #16             ; update dst for next loop
+
+    bne     height_loop_2nd_4
+
+    ldmia   sp!, {r4 - r11, pc}
+
+    ENDP
+
 ;------------------------------------
 ; r0    unsigned char *src_ptr
 ; r1    unsigned char *output_ptr,
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 5ed4f8094..4bfa3ab34 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -50,6 +50,15 @@ extern void vp8_filter_block2d_second_pass_armv6
     const short *vp8_filter
 );
 
+extern void vp8_filter4_block2d_second_pass_armv6
+(
+    short         *src_ptr,
+    unsigned char *output_ptr,
+    unsigned int output_pitch,
+    unsigned int cnt,
+    const short *vp8_filter
+);
+
 extern void vp8_filter_block2d_first_pass_only_armv6
 (
     unsigned char *src_ptr,
@@ -107,12 +116,16 @@ void vp8_sixtap_predict_armv6
     {
         // Vfilter is a 4 tap filter
         if (yoffset & 0x1)
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+        }
         // Vfilter is 6 tap filter
         else
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+        }
     }
 }
 
@@ -186,11 +199,15 @@ void vp8_sixtap_predict8x8_armv6
     else
     {
         if (yoffset & 0x1)
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+        }
         else
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+        }
     }
 }
 
@@ -224,11 +241,15 @@ void vp8_sixtap_predict16x16_armv6
     else
     {
         if (yoffset & 0x1)
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+        }
         else
+        {
             vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+        }
     }
 
 }

From 6fa5c24a99e57367efd60ed56377f68808399444 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Tue, 28 Sep 2010 09:31:11 -0400
Subject: [PATCH 187/307] update gitignore

this was excluding all .asm files when it should have just been .asm
files in the top level directory and .asm.s files lower down. also be
more restrictive on some other items, and run the whole thing through
sort to keep it organized

Change-Id: Ia48525033226b13098a491ce89465d0377b990c2
---
 .gitignore | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/.gitignore b/.gitignore
index 9a149a8d7..ae616b28c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,19 +1,16 @@
-*~
 *.a
+*.asm.s
 *.d
 *.o
-*-old
-*-new
-*.mk
-*.asm
-TAGS
-.bins
-.libs
-.deps
-/.cflags-new
-/.cflags-old
+*~
+/*-*.mk
+/*.asm
+/*.doxy
+/.bins
+/.deps
 /.docs
 /.install-*
+/.libs
 /Makefile
 /config.err
 /config.mk
@@ -23,14 +20,11 @@ TAGS
 /decode_with_drops
 /decode_with_drops.c
 /decode_with_drops.dox
-/docs-generic-gnu.mk
 /docs/
 /doxyfile
 /error_resilient
 /error_resilient.c
 /error_resilient.dox
-/examples-generic-gnu.mk
-/examples.doxy
 /force_keyframe
 /force_keyframe.c
 /force_keyframe.dox
@@ -38,8 +32,7 @@ TAGS
 /ivfdec.dox
 /ivfenc
 /ivfenc.dox
-/libs-generic-gnu.mk
-/libs.doxy
+/obj_int_extract
 /postproc
 /postproc.c
 /postproc.dox
@@ -66,3 +59,4 @@ TAGS
 /vpx_config.c
 /vpx_config.h
 /vpx_version.h
+TAGS

From 1b2f8308e48704b1c784484b070f243ffb575b88 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Tue, 28 Sep 2010 15:23:41 +0100
Subject: [PATCH 188/307] Made AltRef filter adaptive & added motion
 compensation

Modified AltRef temporal filter to adapt filter length based
on macroblock coding modes selected during first-pass
encode.

Also added sub-pixel motion compensation to the AltRef
filter.
---
 vp8/encoder/encodeframe.c |  44 ++-
 vp8/encoder/firstpass.c   | 163 ++++++----
 vp8/encoder/onyx_if.c     | 617 +++++++++++++++++++++++++++++---------
 vp8/encoder/onyx_int.h    |   7 +-
 4 files changed, 610 insertions(+), 221 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index d8a76d5b5..8c40a18e8 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -545,31 +545,29 @@ void vp8_encode_frame(VP8_COMP *cpi)
     int segment_counts[MAX_MB_SEGMENTS];
     int totalrate;
 
-    if (cm->frame_type != KEY_FRAME)
+    // Functions setup for all frame types so we can use MC in AltRef
+    if (cm->mcomp_filter_type == SIXTAP)
     {
-        if (cm->mcomp_filter_type == SIXTAP)
-        {
-            xd->subpixel_predict     = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap4x4);
-            xd->subpixel_predict8x4      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x4);
-            xd->subpixel_predict8x8      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap8x8);
-            xd->subpixel_predict16x16    = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, sixtap16x16);
-        }
-        else
-        {
-            xd->subpixel_predict     = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear4x4);
-            xd->subpixel_predict8x4      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x4);
-            xd->subpixel_predict8x8      = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear8x8);
-            xd->subpixel_predict16x16    = SUBPIX_INVOKE(&cpi->common.rtcd.subpix, bilinear16x16);
-        }
+        xd->subpixel_predict        = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap4x4);
+        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap8x4);
+        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap8x8);
+        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, sixtap16x16);
+    }
+    else
+    {
+        xd->subpixel_predict        = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear4x4);
+        xd->subpixel_predict8x4     = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear8x4);
+        xd->subpixel_predict8x8     = SUBPIX_INVOKE(
+                                        &cpi->common.rtcd.subpix, bilinear8x8);
+        xd->subpixel_predict16x16   = SUBPIX_INVOKE(
+                                      &cpi->common.rtcd.subpix, bilinear16x16);
     }
-
-    //else  // Key Frame
-    //{
-    // For key frames make sure the intra ref frame probability value
-    // is set to "all intra"
-    //cpi->prob_intra_coded = 255;
-    //}
-
 
     x->gf_active_ptr = (signed char *)cpi->gf_active_flags;     // Point to base of GF active flags data structure
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index a65bce6e1..684ad9b12 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -258,7 +258,7 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
     vpx_codec_pkt_list_add(pktlist, &pkt);
 
 // TEMP debug code
-#ifdef OUTPUT_FPF
+#if OUTPUT_FPF
     {
         FILE *fpfile;
         fpfile = fopen("firstpass.stt", "a");
@@ -369,50 +369,33 @@ void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos)
 
 void vp8_advance_fpmm(VP8_COMP *cpi, int count)
 {
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR);
 #endif
 }
 
-void vp8_input_fpmm(VP8_COMP *cpi, int count)
+void vp8_input_fpmm(VP8_COMP *cpi)
 {
-#ifdef FIRSTPASS_MM
-
-    unsigned char *tmp_motion_map;
-    int i, j;
+#if FIRSTPASS_MM
+    int MBs = cpi->common.MBs;
+    int max_frames = cpi->active_arnr_frames;
 
     if (!cpi->fp_motion_mapfile)
         return;                 // Error
 
-    // Create the first pass motion map structure and set to 0
-    CHECK_MEM_ERROR(tmp_motion_map, vpx_calloc(cpi->common.MBs, 1));
-
-    // Reset the state of the global map
-    vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
-
-    // Read the specified number of frame maps and set the global map to the highest value seen for each mb.
-    for (i = 0; i < count; i++)
+    // Read the specified number of frame motion maps
+    if (fread(cpi->fp_motion_map, 1,
+              max_frames * MBs,
+              cpi->fp_motion_mapfile) != max_frames*MBs)
     {
-        if (fread(tmp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile) == cpi->common.MBs)
-        {
-            for (j = 0; j < cpi->common.MBs; j++)
-            {
-                if (tmp_motion_map[j] > 1)
-                    cpi->fp_motion_map[j] += 5;   // Intra is flagged
-                else
-                    cpi->fp_motion_map[j] += tmp_motion_map[j];
-            }
-        }
-        else
-            break;  // Read error
-
+        // Read error
+        return;
     }
 
-    if (tmp_motion_map != 0)
-        vpx_free(tmp_motion_map);
+    // Flag the use of weights in the temporal filter
+    cpi->use_weighted_temporal_filter = 1;
 
 #endif
-
 }
 
 void vp8_init_first_pass(VP8_COMP *cpi)
@@ -438,7 +421,7 @@ void vp8_end_first_pass(VP8_COMP *cpi)
 {
     vp8_output_stats(cpi->output_pkt_list, &cpi->total_stats);
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
 
     if (cpi->fp_motion_mapfile)
         fclose(cpi->fp_motion_mapfile);
@@ -603,6 +586,8 @@ void vp8_first_pass(VP8_COMP *cpi)
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
         {
             int this_error;
+            int zero_error;
+            int zz_to_best_ratio;
             int gf_motion_error = INT_MAX;
             int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row);
 
@@ -624,7 +609,7 @@ void vp8_first_pass(VP8_COMP *cpi)
             intra_error += this_error;
 
             // Indicate default assumption of intra in the motion map
-            *fp_motion_map_ptr = 2;
+            *fp_motion_map_ptr = 0;
 
             // Set up limit values for motion vectors to prevent them extending outside the UMV borders
             x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
@@ -646,6 +631,9 @@ void vp8_first_pass(VP8_COMP *cpi)
                 d->bmi.mv.as_mv.row = 0;
                 d->bmi.mv.as_mv.col = 0;
 
+                // Save (0,0) error for later use
+                zero_error = motion_error;
+
                 // Test last reference frame using the previous best mv as the
                 // starting point (best reference) for the search
                 vp8_first_pass_motion_search(cpi, x, &best_ref_mv,
@@ -719,8 +707,6 @@ void vp8_first_pass(VP8_COMP *cpi)
                     {
                         mvcount++;
 
-                        *fp_motion_map_ptr = 1;
-
                         // Does the Row vector point inwards or outwards
                         if (mb_row < cm->mb_rows / 2)
                         {
@@ -752,12 +738,30 @@ void vp8_first_pass(VP8_COMP *cpi)
                             else if (d->bmi.mv.as_mv.col < 0)
                                 sum_in_vectors--;
                         }
+
+                        // Compute how close (0,0) predictor is to best
+                        // predictor in terms of their prediction error
+                        zz_to_best_ratio = (10*zero_error + this_error/2)
+                                            / (this_error+!this_error);
+
+                        if ((zero_error < 50000) &&
+                            (zz_to_best_ratio <= 11) )
+                            *fp_motion_map_ptr = 1;
+                        else
+                            *fp_motion_map_ptr = 0;
                     }
                     else
-                        *fp_motion_map_ptr = 0;    // 0,0 mv was best
+                    {
+                        // 0,0 mv was best
+                        if( zero_error<50000 )
+                            *fp_motion_map_ptr = 2;
+                        else
+                            *fp_motion_map_ptr = 1;
+                    }
                 }
                 else
                 {
+                    // Intra was best
                     best_ref_mv.row = 0;
                     best_ref_mv.col = 0;
                 }
@@ -839,7 +843,7 @@ void vp8_first_pass(VP8_COMP *cpi)
         vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats);
         vp8_accumulate_stats(&cpi->total_stats, &fps);
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
         fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile);
 #endif
     }
@@ -1180,7 +1184,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
 
     }
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     cpi->fp_motion_mapfile = 0;
     cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb");
 #endif
@@ -1189,7 +1193,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
 
 void vp8_end_second_pass(VP8_COMP *cpi)
 {
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
 
     if (cpi->fp_motion_mapfile)
         fclose(cpi->fp_motion_mapfile);
@@ -1230,7 +1234,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     int max_bits = frame_max_bits(cpi);    // Max for a single frame
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     int fpmm_pos;
 #endif
 
@@ -1239,7 +1243,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     vp8_clear_system_state();  //__asm emms;
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     fpmm_pos = vp8_fpmm_get_pos(cpi);
 #endif
 
@@ -1452,6 +1456,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         // Only use an arf if it is likely we will be able to code it at a lower Q than the surrounding frames.
         if (tmp_q < cpi->worst_quality)
         {
+            int half_gf_int;
+            int frames_after_arf;
+            int frames_bwd = cpi->oxcf.arnr_max_frames - 1;
+            int frames_fwd = cpi->oxcf.arnr_max_frames - 1;
+
             cpi->source_alt_ref_pending = TRUE;
 
             // For alt ref frames the error score for the end frame of the group (the alt ref frame) should not contribute to the group total and hence
@@ -1462,20 +1471,63 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             // The future frame itself is part of the next group
             cpi->baseline_gf_interval = i - 1;
 
-#ifdef FIRSTPASS_MM
-            // Read through the motion map to load up the entry for the ARF
-            {
-                int j;
+            // Define the arnr filter width for this group of frames:
+            // We only filter frames that lie within a distance of half
+            // the GF interval from the ARF frame. We also have to trap
+            // cases where the filter extends beyond the end of clip.
+            // Note: this_frame->frame has been updated in the loop
+            // so it now points at the ARF frame.
+            half_gf_int = cpi->baseline_gf_interval >> 1;
+            frames_after_arf = cpi->total_stats.count - this_frame->frame - 1;
 
-                // Advance to the region of interest
-                // Current default 2 frames before to 2 frames after the ARF frame itsef
+            switch (cpi->oxcf.arnr_type)
+            {
+            case 1: // Backward filter
+                frames_fwd = 0;
+                if (frames_bwd > half_gf_int)
+                    frames_bwd = half_gf_int;
+                break;
+
+            case 2: // Forward filter
+                if (frames_fwd > half_gf_int)
+                    frames_fwd = half_gf_int;
+                if (frames_fwd > frames_after_arf)
+                    frames_fwd = frames_after_arf;
+                frames_bwd = 0;
+                break;
+
+            case 3: // Centered filter
+            default:
+                frames_fwd >>= 1;
+                if (frames_fwd > frames_after_arf)
+                    frames_fwd = frames_after_arf;
+                if (frames_fwd > half_gf_int)
+                    frames_fwd = half_gf_int;
+
+                frames_bwd = frames_fwd;
+
+                // For even length filter there is one more frame backward
+                // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff.
+                if (frames_bwd < half_gf_int)
+                    frames_bwd += (cpi->oxcf.arnr_max_frames+1) & 0x1;
+                break;
+            }
+
+            cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
+
+#if FIRSTPASS_MM
+            {
+                // Advance to & read in the motion map for those frames
+                // to be considered for filtering based on the position
+                // of the ARF
                 vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos);
 
-                for (j = 0; j < cpi->baseline_gf_interval - 2; j++)
-                    vp8_advance_fpmm(cpi, 1);
+                // Position at the 'earliest' frame to be filtered
+                vp8_advance_fpmm(cpi,
+                    cpi->baseline_gf_interval - frames_bwd);
 
                 // Read / create a motion map for the region of interest
-                vp8_input_fpmm(cpi, 5);
+                vp8_input_fpmm(cpi);
             }
 #endif
         }
@@ -1713,7 +1765,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         reset_fpf_position(cpi, start_pos);
     }
 
-#ifdef FIRSTPASS_MM
+#if FIRSTPASS_MM
     // Reset the First pass motion map file position
     vp8_fpmm_reset_pos(cpi, fpmm_pos);
 #endif
@@ -1798,10 +1850,13 @@ void vp8_second_pass(VP8_COMP *cpi)
     if (EOF == vp8_input_stats(cpi, &this_frame))
         return;
 
-#ifdef FIRSTPASS_MM
-    vpx_memset(cpi->fp_motion_map, 0, cpi->common.MBs);
+#if FIRSTPASS_MM
+    vpx_memset(cpi->fp_motion_map, 0,
+                cpi->oxcf.arnr_max_frames*cpi->common.MBs);
     cpi->fpmm_pos = vp8_fpmm_get_pos(cpi);
-    vp8_advance_fpmm(cpi, 1);         // Read this frame's first pass motion map
+
+    // Step over this frame's first pass motion map
+    vp8_advance_fpmm(cpi, 1);
 #endif
 
     this_frame_error = this_frame.ssim_weighted_pred_err;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ed112b4d7..ce15acaac 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -43,6 +43,9 @@
 #define RTCD(x) NULL
 #endif
 
+#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
+
 extern void vp8cx_init_mv_bits_sadcost();
 extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
 extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val);
@@ -1662,13 +1665,16 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
 #endif
 
 #if VP8_TEMPORAL_ALT_REF
+
+    cpi->use_weighted_temporal_filter = 0;
+
     {
         int i;
 
         cpi->fixed_divide[0] = 0;
 
-        for (i = 1; i < 255; i++)
-            cpi->fixed_divide[i] = 0x10000 / i;
+        for (i = 1; i < 512; i++)
+            cpi->fixed_divide[i] = 0x80000 / i;
     }
 #endif
 }
@@ -2042,7 +2048,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->active_map_enabled = 0;
 
     // Create the first pass motion map structure and set to 0
-    CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(cpi->common.MBs, 1));
+    // Allocate space for maximum of 15 buffers
+    CHECK_MEM_ERROR(cpi->fp_motion_map, vpx_calloc(15*cpi->common.MBs, 1));
 
 #if 0
     // Experimental code for lagged and one pass
@@ -3290,97 +3297,479 @@ static int modifier_lut[7][19] =
     {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
 };
 #endif
-static void vp8cx_temp_blur1_c
+static void build_predictors_mb
 (
-    VP8_COMP *cpi,
-    unsigned char **frames,
-    int frame_count,
-    unsigned char *src,
-    unsigned char *dst,
-    int width,
+    MACROBLOCKD *x,
+    unsigned char *y_mb_ptr,
+    unsigned char *u_mb_ptr,
+    unsigned char *v_mb_ptr,
     int stride,
-    int height,
-    int strength,
-    int *fixed_divide,
-    unsigned char *motion_map_ptr,
-    unsigned char block_size
+    int mv_row,
+    int mv_col,
+    unsigned char *pred
+)
+{
+    int offset;
+    unsigned char *yptr, *uptr, *vptr;
+
+    // Y
+    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+    if ((mv_row | mv_col) & 7)
+    {
+//        vp8_sixtap_predict16x16_c(yptr, stride,
+//                                    mv_col & 7, mv_row & 7, &pred[0], 16);
+        x->subpixel_predict16x16(yptr, stride,
+                                    mv_col & 7, mv_row & 7, &pred[0], 16);
+    }
+    else
+    {
+        //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
+    }
+
+    // U & V
+    mv_row >>= 1;
+    mv_col >>= 1;
+    stride >>= 1;
+    offset = (mv_row >> 3) * stride + (mv_col >> 3);
+    uptr = u_mb_ptr + offset;
+    vptr = v_mb_ptr + offset;
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, stride,
+                            mv_col & 7, mv_row & 7, &pred[256], 8);
+        x->subpixel_predict8x8(vptr, stride,
+                            mv_col & 7, mv_row & 7, &pred[320], 8);
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
+    }
+}
+static void apply_temporal_filter
+(
+    unsigned char *frame1,
+    unsigned int stride,
+    unsigned char *frame2,
+    unsigned int block_size,
+    int strength,
+    int filter_weight,
+    int *accumulator,
+    int *count
 )
 {
-    int byte = 0;         // Buffer offset for current pixel being filtered
-    int frame = 0;
-    int modifier = 0;
     int i, j, k;
-    int block_ofset;
-    int cols;
-    unsigned char Shift = (block_size == 16) ? 4 : 3;
+    int modifier;
+    int byte = 0;
+
 #if USE_FILTER_LUT
     int *lut = modifier_lut[strength];
 #endif
 
-    cols = cpi->common.mb_cols;
-
-    for (i = 0; i < height; i++)
+    for (i = 0,k = 0; i < block_size; i++)
     {
-        block_ofset = (i >> Shift) * cols;
-
-        for (j = 0; j < cols; j ++)
+        for (j = 0; j < block_size; j++, k++)
         {
-            if (motion_map_ptr[block_ofset] > 2)
-            {
-                vpx_memcpy(&dst[byte], &src[byte], block_size);
-                byte += block_size;
-            }
+
+            int src_byte = frame1[byte];
+            int pixel_value = *frame2++;
+
+#if USE_FILTER_LUT
+            // LUT implementation --
+            // improves precision of filter
+            modifier = abs(src_byte-pixel_value);
+            modifier = modifier>18 ? 0 : lut[modifier];
+#else
+            modifier   = src_byte;
+            modifier  -= pixel_value;
+            modifier  *= modifier;
+            modifier >>= strength;
+            modifier  *= 3;
+
+            if (modifier > 16)
+                modifier = 16;
+
+            modifier = 16 - modifier;
+#endif
+            modifier *= filter_weight;
+            
+            count[k] += modifier;
+            accumulator[k] += modifier * pixel_value;
+
+            byte++;
+        }
+
+        byte += stride - block_size;
+    }
+}
+
+#if ALT_REF_MC_ENABLED
+static int dummy_cost[2*mv_max+1];
+
+static int find_matching_mb
+(
+    VP8_COMP *cpi,
+    YV12_BUFFER_CONFIG *arf_frame,
+    YV12_BUFFER_CONFIG *frame_ptr,
+    int mb_offset,
+    int error_thresh
+)
+{
+    MACROBLOCK *x = &cpi->mb;
+    int thissme;
+    int step_param;
+    int further_steps;
+    int n = 0;
+    int sadpb = x->sadperbit16;
+    int bestsme = INT_MAX;
+    int num00 = 0;
+
+    BLOCK *b = &x->block[0];
+    BLOCKD *d = &x->e_mbd.block[0];
+    MV best_ref_mv1 = {0,0};
+
+    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+
+    // Save input state
+    unsigned char **base_src = b->base_src;
+    int src = b->src;
+    int src_stride = b->src_stride;
+    unsigned char **base_pre = d->base_pre;
+    int pre = d->pre;
+    int pre_stride = d->pre_stride;
+
+    // Setup frame pointers
+    b->base_src = &arf_frame->y_buffer;
+    b->src_stride = arf_frame->y_stride;
+    b->src = mb_offset;
+
+    d->base_pre = &frame_ptr->y_buffer;
+    d->pre_stride = frame_ptr->y_stride;
+    d->pre = mb_offset;
+
+    // Further step/diamond searches as necessary
+    if (cpi->Speed < 8)
+    {
+        step_param = cpi->sf.first_step +
+                    ((cpi->Speed > 5) ? 1 : 0);
+        further_steps =
+            (cpi->sf.max_step_search_steps - 1)-step_param;
+    }
+    else
+    {
+        step_param = cpi->sf.first_step + 2;
+        further_steps = 0;
+    }
+
+    if (1/*cpi->sf.search_method == HEX*/)
+    {
+        // TODO Check that the 16x16 vf & sdf are selected here
+        bestsme = vp8_hex_search(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb/*x->errorperbit*/,
+            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+            mvsadcost, mvcost);
+    }
+    else
+    {
+        int mv_x, mv_y;
+
+        bestsme = cpi->diamond_search_sad(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb / 2/*x->errorperbit*/,
+            &num00, &cpi->fn_ptr,
+            mvsadcost, mvcost); //sadpb < 9
+
+        // Further step/diamond searches as necessary
+        n = 0;
+        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+        n = num00;
+        num00 = 0;
+
+        while (n < further_steps)
+        {
+            n++;
+
+            if (num00)
+                num00--;
             else
             {
-                for (k = 0; k < block_size; k++)
+                thissme = cpi->diamond_search_sad(x, b, d,
+                    &best_ref_mv1, &d->bmi.mv.as_mv,
+                    step_param + n,
+                    sadpb / 4/*x->errorperbit*/,
+                    &num00, &cpi->fn_ptr,
+                    mvsadcost, mvcost); //sadpb = 9
+
+                if (thissme < bestsme)
                 {
-                    int accumulator = 0;
-                    int count = 0;
-                    int src_byte = src[byte];
+                    bestsme = thissme;
+                    mv_y = d->bmi.mv.as_mv.row;
+                    mv_x = d->bmi.mv.as_mv.col;
+                }
+                else
+                {
+                    d->bmi.mv.as_mv.row = mv_y;
+                    d->bmi.mv.as_mv.col = mv_x;
+                }
+            }
+        }
+    }
 
-                    for (frame = 0; frame < frame_count; frame++)
-                    {
-                        // get current frame pixel value
-                        int pixel_value = frames[frame][byte];
-#if USE_FILTER_LUT
-                        // LUT implementation --
-                        // improves precision of filter
-                        modifier = abs(src_byte-pixel_value);
-                        modifier = modifier>18 ? 0 : lut[modifier];
-#else
-                        modifier   = src_byte;
-                        modifier  -= pixel_value;
-                        modifier  *= modifier;
-                        modifier >>= strength;
-                        modifier  *= 3;
-
-                        if (modifier > 16)
-                            modifier = 16;
-
-                        modifier = 16 - modifier;
+#if ALT_REF_SUBPEL_ENABLED
+    // Try sub-pixel MC?
+    //if (bestsme > error_thresh && bestsme < INT_MAX)
+    {
+        bestsme = cpi->find_fractional_mv_step(x, b, d,
+                    &d->bmi.mv.as_mv, &best_ref_mv1,
+                    x->errorperbit, cpi->fn_ptr.svf,
+                    cpi->fn_ptr.vf, cpi->mb.mvcost);
+    }
 #endif
-                        accumulator += modifier * pixel_value;
 
-                        count += modifier;
+    // Save input state
+    b->base_src = base_src;
+    b->src = src;
+    b->src_stride = src_stride;
+    d->base_pre = base_pre;
+    d->pre = pre;
+    d->pre_stride = pre_stride;
+
+    return bestsme;
+}
+#endif
+
+static void vp8cx_temp_blur1_c
+(
+    VP8_COMP *cpi,
+    int frame_count,
+    int alt_ref_index,
+    int strength
+)
+{
+    int byte;
+    int frame;
+    int mb_col, mb_row;
+    unsigned int filter_weight[MAX_LAG_BUFFERS];
+    unsigned char *mm_ptr = cpi->fp_motion_map;
+    int cols = cpi->common.mb_cols;
+    int rows = cpi->common.mb_rows;
+    int MBs  = cpi->common.MBs;
+    int mb_y_offset = 0;
+    int mb_uv_offset = 0;
+    unsigned int accumulator[384];
+    unsigned int count[384];
+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+    unsigned char *dst1, *dst2;
+    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
+    
+    // Save input state
+    unsigned char *y_buffer = mbd->pre.y_buffer;
+    unsigned char *u_buffer = mbd->pre.u_buffer;
+    unsigned char *v_buffer = mbd->pre.v_buffer;
+
+    if (!cpi->use_weighted_temporal_filter)
+    {
+        // Temporal filtering is unweighted
+        for (frame = 0; frame < frame_count; frame++)
+            filter_weight[frame] = 1;
+    }
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+#if ALT_REF_MC_ENABLED
+        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+                                + (VP8BORDERINPIXELS - 19);
+#endif
+
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+            int i, j, k, w;
+            int weight_cap;
+            int stride;
+
+            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
+            vpx_memset(count, 0, 384*sizeof(unsigned int));
+
+#if ALT_REF_MC_ENABLED
+            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+                                    + (VP8BORDERINPIXELS - 19);
+#endif
+
+            // Read & process macroblock weights from motion map
+            if (cpi->use_weighted_temporal_filter)
+            {
+                weight_cap = 2;
+
+                for (frame = alt_ref_index-1; frame >= 0; frame--)
+                {
+                    w = *(mm_ptr + (frame+1)*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+                filter_weight[alt_ref_index] = 2;
+
+                weight_cap = 2;
+
+                for (frame = alt_ref_index+1; frame < frame_count; frame++)
+                {
+                    w = *(mm_ptr + frame*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+            }
+
+            for (frame = 0; frame < frame_count; frame++)
+            {
+                int err;
+
+                if (cpi->frames[frame] == NULL)
+                    continue;
+
+                mbd->block[0].bmi.mv.as_mv.row = 0;
+                mbd->block[0].bmi.mv.as_mv.col = 0;
+
+#if ALT_REF_MC_ENABLED
+                //if (filter_weight[frame] == 0)
+                {
+#define THRESH_LOW   10000
+#define THRESH_HIGH  20000
+
+                    // Correlation has been lost try MC
+                    err = find_matching_mb ( cpi,
+                                             cpi->frames[alt_ref_index],
+                                             cpi->frames[frame],
+                                             mb_y_offset,
+                                             THRESH_LOW );
+
+                    if (filter_weight[frame] < 2)
+                    {
+                        // Set weight depending on error
+                        filter_weight[frame] = err<THRESH_LOW
+                                                ? 2 : err<THRESH_HIGH ? 1 : 0;
                     }
+                }
+#endif
+                if (filter_weight[frame] != 0)
+                {
+                    // Construct the predictors
+                    build_predictors_mb (
+                              mbd,
+                              cpi->frames[frame]->y_buffer + mb_y_offset,
+                              cpi->frames[frame]->u_buffer + mb_uv_offset,
+                              cpi->frames[frame]->v_buffer + mb_uv_offset,
+                              cpi->frames[frame]->y_stride,
+                              mbd->block[0].bmi.mv.as_mv.row,
+                              mbd->block[0].bmi.mv.as_mv.col,
+                              predictor );
 
-                    accumulator += (count >> 1);
-                    accumulator *= fixed_divide[count];
-                    accumulator >>= 16;
+                    // Apply the filter (YUV)
+                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
+                                            f->y_stride,
+                                            predictor,
+                                            16,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator,
+                                            count );
 
-                    dst[byte] = accumulator;
+                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 256,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 256,
+                                            count + 256 );
+
+                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 320,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 320,
+                                            count + 320 );
+                }
+            }
+
+            // Normalize filter output to produce AltRef frame
+            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+            byte = mb_y_offset;
+            for (i = 0,k = 0; i < 16; i++)
+            {
+                for (j = 0; j < 16; j++, k++)
+                {
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+
+                    dst1[byte] = (unsigned char)pval;
 
                     // move to next pixel
                     byte++;
                 }
+
+                byte += stride - 16;
             }
 
-            block_ofset++;
+            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
+            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+            byte = mb_uv_offset;
+            for (i = 0,k = 256; i < 8; i++)
+            {
+                for (j = 0; j < 8; j++, k++)
+                {
+                    int m=k+64;
+
+                    // U
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+                    dst1[byte] = (unsigned char)pval;
+
+                    // V
+                    pval = accumulator[m] + (count[m] >> 1);
+                    pval *= cpi->fixed_divide[count[m]];
+                    pval >>= 19;
+                    dst2[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 8;
+            }
+
+            mm_ptr++;
+            mb_y_offset += 16;
+            mb_uv_offset += 8;
         }
 
-        // Step byte on over the UMV border to the start of the next line
-        byte += stride - width;
+        mb_y_offset += 16*f->y_stride-f->y_width;
+        mb_uv_offset += 8*f->uv_stride-f->uv_width;
     }
+
+    // Restore input state
+    mbd->pre.y_buffer = y_buffer;
+    mbd->pre.u_buffer = u_buffer;
+    mbd->pre.v_buffer = v_buffer;
 }
 
 static void vp8cx_temp_filter_c
@@ -3388,11 +3777,7 @@ static void vp8cx_temp_filter_c
     VP8_COMP *cpi
 )
 {
-    YV12_BUFFER_CONFIG *temp_source_buffer;
-    int *fixed_divide = cpi->fixed_divide;
-
     int frame = 0;
-    int max_frames = 11;
 
     int num_frames_backward = 0;
     int num_frames_forward = 0;
@@ -3400,15 +3785,13 @@ static void vp8cx_temp_filter_c
     int frames_to_blur_forward = 0;
     int frames_to_blur = 0;
     int start_frame = 0;
+    unsigned int filtered = 0;
 
     int strength = cpi->oxcf.arnr_strength;
 
     int blur_type = cpi->oxcf.arnr_type;
 
-    int new_max_frames = cpi->oxcf.arnr_max_frames;
-
-    if (new_max_frames > 0)
-        max_frames = new_max_frames;
+    int max_frames = cpi->active_arnr_frames;
 
     num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
 
@@ -3455,8 +3838,9 @@ static void vp8cx_temp_filter_c
         if (frames_to_blur_backward > frames_to_blur_forward)
             frames_to_blur_backward = frames_to_blur_forward;
 
-        if (frames_to_blur_forward > (max_frames / 2))
-            frames_to_blur_forward = (max_frames / 2);
+        // When max_frames is even we have 1 more frame backward than forward
+        if (frames_to_blur_forward > (max_frames - 1) / 2)
+            frames_to_blur_forward = ((max_frames - 1) / 2);
 
         if (frames_to_blur_backward > (max_frames / 2))
             frames_to_blur_backward = (max_frames / 2);
@@ -3488,7 +3872,8 @@ static void vp8cx_temp_filter_c
         break;
     }
 
-    start_frame = (cpi->last_alt_ref_sei + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
+    start_frame = (cpi->last_alt_ref_sei
+                    + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
 
 #ifdef DEBUGFWG
     // DEBUG FWG
@@ -3504,6 +3889,8 @@ static void vp8cx_temp_filter_c
            , start_frame);
 #endif
 
+    // Setup frame pointers, NULL indicates frame not included in filter
+    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
     for (frame = 0; frame < frames_to_blur; frame++)
     {
         int which_buffer =  start_frame - frame;
@@ -3511,80 +3898,26 @@ static void vp8cx_temp_filter_c
         if (which_buffer < 0)
             which_buffer += cpi->oxcf.lag_in_frames;
 
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.y_buffer;
+        cpi->frames[frames_to_blur-1-frame]
+                = &cpi->src_buffer[which_buffer].source_buffer;
     }
 
-    temp_source_buffer = &cpi->src_buffer[cpi->last_alt_ref_sei].source_buffer;
-
-    // Blur Y
-    vp8cx_temp_blur1_c(
+    vp8cx_temp_blur1_c (
         cpi,
-        cpi->frames,
         frames_to_blur,
-        temp_source_buffer->y_buffer,  // cpi->Source->y_buffer,
-        cpi->alt_ref_buffer.source_buffer.y_buffer,  // cpi->Source->y_buffer,
-        temp_source_buffer->y_width,
-        temp_source_buffer->y_stride,
-        temp_source_buffer->y_height,
-        //temp_source_buffer->y_height * temp_source_buffer->y_stride,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 16);
-
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer =  start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.u_buffer;
-    }
-
-    // Blur U
-    vp8cx_temp_blur1_c(
-        cpi,
-        cpi->frames,
-        frames_to_blur,
-        temp_source_buffer->u_buffer,
-        cpi->alt_ref_buffer.source_buffer.u_buffer,  // cpi->Source->u_buffer,
-        temp_source_buffer->uv_width,
-        temp_source_buffer->uv_stride,
-        temp_source_buffer->uv_height,
-        //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 8);
-
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer =  start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frame] = cpi->src_buffer[which_buffer].source_buffer.v_buffer;
-    }
-
-    // Blur V
-    vp8cx_temp_blur1_c(
-        cpi,
-        cpi->frames,
-        frames_to_blur,
-        temp_source_buffer->v_buffer,
-        cpi->alt_ref_buffer.source_buffer.v_buffer,  // cpi->Source->v_buffer,
-        temp_source_buffer->uv_width,
-        temp_source_buffer->uv_stride,
-        //temp_source_buffer->uv_height * temp_source_buffer->uv_stride,
-        temp_source_buffer->uv_height,
-        strength,
-        fixed_divide,
-        cpi->fp_motion_map, 8);
+        frames_to_blur_backward,
+        strength );
 }
 #endif
 
 
-static void encode_frame_to_data_rate(VP8_COMP *cpi, unsigned long *size, unsigned char *dest, unsigned int *frame_flags)
+static void encode_frame_to_data_rate
+(
+    VP8_COMP *cpi,
+    unsigned long *size,
+    unsigned char *dest,
+    unsigned int *frame_flags
+)
 {
     int Q;
     int frame_over_shoot_limit;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 211f65912..0ea6a3b47 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -378,6 +378,7 @@ typedef struct
     int max_gf_interval;
     int baseline_gf_interval;
     int gf_decay_rate;
+    int active_arnr_frames;           // <= cpi->oxcf.arnr_max_frames
 
     INT64 key_frame_count;
     INT64 tot_key_frame_bits;
@@ -616,9 +617,11 @@ typedef struct
 #endif
 #if VP8_TEMPORAL_ALT_REF
     SOURCE_SAMPLE alt_ref_buffer;
-    unsigned char *frames[MAX_LAG_BUFFERS];
-    int fixed_divide[255];
+    YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS];
+    int fixed_divide[512];
 #endif
+    // Flag to indicate temporal filter method
+    int use_weighted_temporal_filter;
 
 #if CONFIG_PSNR
     int    count;

From 47fc8f2683946c7ba29a300c4528e66349adf704 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Tue, 28 Sep 2010 16:52:19 +0100
Subject: [PATCH 189/307] Enabled AltRef motion map creation

Enabled the first-pass encode to output the
map of macroblock coding modes required by
the AltRef filter.
---
 vp8/encoder/firstpass.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 684ad9b12..0a6e8c50e 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -30,7 +30,7 @@
 #include "encodemv.h"
 
 //#define OUTPUT_FPF 1
-//#define FIRSTPASS_MM 1
+#define FIRSTPASS_MM 1
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x) (x)

From 0964ef0e7195fdf990162da9ba3efa911180bb02 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 28 Sep 2010 12:01:34 -0700
Subject: [PATCH 190/307] Optimizations on the loopfilters.

- Scheduling for Atom processors
- Combining of macros to allow for better interleaving
- Change from multiplies to adds for main filter
- Use of movhps/movlps to fill xmm registers without
  shifting and orring

Change-Id: I0b3500a5f58abf7085253ec92d64c8a96723040b
---
 vp8/common/x86/loopfilter_sse2.asm | 1245 +++++++++++-----------------
 1 file changed, 472 insertions(+), 773 deletions(-)

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index de801df52..dc8167d4d 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -14,175 +14,172 @@
 ; Use of pmaxub instead of psubusb to compute filter mask was seen
 ; in ffvp8
 
-%macro LFH_FILTER_MASK 1
+%macro LFH_FILTER_AND_HEV_MASK 1
 %if %1
         movdqa      xmm2,                   [rdi+2*rax]       ; q3
         movdqa      xmm1,                   [rsi+2*rax]       ; q2
-%else
-        movq        xmm0,                   [rsi + rcx*2]     ; q3
-        movq        xmm2,                   [rdi + rcx*2]
-        pslldq      xmm2,                   8
-        por         xmm2,                   xmm0
-        movq        xmm1,                   [rsi + rcx]       ; q2
-        movq        xmm3,                   [rdi + rcx]
-        pslldq      xmm3,                   8
-        por         xmm1,                   xmm3
-        movdqa      XMMWORD PTR [rsp],      xmm1              ; store q2
-%endif
-
-        movdqa      xmm6,                   xmm1              ; q2
-        psubusb     xmm1,                   xmm2              ; q2-=q3
-        psubusb     xmm2,                   xmm6              ; q3-=q2
-        por         xmm1,                   xmm2              ; abs(q3-q2)
-
-%if %1
         movdqa      xmm4,                   [rsi+rax]         ; q1
-%else
-        movq        xmm0,                   [rsi]             ; q1
-        movq        xmm4,                   [rdi]
-        pslldq      xmm4,                   8
-        por         xmm4,                   xmm0
-        movdqa      XMMWORD PTR [rsp + 16], xmm4              ; store q1
-%endif
-
-        movdqa      xmm3,                   xmm4              ; q1
-        psubusb     xmm4,                   xmm6              ; q1-=q2
-        psubusb     xmm6,                   xmm3              ; q2-=q1
-        por         xmm4,                   xmm6              ; abs(q2-q1)
-        pmaxub      xmm1,                   xmm4
-
-%if %1
-        movdqa      xmm4,                   [rsi]             ; q0
-%else
-        movq        xmm4,                   [rsi + rax]       ; q0
-        movq        xmm0,                   [rdi + rax]
-        pslldq      xmm0,                   8
-        por         xmm4,                   xmm0
-%endif
-
-        movdqa      xmm0,                   xmm4              ; q0
-        psubusb     xmm4,                   xmm3              ; q0-=q1
-        psubusb     xmm3,                   xmm0              ; q1-=q0
-        por         xmm4,                   xmm3              ; abs(q0-q1)
-        movdqa      t0,                     xmm4              ; save to t0
-        pmaxub      xmm1,                   xmm4
-
-%if %1
+        movdqa      xmm5,                   [rsi]             ; q0
         neg         rax                     ; negate pitch to deal with above border
-
-        movdqa      xmm2,                   [rsi+4*rax]       ; p3
-        movdqa      xmm4,                   [rdi+4*rax]       ; p2
 %else
+        movlps      xmm2,                   [rsi + rcx*2]     ; q3
+        movlps      xmm1,                   [rsi + rcx]       ; q2
+        movlps      xmm4,                   [rsi]             ; q1
+        movlps      xmm5,                   [rsi + rax]       ; q0
+
+        movhps      xmm2,                   [rdi + rcx*2]
+        movhps      xmm1,                   [rdi + rcx]
+        movhps      xmm4,                   [rdi]
+        movhps      xmm5,                   [rdi + rax]
+
         lea         rsi,                    [rsi + rax*4]
         lea         rdi,                    [rdi + rax*4]
 
-        movq        xmm2,                   [rsi + rax]       ; p3
-        movq        xmm3,                   [rdi + rax]
-        pslldq      xmm3,                   8
-        por         xmm2,                   xmm3
-        movq        xmm4,                   [rsi]             ; p2
-        movq        xmm5,                   [rdi]
-        pslldq      xmm5,                   8
-        por         xmm4,                   xmm5
+        movdqa      XMMWORD PTR [rsp],      xmm1              ; store q2
+        movdqa      XMMWORD PTR [rsp + 16], xmm4              ; store q1
+%endif
+
+        movdqa      xmm6,                   xmm1              ; q2
+        movdqa      xmm3,                   xmm4              ; q1
+
+        psubusb     xmm1,                   xmm2              ; q2-=q3
+        psubusb     xmm2,                   xmm6              ; q3-=q2
+
+        psubusb     xmm4,                   xmm6              ; q1-=q2
+        psubusb     xmm6,                   xmm3              ; q2-=q1
+
+        por         xmm4,                   xmm6              ; abs(q2-q1)
+        por         xmm1,                   xmm2              ; abs(q3-q2)
+
+        movdqa      xmm0,                   xmm5              ; q0
+        pmaxub      xmm1,                   xmm4
+
+        psubusb     xmm5,                   xmm3              ; q0-=q1
+        psubusb     xmm3,                   xmm0              ; q1-=q0
+
+        por         xmm5,                   xmm3              ; abs(q0-q1)
+        movdqa      t0,                     xmm5              ; save to t0
+
+        pmaxub      xmm1,                   xmm5
+
+%if %1
+        movdqa      xmm2,                   [rsi+4*rax]       ; p3
+        movdqa      xmm4,                   [rdi+4*rax]       ; p2
+        movdqa      xmm6,                   [rsi+2*rax]       ; p1
+%else
+        movlps      xmm2,                   [rsi + rax]       ; p3
+        movlps      xmm4,                   [rsi]             ; p2
+        movlps      xmm6,                   [rsi + rcx]       ; p1
+
+        movhps      xmm2,                   [rdi + rax]
+        movhps      xmm4,                   [rdi]
+        movhps      xmm6,                   [rdi + rcx]
+
         movdqa      XMMWORD PTR [rsp + 32], xmm4              ; store p2
+        movdqa      XMMWORD PTR [rsp + 48], xmm6              ; store p1
 %endif
 
         movdqa      xmm5,                   xmm4              ; p2
+        movdqa      xmm3,                   xmm6              ; p1
+
         psubusb     xmm4,                   xmm2              ; p2-=p3
         psubusb     xmm2,                   xmm5              ; p3-=p2
-        por         xmm4,                   xmm2              ; abs(p3 - p2)
-        pmaxub      xmm1,                   xmm4
 
-%if %1
-        movdqa      xmm4,                   [rsi+2*rax]       ; p1
-%else
-        movq        xmm4,                   [rsi + rcx]       ; p1
-        movq        xmm3,                   [rdi + rcx]
-        pslldq      xmm3,                   8
-        por         xmm4,                   xmm3
-        movdqa      XMMWORD PTR [rsp + 48], xmm4              ; store p1
-%endif
+        psubusb     xmm3,                   xmm5              ; p1-=p2
+        pmaxub      xmm1,                   xmm4              ; abs(p3 - p2)
 
-        movdqa      xmm3,                   xmm4              ; p1
-        psubusb     xmm4,                   xmm5              ; p1-=p2
-        psubusb     xmm5,                   xmm3              ; p2-=p1
-        por         xmm4,                   xmm5              ; abs(p2 - p1)
-        pmaxub      xmm1,                   xmm4
+        psubusb     xmm5,                   xmm6              ; p2-=p1
+        pmaxub      xmm1,                   xmm2              ; abs(p3 - p2)
 
-        movdqa      xmm2,                   xmm3              ; p1
+        pmaxub      xmm1,                   xmm5              ; abs(p2 - p1)
+        movdqa      xmm2,                   xmm6              ; p1
 
+        pmaxub      xmm1,                   xmm3              ; abs(p2 - p1)
 %if %1
         movdqa      xmm4,                   [rsi+rax]         ; p0
-%else
-        movq        xmm4,                   [rsi + rcx*2]     ; p0
-        movq        xmm5,                   [rdi + rcx*2]
-        pslldq      xmm5,                   8
-        por         xmm4,                   xmm5
-%endif
-
-        movdqa      xmm5,                   xmm4              ; p0
-        psubusb     xmm4,                   xmm3              ; p0-=p1
-        psubusb     xmm3,                   xmm5              ; p1-=p0
-        por         xmm4,                   xmm3              ; abs(p1 - p0)
-        movdqa        t1,                   xmm4              ; save to t1
-
-        pmaxub      xmm1,                   xmm4
-        psubusb     xmm1,                   xmm7
-
-%if %1
         movdqa      xmm3,                   [rdi]             ; q1
 %else
+        movlps      xmm4,                   [rsi + rcx*2]     ; p0
+        movhps      xmm4,                   [rdi + rcx*2]
         movdqa      xmm3,                   q1                ; q1
 %endif
 
+        movdqa      xmm5,                   xmm4              ; p0
+        psubusb     xmm4,                   xmm6              ; p0-=p1
+
+        psubusb     xmm6,                   xmm5              ; p1-=p0
+
+        por         xmm6,                   xmm4              ; abs(p1 - p0)
+        mov         rdx,                    arg(2)            ; get flimit
+
+        movdqa        t1,                   xmm6              ; save to t1
+
         movdqa      xmm4,                   xmm3              ; q1
+        pmaxub      xmm1,                   xmm6
+
         psubusb     xmm3,                   xmm2              ; q1-=p1
         psubusb     xmm2,                   xmm4              ; p1-=q1
+
+        psubusb     xmm1,                   xmm7
         por         xmm2,                   xmm3              ; abs(p1-q1)
+
+        movdqa      xmm4,                   XMMWORD PTR [rdx] ; flimit
+
+        movdqa      xmm3,                   xmm0              ; q0
         pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
-        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
+
+        mov         rdx,                    arg(4)            ; hev get thresh
 
         movdqa      xmm6,                   xmm5              ; p0
-        movdqa      xmm3,                   xmm0              ; q0
+        psrlw       xmm2,                   1                 ; abs(p1-q1)/2
+
         psubusb     xmm5,                   xmm3              ; p0-=q0
+        paddb       xmm4,                   xmm4              ; flimit*2 (less than 255)
+
         psubusb     xmm3,                   xmm6              ; q0-=p0
         por         xmm5,                   xmm3              ; abs(p0 - q0)
+
         paddusb     xmm5,                   xmm5              ; abs(p0-q0)*2
+        paddb       xmm7,                   xmm4              ; flimit * 2 + limit (less than 255)
+
+        movdqa      xmm4,                   t0                ; hev get abs (q1 - q0)
+
+        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
+
         paddusb     xmm5,                   xmm2              ; abs (p0 - q0) *2 + abs(p1-q1)/2
 
-        mov         rdx,                    arg(2)            ; get flimit
-        movdqa      xmm2,                   XMMWORD PTR [rdx]
-        paddb       xmm2,                   xmm2              ; flimit*2 (less than 255)
-        paddb       xmm7,                   xmm2              ; flimit * 2 + limit (less than 255)
+        movdqa      xmm2,                   XMMWORD PTR [rdx] ; hev
 
         psubusb     xmm5,                   xmm7              ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        psubusb     xmm4,                   xmm2              ; hev
+
+        psubusb     xmm3,                   xmm2              ; hev
         por         xmm1,                   xmm5
-        pxor        xmm5,                   xmm5
-        pcmpeqb     xmm1,                   xmm5              ; mask mm1
+
+        pxor        xmm7,                   xmm7
+        paddb       xmm4,                   xmm3              ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
+
+        pcmpeqb     xmm4,                   xmm5              ; hev
+        pcmpeqb     xmm3,                   xmm3              ; hev
+
+        pcmpeqb     xmm1,                   xmm7              ; mask xmm1
+        pxor        xmm4,                   xmm3              ; hev
 %endmacro
 
-%macro LFH_HEV_MASK 0
-        mov         rdx,                    arg(4)            ; get thresh
-        movdqa      xmm7,                   XMMWORD PTR [rdx]
-
-        movdqa      xmm4,                   t0                ; get abs (q1 - q0)
-        psubusb     xmm4,                   xmm7
-        movdqa      xmm3,                   t1                ; get abs (p1 - p0)
-        psubusb     xmm3,                   xmm7
-        paddb       xmm4,                   xmm3              ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,                   xmm5
-
-        pcmpeqb     xmm5,                   xmm5
-        pxor        xmm4,                   xmm5
-%endmacro
-
-%macro BH_FILTER 1
-%if %1
-        movdqa      xmm2,                   [rsi+2*rax]       ; p1
-        movdqa      xmm7,                   [rdi]             ; q1
-%else
+%macro B_FILTER 1
+%if %1 == 0
         movdqa      xmm2,                   p1                ; p1
         movdqa      xmm7,                   q1                ; q1
+%elif %1 == 1
+        movdqa      xmm2,                   [rsi+2*rax]       ; p1
+        movdqa      xmm7,                   [rdi]             ; q1
+%elif %1 == 2
+        lea         rdx,                    srct
+
+        movdqa      xmm2,                   [rdx]             ; p1
+        movdqa      xmm7,                   [rdx+48]          ; q1
+        movdqa      xmm6,                   [rdx+16]          ; p0
+        movdqa      xmm0,                   [rdx+32]          ; q0
 %endif
 
         pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
@@ -196,88 +193,84 @@
 
         movdqa      xmm3,                   xmm0              ; q0
         psubsb      xmm0,                   xmm6              ; q0 - p0
+
         paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + hvm(p1 - q1)
+
         paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0) + hvm(p1 - q1)
+
         paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + hvm(p1 - q1)
+
         pand        xmm1,                   xmm2              ; mask filter values we don't care about
+
         movdqa      xmm2,                   xmm1
+
         paddsb      xmm1,                   [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
         paddsb      xmm2,                   [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
 
         punpckhbw   xmm5,                   xmm2              ; axbxcxdx
         punpcklbw   xmm2,                   xmm2              ; exfxgxhx
 
-        psraw       xmm5,                   11                ; sign extended shift right by 3
-        psraw       xmm2,                   11                ; sign extended shift right by 3
-        packsswb    xmm2,                   xmm5              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-
         punpcklbw   xmm0,                   xmm1              ; exfxgxhx
+        psraw       xmm5,                   11                ; sign extended shift right by 3
+
         punpckhbw   xmm1,                   xmm1              ; axbxcxdx
+        psraw       xmm2,                   11                ; sign extended shift right by 3
 
+        packsswb    xmm2,                   xmm5              ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
         psraw       xmm0,                   11                ; sign extended shift right by 3
+
         psraw       xmm1,                   11                ; sign extended shift right by 3
-
         movdqa      xmm5,                   xmm0              ; save results
+
         packsswb    xmm0,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-
         paddsw      xmm5,                   [ones GLOBAL]
-        paddsw      xmm1,                   [ones GLOBAL]
 
+        paddsw      xmm1,                   [ones GLOBAL]
         psraw       xmm5,                   1                 ; partial shifted one more time for 2nd tap
+
         psraw       xmm1,                   1                 ; partial shifted one more time for 2nd tap
 
-        packsswb    xmm5,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-        pandn       xmm4,                   xmm5              ; high edge variance additive
-%endmacro
-
-%macro BH_WRITEBACK 1
         paddsb      xmm6,                   xmm2              ; p0+= p0 add
+        packsswb    xmm5,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
+
+%if %1 == 0
+        movdqa      xmm1,                   p1                ; p1
+%elif %1 == 1
+        movdqa      xmm1,                   [rsi+2*rax]       ; p1
+%elif %1 == 2
+        movdqa      xmm1,                   [rdx]             ; p1
+%endif
+        pandn       xmm4,                   xmm5              ; high edge variance additive
         pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
-%if %1
-        movdqa      [rsi+rax],              xmm6              ; write back
-%else
+
+        pxor        xmm1,                   [t80 GLOBAL]      ; reoffset
+        psubsb      xmm3,                   xmm0              ; q0-= q0 add
+
+        paddsb      xmm1,                   xmm4              ; p1+= p1 add
+        pxor        xmm3,                   [t80 GLOBAL]      ; unoffset
+
+        pxor        xmm1,                   [t80 GLOBAL]      ; unoffset
+        psubsb      xmm7,                   xmm4              ; q1-= q1 add
+
+        pxor        xmm7,                   [t80 GLOBAL]      ; unoffset
+%if %1 == 0
         lea         rsi,                    [rsi + rcx*2]
         lea         rdi,                    [rdi + rcx*2]
         movq        MMWORD PTR [rsi],       xmm6              ; p0
-        psrldq      xmm6,                   8
-        movq        MMWORD PTR [rdi],       xmm6
-%endif
-
-%if %1
-        movdqa      xmm6,                   [rsi+2*rax]       ; p1
-%else
-        movdqa      xmm6,                   p1                ; p1
-%endif
-        pxor        xmm6,                   [t80 GLOBAL]      ; reoffset
-        paddsb      xmm6,                   xmm4               ; p1+= p1 add
-        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
-%if %1
-        movdqa      [rsi+2*rax],            xmm6              ; write back
-%else
-        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
-        psrldq      xmm6,                   8
-        movq        MMWORD PTR [rdi + rax], xmm6
-%endif
-
-        psubsb      xmm3,                   xmm0              ; q0-= q0 add
-        pxor        xmm3,                   [t80 GLOBAL]      ; unoffset
-%if %1
-        movdqa      [rsi],                  xmm3              ; write back
-%else
+        movhps      MMWORD PTR [rdi],       xmm6
+        movq        MMWORD PTR [rsi + rax], xmm1              ; p1
+        movhps      MMWORD PTR [rdi + rax], xmm1
         movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
-        psrldq      xmm3,                   8
-        movq        MMWORD PTR [rdi + rcx], xmm3
+        movhps      MMWORD PTR [rdi + rcx], xmm3
+        movq        MMWORD PTR [rsi + rcx*2],xmm7             ; q1
+        movhps      MMWORD PTR [rdi + rcx*2],xmm7
+%elif %1 == 1
+        movdqa      [rsi+rax],              xmm6              ; write back
+        movdqa      [rsi+2*rax],            xmm1              ; write back
+        movdqa      [rsi],                  xmm3              ; write back
+        movdqa      [rdi],                  xmm7              ; write back
 %endif
 
-        psubsb      xmm7,                   xmm4              ; q1-= q1 add
-        pxor        xmm7,                   [t80 GLOBAL]      ; unoffset
-%if %1
-        movdqa      [rdi],                  xmm7              ; write back
-%else
-        movq        MMWORD PTR [rsi + rcx*2],xmm7             ; q1
-        psrldq      xmm7,                   8
-        movq        MMWORD PTR [rdi + rcx*2],xmm7
-%endif
 %endmacro
 
 
@@ -314,16 +307,10 @@ sym(vp8_loop_filter_horizontal_edge_sse2):
 
         lea         rdi,                    [rsi+rax]        ; rdi points to row +1 for indirect addressing
 
-        ; calculate breakout conditions
-        LFH_FILTER_MASK 1
-
-        ; calculate high edge variance
-        LFH_HEV_MASK
-
-        ; start work on filters
-        BH_FILTER 1
-        ; write back the result
-        BH_WRITEBACK 1
+        ; calculate breakout conditions and high edge variance
+        LFH_FILTER_AND_HEV_MASK 1
+        ; filter and write back the result
+        B_FILTER 1
 
     add rsp, 32
     pop rsp
@@ -378,15 +365,10 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         lea         rsi,                    [rsi + rcx]
         lea         rdi,                    [rdi + rcx]
 
-        ; calculate breakout conditions
-        LFH_FILTER_MASK 0
-        ; calculate high edge variance
-        LFH_HEV_MASK
-
-        ; start work on filters
-        BH_FILTER 0
-        ; write back the result
-        BH_WRITEBACK 0
+        ; calculate breakout conditions and high edge variance
+        LFH_FILTER_AND_HEV_MASK 0
+        ; filter and write back the result
+        B_FILTER 0
 
     add rsp, 96
     pop rsp
@@ -400,208 +382,191 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
     ret
 
 
-%macro MBH_FILTER 1
-%if %1
-        movdqa      xmm2,                   [rsi+2*rax]       ; p1
-        movdqa      xmm7,                   [rdi]             ; q1
-%else
-        movdqa      xmm2,                   p1                ; p1
-        movdqa      xmm7,                   q1                ; q1
+%macro MB_FILTER_AND_WRITEBACK 1
+%if %1 == 0
+        movdqa      xmm2,                   p1              ; p1
+        movdqa      xmm7,                   q1              ; q1
+%elif %1 == 1
+        movdqa      xmm2,                   [rsi+2*rax]     ; p1
+        movdqa      xmm7,                   [rdi]           ; q1
+
+        mov         rcx,                    rax
+        neg         rcx
+%elif %1 == 2
+        lea         rdx,                    srct
+
+        movdqa      xmm2,                   [rdx+32]        ; p1
+        movdqa      xmm7,                   [rdx+80]        ; q1
+        movdqa      xmm6,                   [rdx+48]        ; p0
+        movdqa      xmm0,                   [rdx+64]        ; q0
 %endif
-        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
-        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
 
-        psubsb      xmm2,                   xmm7              ; p1 - q1
-        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
-        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
-        movdqa      xmm3,                   xmm0              ; q0
-        psubsb      xmm0,                   xmm6              ; q0 - p0
-        paddsb      xmm2,                   xmm0              ; 1 * (q0 - p0) + (p1 - q1)
-        paddsb      xmm2,                   xmm0              ; 2 * (q0 - p0)
-        paddsb      xmm2,                   xmm0              ; 3 * (q0 - p0) + (p1 - q1)
+        pxor        xmm2,                   [t80 GLOBAL]    ; p1 offset to convert to signed values
+        pxor        xmm7,                   [t80 GLOBAL]    ; q1 offset to convert to signed values
+        pxor        xmm6,                   [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm0,                   [t80 GLOBAL]    ; offset to convert to signed values
 
-        pand        xmm1,                   xmm2              ; mask filter values we don't care about
-        movdqa      xmm2,                   xmm1              ; vp8_filter
-        pand        xmm2,                   xmm4;             ; Filter2 = vp8_filter & hev
+        psubsb      xmm2,                   xmm7            ; p1 - q1
+        movdqa      xmm3,                   xmm0            ; q0
 
+        psubsb      xmm0,                   xmm6            ; q0 - p0
+
+        paddsb      xmm2,                   xmm0            ; 1 * (q0 - p0) + (p1 - q1)
+
+        paddsb      xmm2,                   xmm0            ; 2 * (q0 - p0)
+
+        paddsb      xmm2,                   xmm0            ; 3 * (q0 - p0) + (p1 - q1)
+
+        pand        xmm1,                   xmm2            ; mask filter values we don't care about
+
+        movdqa      xmm2,                   xmm1            ; vp8_filter
+
+        pand        xmm2,                   xmm4            ; Filter2 = vp8_filter & hev
+        pxor        xmm0,                   xmm0
+
+        pandn       xmm4,                   xmm1            ; vp8_filter&=~hev
+        pxor        xmm1,                   xmm1
+
+        punpcklbw   xmm0,                   xmm4            ; Filter 2 (hi)
         movdqa      xmm5,                   xmm2
-        paddsb      xmm5,                   [t3 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 3)
 
-        punpckhbw   xmm7,                   xmm5              ; axbxcxdx
-        punpcklbw   xmm5,                   xmm5              ; exfxgxhx
+        punpckhbw   xmm1,                   xmm4            ; Filter 2 (lo)
+        paddsb      xmm5,                   [t3 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 3)
 
-        psraw       xmm7,                   11                ; sign extended shift right by 3
-        psraw       xmm5,                   11                ; sign extended shift right by 3
+        pmulhw      xmm1,                   [s9 GLOBAL]     ; Filter 2 (lo) * 9
 
-        packsswb    xmm5,                   xmm7              ; Filter2 >>=3;
-        paddsb      xmm2,                   [t4 GLOBAL]       ; vp8_signed_char_clamp(Filter2 + 4)
+        pmulhw      xmm0,                   [s9 GLOBAL]     ; Filter 2 (hi) * 9
 
-        punpckhbw   xmm7,                   xmm2              ; axbxcxdx
-        punpcklbw   xmm0,                   xmm2              ; exfxgxhx
+        punpckhbw   xmm7,                   xmm5            ; axbxcxdx
+        paddsb      xmm2,                   [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
 
-        psraw       xmm7,                   11                ; sign extended shift right by 3
-        psraw       xmm0,                   11                ; sign extended shift right by 3
+        punpcklbw   xmm5,                   xmm5            ; exfxgxhx
+        psraw       xmm7,                   11              ; sign extended shift right by 3
 
-        packsswb    xmm0,                   xmm7              ; Filter2 >>=3;
-        paddsb      xmm6,                   xmm5              ; ps0 =ps0 + Fitler2
+        psraw       xmm5,                   11              ; sign extended shift right by 3
+        punpckhbw   xmm4,                   xmm2            ; axbxcxdx
 
-        psubsb      xmm3,                   xmm0              ; qs0 =qs0 - filter1
-        pandn       xmm4,                   xmm1              ; vp8_filter&=~hev
-%endmacro
+        punpcklbw   xmm2,                   xmm2            ; exfxgxhx
+        psraw       xmm4,                   11              ; sign extended shift right by 3
 
-%macro MBH_WRITEBACK 1
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
-        ; s = vp8_signed_char_clamp(qs0 - u);
-        ; *oq0 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps0 + u);
-        ; *op0 = s^0x80;
-        pxor        xmm1,                   xmm1
+        packsswb    xmm5,                   xmm7            ; Filter2 >>=3;
+        psraw       xmm2,                   11              ; sign extended shift right by 3
 
-        pxor        xmm2,                   xmm2
-        punpcklbw   xmm1,                   xmm4
+        packsswb    xmm2,                   xmm4            ; Filter1 >>=3;
+        movdqa      xmm7,                   xmm1
 
-        punpckhbw   xmm2,                   xmm4
-        pmulhw      xmm1,                   [s27 GLOBAL]
+        paddsb      xmm6,                   xmm5            ; ps0 =ps0 + Fitler2
+        movdqa      xmm4,                   xmm1
 
-        pmulhw      xmm2,                   [s27 GLOBAL]
-        paddw       xmm1,                   [s63 GLOBAL]
+        psubsb      xmm3,                   xmm2            ; qs0 =qs0 - Filter1
+        movdqa      xmm5,                   xmm0
 
-        paddw       xmm2,                   [s63 GLOBAL]
-        psraw       xmm1,                   7
+        movdqa      xmm2,                   xmm5
+        paddw       xmm0,                   [s63 GLOBAL]    ; Filter 2 (hi) * 9 + 63
 
-        psraw       xmm2,                   7
-        packsswb    xmm1,                   xmm2
+        paddw       xmm1,                   [s63 GLOBAL]    ; Filter 2 (lo) * 9 + 63
+        paddw       xmm5,                   xmm5            ; Filter 2 (hi) * 18
 
-        psubsb      xmm3,                   xmm1
-        paddsb      xmm6,                   xmm1
+        paddw       xmm7,                   xmm7            ; Filter 2 (lo) * 18
+        paddw       xmm5,                   xmm0            ; Filter 2 (hi) * 27 + 63
 
-        pxor        xmm3,                   [t80 GLOBAL]
-        pxor        xmm6,                   [t80 GLOBAL]
+        paddw       xmm7,                   xmm1            ; Filter 2 (lo) * 27 + 63
+        paddw       xmm2,                   xmm0            ; Filter 2 (hi) * 18 + 63
 
-%if %1
-        movdqa      XMMWORD PTR [rsi+rax],  xmm6
-        movdqa      XMMWORD PTR [rsi],      xmm3
-%else
-        lea         rsi,                    [rsi + rcx*2]
-        lea         rdi,                    [rdi + rcx*2]
+        paddw       xmm4,                   xmm1            ; Filter 2 (lo) * 18 + 63
+        psraw       xmm0,                   7               ; (Filter 2 (hi) * 9 + 63) >> 7
 
-        movq        MMWORD PTR [rsi],       xmm6              ; p0
-        psrldq      xmm6,                   8
-        movq        MMWORD PTR [rdi],       xmm6
-        movq        MMWORD PTR [rsi + rcx], xmm3              ; q0
-        psrldq      xmm3,                   8
-        movq        MMWORD PTR [rdi + rcx], xmm3
+        psraw       xmm1,                   7               ; (Filter 2 (lo) * 9 + 63) >> 7
+        psraw       xmm2,                   7               ; (Filter 2 (hi) * 18 + 63) >> 7
+
+        packsswb    xmm0,                   xmm1            ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
+        psraw       xmm4,                   7               ; (Filter 2 (lo) * 18 + 63) >> 7
+
+        psraw       xmm5,                   7               ; (Filter 2 (hi) * 27 + 63) >> 7
+        packsswb    xmm2,                   xmm4            ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
+
+        psraw       xmm7,                   7               ; (Filter 2 (lo) * 27 + 63) >> 7
+
+        packsswb    xmm5,                   xmm7            ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
+
+        psubsb      xmm3,                   xmm5            ; sq = vp8_signed_char_clamp(qs0 - u3)
+        paddsb      xmm6,                   xmm5            ; sp = vp8_signed_char_clamp(ps0 - u3)
+
+%if %1 == 0
+        movdqa      xmm5,                   q2              ; q2
+        movdqa      xmm1,                   q1              ; q1
+        movdqa      xmm4,                   p1              ; p1
+        movdqa      xmm7,                   p2              ; p2
+
+%elif %1 == 1
+        movdqa      xmm5,                   XMMWORD PTR [rdi+rcx]   ; q2
+        movdqa      xmm1,                   XMMWORD PTR [rdi]       ; q1
+        movdqa      xmm4,                   XMMWORD PTR [rsi+rax*2] ; p1
+        movdqa      xmm7,                   XMMWORD PTR [rdi+rax*4] ; p2
+%elif %1 == 2
+        movdqa      xmm5,                   XMMWORD PTR [rdx+96]    ; q2
+        movdqa      xmm1,                   XMMWORD PTR [rdx+80]    ; q1
+        movdqa      xmm4,                   XMMWORD PTR [rdx+32]    ; p1
+        movdqa      xmm7,                   XMMWORD PTR [rdx+16]    ; p2
 %endif
 
-        ; roughly 2/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
-        ; s = vp8_signed_char_clamp(qs1 - u);
-        ; *oq1 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps1 + u);
-        ; *op1 = s^0x80;
-        pxor        xmm1,                   xmm1
-        pxor        xmm2,                   xmm2
+        pxor        xmm3,                   [t80 GLOBAL]    ; *oq0 = sq^0x80
+        pxor        xmm6,                   [t80 GLOBAL]    ; *oq0 = sp^0x80
 
-        punpcklbw   xmm1,                   xmm4
-        punpckhbw   xmm2,                   xmm4
+        pxor        xmm1,                   [t80 GLOBAL]
+        pxor        xmm4,                   [t80 GLOBAL]
 
-        pmulhw      xmm1,                   [s18 GLOBAL]
-        pmulhw      xmm2,                   [s18 GLOBAL]
+        psubsb      xmm1,                   xmm2            ; sq = vp8_signed_char_clamp(qs1 - u2)
+        paddsb      xmm4,                   xmm2            ; sp = vp8_signed_char_clamp(ps1 - u2)
 
-        paddw       xmm1,                   [s63 GLOBAL]
-        paddw       xmm2,                   [s63 GLOBAL]
+        pxor        xmm1,                   [t80 GLOBAL]    ; *oq1 = sq^0x80;
+        pxor        xmm4,                   [t80 GLOBAL]    ; *op1 = sp^0x80;
 
-        psraw       xmm1,                   7
-        psraw       xmm2,                   7
+        pxor        xmm7,                   [t80 GLOBAL]
+        pxor        xmm5,                   [t80 GLOBAL]
 
-        packsswb    xmm1,                   xmm2
+        paddsb      xmm7,                   xmm0            ; sp = vp8_signed_char_clamp(ps2 - u)
+        psubsb      xmm5,                   xmm0            ; sq = vp8_signed_char_clamp(qs2 - u)
 
-%if %1
-        movdqa      xmm3,                   XMMWORD PTR [rdi]
-        movdqa      xmm6,                   XMMWORD PTR [rsi+rax*2] ; p1
-%else
-        movdqa      xmm3,                   q1                ; q1
-        movdqa      xmm6,                   p1                ; p1
-%endif
+        pxor        xmm7,                   [t80 GLOBAL]    ; *op2 = sp^0x80;
+        pxor        xmm5,                   [t80 GLOBAL]    ; *oq2 = sq^0x80;
 
-        pxor        xmm3,                   [t80 GLOBAL]
-        pxor        xmm6,                   [t80 GLOBAL]
+%if %1 == 0
+        lea         rsi,                    [rsi+rcx*2]
+        lea         rdi,                    [rdi+rcx*2]
 
-        paddsb      xmm6,                   xmm1
-        psubsb      xmm3,                   xmm1
+        movq        MMWORD PTR [rsi],       xmm6            ; p0
+        movhps      MMWORD PTR [rdi],       xmm6
+        movq        MMWORD PTR [rsi + rcx], xmm3            ; q0
+        movhps      MMWORD PTR [rdi + rcx], xmm3
 
-        pxor        xmm6,                   [t80 GLOBAL]
-        pxor        xmm3,                   [t80 GLOBAL]
+        movq        MMWORD PTR [rsi+rcx*2], xmm1            ; q1
+        movhps      MMWORD PTR [rdi+rcx*2], xmm1
 
-%if %1
-        movdqa      XMMWORD PTR [rdi],      xmm3
-        movdqa      XMMWORD PTR [rsi+rax*2],xmm6
-%else
-        movq        MMWORD PTR [rsi + rcx*2],xmm3             ; q1
-        psrldq      xmm3,                   8
-        movq        MMWORD PTR [rdi + rcx*2],xmm3
+        movq        MMWORD PTR [rsi + rax], xmm4            ; p1
+        movhps      MMWORD PTR [rdi + rax], xmm4
 
-        movq        MMWORD PTR [rsi + rax], xmm6              ; p1
-        psrldq      xmm6,                   8
-        movq        MMWORD PTR [rdi + rax], xmm6
-%endif
-        ; roughly 1/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
-        ; s = vp8_signed_char_clamp(qs2 - u);
-        ; *oq2 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps2 + u);
-        ; *op2 = s^0x80;
-        pxor        xmm1,                   xmm1
-        pxor        xmm2,                   xmm2
-
-        punpcklbw   xmm1,                   xmm4
-        punpckhbw   xmm2,                   xmm4
-
-        pmulhw      xmm1,                   [s9 GLOBAL]
-        pmulhw      xmm2,                   [s9 GLOBAL]
-
-        paddw       xmm1,                   [s63 GLOBAL]
-        paddw       xmm2,                   [s63 GLOBAL]
-
-        psraw       xmm1,                   7
-        psraw       xmm2,                   7
-
-        packsswb    xmm1,                   xmm2
-
-%if %1
-        movdqa      xmm6,                   XMMWORD PTR [rdi+rax*4]
-        neg         rax
-
-        movdqa      xmm3,                   XMMWORD PTR [rdi+rax]
-%else
-        movdqa      xmm6,                   p2                ; p2
-        movdqa      xmm3,                   q2                ; q2
-%endif
-
-        pxor        xmm6,                   [t80 GLOBAL]
-        pxor        xmm3,                   [t80 GLOBAL]
-
-        paddsb      xmm6,                   xmm1
-        psubsb      xmm3,                   xmm1
-
-        pxor        xmm6,                   [t80 GLOBAL]
-        pxor        xmm3,                   [t80 GLOBAL]
-%if %1
-        movdqa      XMMWORD PTR [rdi+rax  ],xmm3
-        neg         rax
-
-        movdqa      XMMWORD PTR [rdi+rax*4],xmm6
-%else
-        movq        MMWORD PTR [rsi+rax*2], xmm6              ; p2
-        psrldq      xmm6,                   8
-        movq        MMWORD PTR [rdi+rax*2], xmm6
+        movq        MMWORD PTR [rsi+rax*2], xmm7            ; p2
+        movhps      MMWORD PTR [rdi+rax*2], xmm7
 
         lea         rsi,                    [rsi + rcx]
         lea         rdi,                    [rdi + rcx]
-        movq        MMWORD PTR [rsi+rcx*2  ],xmm3             ; q2
-        psrldq      xmm3,                   8
-        movq        MMWORD PTR [rdi+rcx*2  ],xmm3
+        movq        MMWORD PTR [rsi+rcx*2], xmm5            ; q2
+        movhps      MMWORD PTR [rdi+rcx*2], xmm5
+%elif %1 == 1
+        movdqa      XMMWORD PTR [rdi+rcx],  xmm5            ; q2
+        movdqa      XMMWORD PTR [rdi],      xmm1            ; q1
+        movdqa      XMMWORD PTR [rsi],      xmm3            ; q0
+        movdqa      XMMWORD PTR [rsi+rax  ],xmm6            ; p0
+        movdqa      XMMWORD PTR [rsi+rax*2],xmm4            ; p1
+        movdqa      XMMWORD PTR [rdi+rax*4],xmm7            ; p2
+%elif %1 == 2
+        movdqa      XMMWORD PTR [rdx+80],   xmm1            ; q1
+        movdqa      XMMWORD PTR [rdx+64],   xmm3            ; q0
+        movdqa      XMMWORD PTR [rdx+48],   xmm6            ; p0
+        movdqa      XMMWORD PTR [rdx+32],   xmm4            ; p1
 %endif
+
 %endmacro
 
 
@@ -638,16 +603,10 @@ sym(vp8_mbloop_filter_horizontal_edge_sse2):
 
         lea         rdi,                    [rsi+rax]         ; rdi points to row +1 for indirect addressing
 
-        ; calculate breakout conditions
-        LFH_FILTER_MASK 1
-
-        ; calculate high edge variance
-        LFH_HEV_MASK
-
-        ; start work on filters
-        MBH_FILTER 1
-        ; write back the result
-        MBH_WRITEBACK 1
+        ; calculate breakout conditions and high edge variance
+        LFH_FILTER_AND_HEV_MASK 1
+        ; filter and write back the results
+        MB_FILTER_AND_WRITEBACK 1
 
     add rsp, 32
     pop rsp
@@ -702,16 +661,10 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         lea         rsi,                    [rsi + rcx]
         lea         rdi,                    [rdi + rcx]
 
-        ; calculate breakout conditions
-        LFH_FILTER_MASK 0
-
-        ; calculate high edge variance
-        LFH_HEV_MASK
-
-        ; start work on filters
-        MBH_FILTER 0
-        ; write back the result
-        MBH_WRITEBACK 0
+        ; calculate breakout conditions and high edge variance
+        LFH_FILTER_AND_HEV_MASK 0
+        ; filter and write back the results
+        MB_FILTER_AND_WRITEBACK 0
 
     add rsp, 96
     pop rsp
@@ -725,64 +678,80 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
     ret
 
 
-%macro TRANSPOSE_16X8_1 0
-        movq        xmm4,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
-        movq        xmm7,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
-
-        punpcklbw   xmm4,               xmm7            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+%macro TRANSPOSE_16X8 2
+        movq        xmm4,               QWORD PTR [rsi]        ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00
+        movq        xmm1,               QWORD PTR [rdi]        ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10
         movq        xmm0,               QWORD PTR [rsi+2*rax]  ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20
-
-        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
-
         movq        xmm7,               QWORD PTR [rdi+2*rax]  ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30
-        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
-
         movq        xmm5,               QWORD PTR [rsi+4*rax]  ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40
         movq        xmm2,               QWORD PTR [rdi+4*rax]  ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50
 
-        punpcklbw   xmm5,               xmm2            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
-        movq        xmm7,               QWORD PTR [rsi+2*rcx]  ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+        punpcklbw   xmm4,               xmm1            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
 
         movq        xmm1,               QWORD PTR [rdi+2*rcx]  ; xx xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70
-        movdqa      xmm6,               xmm5            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
 
+        movdqa      xmm3,               xmm4            ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00
+        punpcklbw   xmm0,               xmm7            ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20
+
+        movq        xmm7,               QWORD PTR [rsi+2*rcx]  ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60
+
+        punpcklbw   xmm5,               xmm2            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
+%if %1
+        lea         rsi,                [rsi+rax*8]
+%else
+        mov         rsi,                arg(5)          ; v_ptr
+%endif
+
+        movdqa      xmm6,               xmm5            ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40
         punpcklbw   xmm7,               xmm1            ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60
+
         punpcklwd   xmm5,               xmm7            ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40
 
         punpckhwd   xmm6,               xmm7            ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44
+%if %1
+        lea         rdi,                [rdi+rax*8]
+%else
+        lea         rsi,                [rsi - 4]
+%endif
 
         punpcklwd   xmm3,               xmm0            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+%if %1
+        lea         rdx,                srct
+%else
+        lea         rdi,                [rsi + rax]     ; rdi points to row +1 for indirect addressing
+%endif
+
+        movdqa      xmm2,               xmm3            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
         punpckhwd   xmm4,               xmm0            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
 
         movdqa      xmm7,               xmm4            ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04
-        movdqa      xmm2,               xmm3            ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00
+        punpckhdq   xmm3,               xmm5            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
 
         punpckhdq   xmm7,               xmm6            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+
         punpckldq   xmm4,               xmm6            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
 
-        punpckhdq   xmm3,               xmm5            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
         punpckldq   xmm2,               xmm5            ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00
 
         movdqa      t0,                 xmm2            ; save to free XMM2
-%endmacro
-
-%macro TRANSPOSE_16X8_2 1
-        movq        xmm2,               QWORD PTR [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
-        movq        xmm5,               QWORD PTR [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
-
-        punpcklbw   xmm2,               xmm5            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+        movq        xmm2,               QWORD PTR [rsi]       ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80
+        movq        xmm6,               QWORD PTR [rdi]       ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90
         movq        xmm0,               QWORD PTR [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0
-
         movq        xmm5,               QWORD PTR [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0
-        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
-
         movq        xmm1,               QWORD PTR [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0
+
+        punpcklbw   xmm2,               xmm6            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
+
         movq        xmm6,               QWORD PTR [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0
 
-        punpcklbw   xmm1,               xmm6            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+        punpcklbw   xmm0,               xmm5                  ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0
+
         movq        xmm5,               QWORD PTR [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0
 
+        punpcklbw   xmm1,               xmm6            ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0
+
         movq        xmm6,               QWORD PTR [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0
+
         punpcklbw   xmm5,               xmm6            ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0
 
         movdqa      xmm6,               xmm1            ;
@@ -792,75 +761,81 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm5,               xmm2            ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80
 
         punpcklwd   xmm5,               xmm0            ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80
+
         punpckhwd   xmm2,               xmm0            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
 
         movdqa      xmm0,               xmm5
         punpckldq   xmm0,               xmm1            ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80
 
-
         punpckhdq   xmm5,               xmm1            ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82
         movdqa      xmm1,               xmm2            ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84
 
         punpckldq   xmm1,               xmm6            ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84
-        punpckhdq   xmm2,               xmm6            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
 
+        punpckhdq   xmm2,               xmm6            ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86
         movdqa      xmm6,               xmm7            ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06
+
         punpcklqdq  xmm6,               xmm2            ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
 
         punpckhqdq  xmm7,               xmm2            ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07
-%if %1
+%if %2
         movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
-
         punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
 
         punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
         movdqa      [rdx],              xmm2            ; save 2
 
         movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
         punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
 
         movdqa      [rdx+16],           xmm3            ; save 3
+
         punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
 
         movdqa      [rdx+32],           xmm4            ; save 4
         movdqa      [rdx+48],           xmm5            ; save 5
-
         movdqa      xmm1,               t0              ; get
-        movdqa      xmm2,               xmm1            ;
 
+        movdqa      xmm2,               xmm1            ;
         punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
         punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
 %else
         movdqa      [rdx+112],          xmm7            ; save 7
-        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
 
         movdqa      [rdx+96],           xmm6            ; save 6
+
+        movdqa      xmm2,               xmm3            ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02
+        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
+
         punpcklqdq  xmm2,               xmm5            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
 
-        punpckhqdq  xmm3,               xmm5            ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03
         movdqa      [rdx+32],           xmm2            ; save 2
 
         movdqa      xmm5,               xmm4            ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04
         punpcklqdq  xmm4,               xmm1            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
 
         movdqa      [rdx+48],           xmm3            ; save 3
+
         punpckhqdq  xmm5,               xmm1            ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05
 
         movdqa      [rdx+64],           xmm4            ; save 4
         movdqa      [rdx+80],           xmm5            ; save 5
-
         movdqa      xmm1,               t0              ; get
-        movdqa      xmm2,               xmm1
 
+        movdqa      xmm2,               xmm1
         punpckhqdq  xmm1,               xmm0            ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
+
         punpcklqdq  xmm2,               xmm0            ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
 
         movdqa      [rdx+16],           xmm1
+
         movdqa      [rdx],              xmm2
 %endif
 %endmacro
 
-%macro LFV_FILTER_MASK 1
+%macro LFV_FILTER_MASK_HEV_MASK 1
         movdqa      xmm0,               xmm6            ; q2
         psubusb     xmm0,               xmm7            ; q2-q3
 
@@ -899,10 +874,11 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm1,               xmm2            ; p1
 
         psubusb     xmm2,               xmm3            ; p1-p0
+        lea         rdx,                srct
+
         por         xmm2,               xmm7            ; abs(p1-p0)
 
         movdqa      t0,                 xmm2            ; save abs(p1-p0)
-        lea         rdx,                srct
 
         pmaxub      xmm0,               xmm2
 
@@ -913,136 +889,70 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm5,               [rdx+64]        ; q0
         movdqa      xmm7,               [rdx+80]        ; q1
 %endif
+        mov         rdx,                arg(3)          ; limit
+
         movdqa      xmm6,               xmm5            ; q0
         movdqa      xmm2,               xmm7            ; q1
-        psubusb     xmm5,               xmm7            ; q0-q1
 
+        psubusb     xmm5,               xmm7            ; q0-q1
         psubusb     xmm7,               xmm6            ; q1-q0
+
         por         xmm7,               xmm5            ; abs(q1-q0)
 
         movdqa      t1,                 xmm7            ; save abs(q1-q0)
 
-        mov         rdx,                arg(3)          ; limit
-        movdqa      xmm4,               [rdx]           ; limit
+        movdqa      xmm4,               XMMWORD PTR [rdx]; limit
 
         pmaxub      xmm0,               xmm7
-        psubusb     xmm0,               xmm4
+        mov         rdx,                arg(2)          ; flimit
 
+        psubusb     xmm0,               xmm4
         movdqa      xmm5,               xmm2            ; q1
+
         psubusb     xmm5,               xmm1            ; q1-=p1
         psubusb     xmm1,               xmm2            ; p1-=q1
+
         por         xmm5,               xmm1            ; abs(p1-q1)
-        pand        xmm5,               [tfe GLOBAL]    ; set lsb of each byte to zero
-        psrlw       xmm5,               1               ; abs(p1-q1)/2
-
-        mov         rdx,                arg(2)          ; flimit
-        movdqa      xmm2,               [rdx]           ; flimit
-
         movdqa      xmm1,               xmm3            ; p0
-        movdqa      xmm7,               xmm6            ; q0
-        psubusb     xmm1,               xmm7            ; p0-q0
-        psubusb     xmm7,               xmm3            ; q0-p0
-        por         xmm1,               xmm7            ; abs(q0-p0)
-        paddusb     xmm1,               xmm1            ; abs(q0-p0)*2
-        paddusb     xmm1,               xmm5            ; abs (p0 - q0) *2 + abs(p1-q1)/2
 
-        paddb       xmm2,               xmm2            ; flimit*2 (less than 255)
-        paddb       xmm4,               xmm2            ; flimit * 2 + limit (less than 255)
+        pand        xmm5,               [tfe GLOBAL]    ; set lsb of each byte to zero
+        psubusb     xmm1,               xmm6            ; p0-q0
 
-        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
-        por         xmm1,               xmm0;           ; mask
-        pxor        xmm0,               xmm0
-        pcmpeqb     xmm1,               xmm0
-%endmacro
+        psrlw       xmm5,               1               ; abs(p1-q1)/2
+        psubusb     xmm6,               xmm3            ; q0-p0
+
+        movdqa      xmm2,               XMMWORD PTR [rdx]; flimit
 
-%macro LFV_HEV_MASK 0
         mov         rdx,                arg(4)          ; get thresh
-        movdqa      xmm7,               XMMWORD PTR [rdx]
 
-        movdqa      xmm4,               t0              ; get abs (q1 - q0)
-        psubusb     xmm4,               xmm7            ; abs(q1 - q0) > thresh
+        por         xmm1,               xmm6            ; abs(q0-p0)
+        paddb       xmm2,               xmm2            ; flimit*2 (less than 255)
+
+        movdqa      xmm6,               t0              ; get abs (q1 - q0)
+
+        paddusb     xmm1,               xmm1            ; abs(q0-p0)*2
 
         movdqa      xmm3,               t1              ; get abs (p1 - p0)
+
+        movdqa      xmm7,               XMMWORD PTR [rdx]
+
+        paddusb     xmm1,               xmm5            ; abs (p0 - q0) *2 + abs(p1-q1)/2
+        psubusb     xmm6,               xmm7            ; abs(q1 - q0) > thresh
+
+        paddb       xmm4,               xmm2            ; flimit * 2 + limit (less than 255)
         psubusb     xmm3,               xmm7            ; abs(p1 - p0)> thresh
 
-        por         xmm4,               xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
-        pcmpeqb     xmm4,               xmm0
+        psubusb     xmm1,               xmm4            ; abs (p0 - q0) *2 + abs(p1-q1)/2  > flimit * 2 + limit
+        por         xmm6,               xmm3            ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh
 
-        pcmpeqb     xmm0,               xmm0
-        pxor        xmm4,               xmm0
-%endmacro
+        por         xmm1,               xmm0            ; mask
+        pcmpeqb     xmm6,               xmm0
 
-%macro BV_FILTER 0
-        lea         rdx,                srct
+        pxor        xmm0,               xmm0
+        pcmpeqb     xmm4,               xmm4
 
-        movdqa      xmm2,               [rdx]           ; p1        lea         rsi,       [rsi+rcx*8]
-        movdqa      xmm7,               [rdx+48]        ; q1
-        movdqa      xmm6,               [rdx+16]        ; p0
-        movdqa      xmm0,               [rdx+32]        ; q0
-
-        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
-
-        psubsb      xmm2,               xmm7            ; p1 - q1
-        pand        xmm2,               xmm4            ; high var mask (hvm)(p1 - q1)
-
-        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
-
-        movdqa      xmm3,               xmm0            ; q0
-        psubsb      xmm0,               xmm6            ; q0 - p0
-
-        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + hvm(p1 - q1)
-        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0) + hvm(p1 - q1)
-
-        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0) + hvm(p1 - q1)
-        pand        xmm1,               xmm2            ; mask filter values we don't care about
-
-        movdqa      xmm2,               xmm1
-        paddsb      xmm1,               [t4 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-
-        paddsb      xmm2,               [t3 GLOBAL]     ; 3* (q0 - p0) + hvm(p1 - q1) + 3
-
-        punpckhbw   xmm5,               xmm2
-        punpcklbw   xmm2,               xmm2
-
-        psraw       xmm5,               11
-        psraw       xmm2,               11
-
-        packsswb    xmm2,               xmm5            ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3;
-        punpcklbw   xmm0,               xmm1            ; exfxgxhx
-
-        punpckhbw   xmm1,               xmm1            ; axbxcxdx
-        psraw       xmm0,               11              ; sign extended shift right by 3
-
-        psraw       xmm1,               11              ; sign extended shift right by 3
-        movdqa      xmm5,               xmm0            ; save results
-
-        packsswb    xmm0,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5,               [ones GLOBAL]
-
-        paddsw      xmm1,               [ones GLOBAL]
-        psraw       xmm5,               1               ; partial shifted one more time for 2nd tap
-
-        psraw       xmm1,               1               ; partial shifted one more time for 2nd tap
-        packsswb    xmm5,               xmm1            ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
-
-        pandn       xmm4,               xmm5            ; high edge variance additive
-
-        paddsb      xmm6,               xmm2            ; p0+= p0 add
-        pxor        xmm6,               [t80 GLOBAL]    ; unoffset
-
-        movdqa      xmm1,               [rdx]           ; p1
-        pxor        xmm1,               [t80 GLOBAL]    ; reoffset
-
-        paddsb      xmm1,               xmm4            ; p1+= p1 add
-        pxor        xmm1,               [t80 GLOBAL]    ; unoffset
-
-        psubsb      xmm3,               xmm0            ; q0-= q0 add
-        pxor        xmm3,               [t80 GLOBAL]    ; unoffset
-
-        psubsb      xmm7,               xmm4            ; q1-= q1 add
-        pxor        xmm7,               [t80 GLOBAL]    ; unoffset
+        pcmpeqb     xmm1,               xmm0
+        pxor        xmm4,               xmm6
 %endmacro
 
 %macro BV_TRANSPOSE 0
@@ -1057,6 +967,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         punpckhbw   xmm1,               xmm6            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
 
         punpcklbw   xmm4,               xmm7            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
+
         punpckhbw   xmm3,               xmm7            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
 
         movdqa      xmm6,               xmm2            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
@@ -1066,6 +977,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm5,               xmm1            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
 
         punpcklwd   xmm1,               xmm3            ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82
+
         punpckhwd   xmm5,               xmm3            ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2
         ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02
         ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42
@@ -1132,20 +1044,13 @@ sym(vp8_loop_filter_vertical_edge_sse2):
         lea         rcx,        [rax*2+rax]
 
         ;transpose 16x8 to 8x16, and store the 8-line result on stack.
-        TRANSPOSE_16X8_1
+        TRANSPOSE_16X8 1, 1
 
-        lea         rsi,        [rsi+rax*8]
-        lea         rdi,        [rdi+rax*8]
-        lea         rdx,        srct
-        TRANSPOSE_16X8_2 1
-
-        ; calculate filter mask
-        LFV_FILTER_MASK 1
-        ; calculate high edge variance
-        LFV_HEV_MASK
+        ; calculate filter mask and high edge variance
+        LFV_FILTER_MASK_HEV_MASK 1
 
         ; start work on filters
-        BV_FILTER
+        B_FILTER 2
 
         ; tranpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE
@@ -1205,23 +1110,16 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
         lea         rcx,        [rax+2*rax]
 
-        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
-        TRANSPOSE_16X8_1
-
-        mov         rsi,        arg(5)                  ; v_ptr
-        lea         rsi,        [rsi - 4]
-        lea         rdi,        [rsi + rax]             ; rdi points to row +1 for indirect addressing
-
         lea         rdx,        srct
-        TRANSPOSE_16X8_2 1
 
-        ; calculate filter mask
-        LFV_FILTER_MASK 1
-        ; calculate high edge variance
-        LFV_HEV_MASK
+        ;transpose 16x8 to 8x16, and store the 8-line result on stack.
+        TRANSPOSE_16X8 0, 1
+
+        ; calculate filter mask and high edge variance
+        LFV_FILTER_MASK_HEV_MASK 1
 
         ; start work on filters
-        BV_FILTER
+        B_FILTER 2
 
         ; tranpose and write back - only work on q1, q0, p0, p1
         BV_TRANSPOSE
@@ -1247,174 +1145,12 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
     pop         rbp
     ret
 
-
-%macro MBV_FILTER 0
-        lea         rdx,                srct
-
-        movdqa      xmm2,               [rdx+32]        ; p1
-        movdqa      xmm7,               [rdx+80]        ; q1
-        movdqa      xmm6,               [rdx+48]        ; p0
-        movdqa      xmm0,               [rdx+64]        ; q0
-
-        pxor        xmm2,               [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7,               [t80 GLOBAL]    ; q1 offset to convert to signed values
-        pxor        xmm6,               [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0,               [t80 GLOBAL]    ; offset to convert to signed values
-
-        psubsb      xmm2,               xmm7            ; p1 - q1
-
-        movdqa      xmm3,               xmm0            ; q0
-
-        psubsb      xmm0,               xmm6            ; q0 - p0
-        paddsb      xmm2,               xmm0            ; 1 * (q0 - p0) + (p1 - q1)
-
-        paddsb      xmm2,               xmm0            ; 2 * (q0 - p0)
-        paddsb      xmm2,               xmm0            ; 3 * (q0 - p0)+ (p1 - q1)
-
-        pand        xmm1,               xmm2            ; mask filter values we don't care about
-
-        movdqa      xmm2,               xmm1            ; vp8_filter
-        pand        xmm2,               xmm4;           ; Filter2 = vp8_filter & hev
-
-        movdqa      xmm5,               xmm2
-        paddsb      xmm5,               [t3 GLOBAL]
-
-        punpckhbw   xmm7,               xmm5            ; axbxcxdx
-        punpcklbw   xmm5,               xmm5            ; exfxgxhx
-
-        psraw       xmm7,               11              ; sign extended shift right by 3
-        psraw       xmm5,               11              ; sign extended shift right by 3
-
-        packsswb    xmm5,               xmm7            ; Filter2 >>=3;
-
-        paddsb      xmm2,               [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
-
-        punpcklbw   xmm0,               xmm2            ; exfxgxhx
-        punpckhbw   xmm7,               xmm2            ; axbxcxdx
-
-        psraw       xmm0,               11              ; sign extended shift right by 3
-        psraw       xmm7,               11              ; sign extended shift right by 3
-
-        packsswb    xmm0,               xmm7            ; Filter2 >>=3;
-
-        psubsb      xmm3,               xmm0            ; qs0 =qs0 - filter1
-        paddsb      xmm6,               xmm5            ; ps0 =ps0 + Fitler2
-
-        ; vp8_filter &= ~hev;
-        ; Filter2 = vp8_filter;
-        pandn       xmm4,               xmm1            ; vp8_filter&=~hev
-
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7);
-        ; s = vp8_signed_char_clamp(qs0 - u);
-        ; *oq0 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps0 + u);
-        ; *op0 = s^0x80;
-        pxor        xmm1,               xmm1
-
-        pxor        xmm2,               xmm2
-        punpcklbw   xmm1,               xmm4
-
-        punpckhbw   xmm2,               xmm4
-        pmulhw      xmm1,               [s27 GLOBAL]
-
-        pmulhw      xmm2,               [s27 GLOBAL]
-        paddw       xmm1,               [s63 GLOBAL]
-
-        paddw       xmm2,               [s63 GLOBAL]
-        psraw       xmm1,               7
-
-        psraw       xmm2,               7
-        packsswb    xmm1,               xmm2
-
-        psubsb      xmm3,               xmm1
-        paddsb      xmm6,               xmm1
-
-        pxor        xmm3,               [t80 GLOBAL]
-        pxor        xmm6,               [t80 GLOBAL]
-
-        movdqa      [rdx+48],           xmm6
-        movdqa      [rdx+64],           xmm3
-
-        ; roughly 2/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7);
-        ; s = vp8_signed_char_clamp(qs1 - u);
-        ; *oq1 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps1 + u);
-        ; *op1 = s^0x80;
-        pxor        xmm1,               xmm1
-        pxor        xmm2,               xmm2
-
-        punpcklbw   xmm1,               xmm4
-        punpckhbw   xmm2,               xmm4
-
-        pmulhw      xmm1,               [s18 GLOBAL]
-        pmulhw      xmm2,               [s18 GLOBAL]
-
-        paddw       xmm1,               [s63 GLOBAL]
-        paddw       xmm2,               [s63 GLOBAL]
-
-        psraw       xmm1,               7
-        psraw       xmm2,               7
-
-        packsswb    xmm1,               xmm2
-
-        movdqa      xmm3,               [rdx + 80]              ; q1
-        movdqa      xmm6,               [rdx + 32]              ; p1
-
-        pxor        xmm3,               [t80 GLOBAL]
-        pxor        xmm6,               [t80 GLOBAL]
-
-        paddsb      xmm6,               xmm1
-        psubsb      xmm3,               xmm1
-
-        pxor        xmm6,               [t80 GLOBAL]
-        pxor        xmm3,               [t80 GLOBAL]
-
-        movdqa      [rdx + 80],         xmm3
-        movdqa      [rdx + 32],         xmm6
-
-        ; roughly 1/7th difference across boundary
-        ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7);
-        ; s = vp8_signed_char_clamp(qs2 - u);
-        ; *oq2 = s^0x80;
-        ; s = vp8_signed_char_clamp(ps2 + u);
-        ; *op2 = s^0x80;
-        pxor        xmm1,               xmm1
-        pxor        xmm2,               xmm2
-
-        punpcklbw   xmm1,               xmm4
-        punpckhbw   xmm2,               xmm4
-
-        pmulhw      xmm1,               [s9 GLOBAL]
-        pmulhw      xmm2,               [s9 GLOBAL]
-
-        paddw       xmm1,               [s63 GLOBAL]
-        paddw       xmm2,               [s63 GLOBAL]
-
-        psraw       xmm1,               7
-        psraw       xmm2,               7
-
-        packsswb    xmm1,               xmm2
-
-        movdqa      xmm6,               [rdx+16]
-        movdqa      xmm3,               [rdx+96]
-
-        pxor        xmm6,               [t80 GLOBAL]
-        pxor        xmm3,               [t80 GLOBAL]
-
-        paddsb      xmm6,               xmm1
-        psubsb      xmm3,               xmm1
-
-        pxor        xmm6,               [t80 GLOBAL]        ; xmm6 = f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01
-        pxor        xmm3,               [t80 GLOBAL]        ; xmm3 = f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 15 06
-%endmacro
-
 %macro MBV_TRANSPOSE 0
         movdqa      xmm0,               [rdx]               ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
         movdqa      xmm1,               xmm0                ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00
 
-        punpcklbw   xmm0,               xmm6                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
-        punpckhbw   xmm1,               xmm6                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
+        punpcklbw   xmm0,               xmm7                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        punpckhbw   xmm1,               xmm7                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
 
         movdqa      xmm2,               [rdx+32]            ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
         movdqa      xmm6,               xmm2                ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02
@@ -1422,10 +1158,10 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         punpcklbw   xmm2,               [rdx+48]            ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02
         punpckhbw   xmm6,               [rdx+48]            ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82
 
-        movdqa      xmm5,               xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
+        movdqa      xmm3,               xmm0                ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00
         punpcklwd   xmm0,               xmm2                ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00
 
-        punpckhwd   xmm5,               xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        punpckhwd   xmm3,               xmm2                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
         movdqa      xmm4,               xmm1                ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80
 
         punpcklwd   xmm1,               xmm6                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
@@ -1434,7 +1170,7 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
         movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
         punpcklbw   xmm2,               [rdx+80]            ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
 
-        movdqa      xmm6,               xmm3                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
+        movdqa      xmm6,               xmm5                ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06
         punpcklbw   xmm6,               [rdx+112]           ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06
 
         movdqa      xmm7,               xmm2                ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04
@@ -1449,70 +1185,53 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 
 %macro MBV_WRITEBACK_1 0
         movq        QWORD PTR [rsi],    xmm0
-        psrldq      xmm0,               8
-
-        movq        QWORD PTR [rdi],    xmm0
+        movhps      QWORD PTR [rdi],    xmm0
 
         movq        QWORD PTR [rsi+2*rax], xmm6
-        psrldq      xmm6,               8
+        movhps      QWORD PTR [rdi+2*rax], xmm6
 
-        movq        QWORD PTR [rdi+2*rax], xmm6
-
-        movdqa      xmm0,               xmm5                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
+        movdqa      xmm0,               xmm3                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
         punpckldq   xmm0,               xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
 
-        punpckhdq   xmm5,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
+        punpckhdq   xmm3,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
 
         movq        QWORD PTR [rsi+4*rax], xmm0
-        psrldq      xmm0,               8
+        movhps      QWORD PTR [rdi+4*rax], xmm0
 
-        movq        QWORD PTR [rdi+4*rax], xmm0
-
-        movq        QWORD PTR [rsi+2*rcx], xmm5
-        psrldq      xmm5,               8
-
-        movq        QWORD PTR [rdi+2*rcx], xmm5
+        movq        QWORD PTR [rsi+2*rcx], xmm3
+        movhps      QWORD PTR [rdi+2*rcx], xmm3
 
         movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
         punpckhbw   xmm2,               [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
 
-        punpckhbw   xmm3,               [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
+        punpckhbw   xmm5,               [rdx+112]           ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86
         movdqa      xmm0,               xmm2
 
-        punpcklwd   xmm0,               xmm3                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
-        punpckhwd   xmm2,               xmm3                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
+        punpcklwd   xmm0,               xmm5                ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84
+        punpckhwd   xmm2,               xmm5                ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4
 
-        movdqa      xmm3,               xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
+        movdqa      xmm5,               xmm1                ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80
         punpckldq   xmm1,               xmm0                ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80
 
-        punpckhdq   xmm3,               xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
+        punpckhdq   xmm5,               xmm0                ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0
 %endmacro
 
 %macro MBV_WRITEBACK_2 0
-        movq        QWORD PTR [rsi], xmm1
-        psrldq      xmm1,               8
+        movq        QWORD PTR [rsi],    xmm1
+        movhps      QWORD PTR [rdi],    xmm1
 
-        movq        QWORD PTR [rdi], xmm1
-
-        movq        QWORD PTR [rsi+2*rax], xmm3
-        psrldq      xmm3,               8
-
-        movq        QWORD PTR [rdi+2*rax], xmm3
+        movq        QWORD PTR [rsi+2*rax], xmm5
+        movhps      QWORD PTR [rdi+2*rax], xmm5
 
         movdqa      xmm1,               xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
         punpckldq   xmm1,               xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
-
         punpckhdq   xmm4,               xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
+
         movq        QWORD PTR [rsi+4*rax], xmm1
-
-        psrldq      xmm1,               8
-
-        movq        QWORD PTR [rdi+4*rax], xmm1
+        movhps      QWORD PTR [rdi+4*rax], xmm1
 
         movq        QWORD PTR [rsi+2*rcx], xmm4
-        psrldq      xmm4,               8
-
-        movq        QWORD PTR [rdi+2*rcx], xmm4
+        movhps      QWORD PTR [rdi+2*rcx], xmm4
 %endmacro
 
 
@@ -1550,21 +1269,14 @@ sym(vp8_mbloop_filter_vertical_edge_sse2):
         lea         rcx,                [rax*2+rax]
 
         ; Transpose
-        TRANSPOSE_16X8_1
+        TRANSPOSE_16X8 1, 0
 
-        lea         rsi,        [rsi+rax*8]
-        lea         rdi,        [rdi+rax*8]
-        lea         rdx,        srct
-        TRANSPOSE_16X8_2 0
-
-        ; calculate filter mask
-        LFV_FILTER_MASK 0
-        ; calculate high edge variance
-        LFV_HEV_MASK
+        ; calculate filter mask and high edge variance
+        LFV_FILTER_MASK_HEV_MASK 0
 
         neg         rax
         ; start work on filters
-        MBV_FILTER
+        MB_FILTER_AND_WRITEBACK 2
 
         lea         rsi,                [rsi+rax*8]
         lea         rdi,                [rdi+rax*8]
@@ -1625,23 +1337,16 @@ sym(vp8_mbloop_filter_vertical_edge_uv_sse2):
         lea         rdi,                [rsi + rax]         ; rdi points to row +1 for indirect addressing
         lea         rcx,                [rax+2*rax]
 
-        ; Transpose
-        TRANSPOSE_16X8_1
-
-        ; XMM3 XMM4 XMM7 in use
-        mov         rsi,                arg(5)              ; v_ptr
-        lea         rsi,                [rsi - 4]
-        lea         rdi,                [rsi + rax]
         lea         rdx,                srct
-        TRANSPOSE_16X8_2 0
 
-        ; calculate filter mask
-        LFV_FILTER_MASK 0
-        ; calculate high edge variance
-        LFV_HEV_MASK
+        ; Transpose
+        TRANSPOSE_16X8 0, 0
+
+        ; calculate filter mask and high edge variance
+        LFV_FILTER_MASK_HEV_MASK 0
 
         ; start work on filters
-        MBV_FILTER
+        MB_FILTER_AND_WRITEBACK 2
 
         ; transpose and write back
         MBV_TRANSPOSE
@@ -2068,12 +1773,6 @@ align 16
 ones:
     times 8 dw 0x0001
 align 16
-s27:
-    times 8 dw 0x1b00
-align 16
-s18:
-    times 8 dw 0x1200
-align 16
 s9:
     times 8 dw 0x0900
 align 16

From ff3068d6da5808c1ffba4c774ed6f307ef3998bd Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Wed, 29 Sep 2010 12:03:19 +0100
Subject: [PATCH 191/307] Control of active min quantizer for two pass.

Create  look up tables for controlling the active quantizer range.
Some initial tuning to improve quality circa 0.5% on test set.
Clean up of some stats output code

Change-Id: Ia698a8525f8b8129a503cadace3ee73fe888f543
---
 vp8/encoder/onyx_if.c | 214 +++++++++++++++++++++++++++++-------------
 1 file changed, 149 insertions(+), 65 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ce15acaac..ef4c69249 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -150,6 +150,95 @@ extern const int qzbin_factors[129];
 extern void vp8cx_init_quantizer(VP8_COMP *cpi);
 extern const int vp8cx_base_skip_false_prob[128];
 
+// Tables relating active max Q to active min Q
+static const int kf_low_motion_minq[QINDEX_RANGE] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4,
+    5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 10,10,
+    11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+    19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+    27,27,28,28,29,29,30,30,31,32,33,34,35,36,37,38,
+};
+static const int kf_high_motion_minq[QINDEX_RANGE] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+    2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
+    6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10,10,
+    11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+    19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+    27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
+    35,35,36,36,37,38,39,40,41,42,43,44,45,46,47,48,
+};
+/*static const int kf_minq[QINDEX_RANGE] =
+{
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1,
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 6, 6,
+    7, 7, 8, 8, 9, 9, 10,10,11,11,12,12,13,13,14,14,
+    15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
+    23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
+    31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,38
+};*/
+static const int gf_low_motion_minq[QINDEX_RANGE] =
+{
+    0,0,0,0,1,1,1,1,1,1,1,1,2,2,2,2,
+    3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6,
+    7,7,7,7,8,8,8,8,9,9,9,9,10,10,10,10,
+    11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18,
+    19,19,20,20,21,21,22,22,23,23,24,24,25,25,26,26,
+    27,27,28,28,29,29,30,30,31,31,32,32,33,33,34,34,
+    35,35,36,36,37,37,38,38,39,39,40,40,41,41,42,42,
+    43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58
+};
+static const int gf_mid_motion_minq[QINDEX_RANGE] =
+{
+    0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
+    4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+    9,10,10,10,10,11,11,11,12,12,12,12,13,13,13,14,
+    14,14,15,15,16,16,17,17,18,18,19,19,20,20,21,21,
+    22,22,23,23,24,24,25,25,26,26,27,27,28,28,29,29,
+    30,30,31,31,32,32,33,33,34,34,35,35,36,36,37,37,
+    38,39,39,40,40,41,41,42,42,43,43,44,45,46,47,48,
+    49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,
+};
+static const int gf_high_motion_minq[QINDEX_RANGE] =
+{
+    0,0,0,0,1,1,1,1,1,2,2,2,3,3,3,4,
+    4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+    9,10,10,10,11,11,12,12,13,13,14,14,15,15,16,16,
+    17,17,18,18,19,19,20,20,21,21,22,22,23,23,24,24,
+    25,25,26,26,27,27,28,28,29,29,30,30,31,31,32,32,
+    33,33,34,34,35,35,36,36,37,37,38,38,39,39,40,40,
+    41,41,42,42,43,44,45,46,47,48,49,50,51,52,53,54,
+    55,56,57,58,59,60,62,64,66,68,70,72,74,76,78,80,
+};
+/*static const int gf_arf_minq[QINDEX_RANGE] =
+{
+    0,0,0,0,1,1,1,1,1,1,2,2,3,3,3,4,
+    4,4,5,5,5,6,6,6,7,7,7,8,8,8,9,9,
+    9,10,10,10,11,11,11,12,12,12,13,13,13,14,14,14,
+    15,15,16,16,17,17,18,18,19,19,20,20,21,21,22,22,
+    23,23,24,24,25,25,26,26,27,27,28,28,29,29,30,30,
+    31,31,32,32,33,33,34,34,35,35,36,36,37,37,38,39,
+    39,40,40,41,41,42,42,43,43,44,45,46,47,48,49,50,
+    51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66
+};*/
+static const int inter_minq[QINDEX_RANGE] =
+{
+    0,0,0,0,1,1,2,3,3,4,4,5,6,6,7,7,
+    8,8,9,9,10,11,11,12,12,13,13,14,14,15,15,16,
+    16,17,17,17,18,18,19,19,20,20,21,21,22,22,22,23,
+    23,24,24,24,25,25,26,27,28,28,29,30,31,32,33,34,
+    35,35,36,37,38,39,39,40,41,42,43,43,44,45,46,47,
+    47,48,49,49,51,52,53,54,54,55,56,56,57,57,58,58,
+    59,59,60,61,61,62,62,63,64,64,65,66,67,67,68,69,
+    69,70,71,71,72,73,74,75,76,76,77,78,79,80,81,81,
+};
 
 void vp8_initialize()
 {
@@ -4167,87 +4256,49 @@ static void encode_frame_to_data_rate
     // Set an active best quality and if necessary active worst quality
     if (cpi->pass == 2 || (cm->current_video_frame > 150))
     {
-        //if ( (cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame  )
         int Q;
         int i;
         int bpm_target;
+        //int tmp;
+
+        vp8_clear_system_state();
 
         Q = cpi->active_worst_quality;
 
         if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame || cpi->common.refresh_alt_ref_frame)
         {
-            vp8_clear_system_state();
-
             if (cm->frame_type != KEY_FRAME)
             {
-                // Where a gf overlays an existing arf then allow active max Q to drift to highest allowed value.
-                //if ( cpi->common.refresh_golden_frame && cpi->source_alt_ref_active )
-                //cpi->active_worst_quality = cpi->worst_quality;
-
                 if (cpi->avg_frame_qindex < cpi->active_worst_quality)
                     Q = cpi->avg_frame_qindex;
 
-                if (cpi->section_is_low_motion)
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 3 / 2) + 128)) / 64;
-                else if (cpi->section_is_fast_motion)
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 128)) / 64;
+                if ( cpi->gfu_boost > 1000 )
+                    cpi->active_best_quality = gf_low_motion_minq[Q];
+                else if ( cpi->gfu_boost < 400 )
+                    cpi->active_best_quality = gf_high_motion_minq[Q];
                 else
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * ((Q * 5 / 4) + 128)) / 64;
-            }
-            // KEY FRAMES
-            else
-            {
-                if (cpi->section_is_low_motion)
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 240)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127
-                else
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64;
-            }
+                    cpi->active_best_quality = gf_mid_motion_minq[Q];
 
-            for (i = Q; i > 0; i--)
-            {
-                if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
-                    break;
-            }
-
-            cpi->active_best_quality = i;
-
-            // this entire section could be replaced by a look up table
-#if 0
-            {
-                int Q, best_q[128];
-
-                for (Q = 0; Q < 128; Q++)
-                {
-                    bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 160)) / 64; // Approx 2.5 to 4.5 where Q has the range 0-127
-
-                    for (i = Q; i > 0; i--)
-                    {
-                        if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
-                            break;
-                    }
-
-                    best_q[Q] = i;
-                }
-
-                Q += 0;
-            }
-#endif
+                /*cpi->active_best_quality = gf_arf_minq[Q];
+                tmp = (cpi->gfu_boost > 1000) ? 600 : cpi->gfu_boost - 400;
+                //tmp = (cpi->gfu_boost > 1000) ? 600 :
+                          //(cpi->gfu_boost < 400) ? 0 : cpi->gfu_boost - 400;
+                tmp = 128 - (tmp >> 4);
+                cpi->active_best_quality = (cpi->active_best_quality * tmp)>>7;*/
 
+           }
+           // KEY FRAMES
+           else
+           {
+               if (cpi->gfu_boost > 1000)
+                   cpi->active_best_quality = kf_low_motion_minq[Q];
+               else
+                   cpi->active_best_quality = kf_high_motion_minq[Q];
+           }
         }
         else
         {
-            vp8_clear_system_state();
-
-            //bpm_target = (vp8_bits_per_mb[cm->frame_type][Q]*(Q+128))/64; // Approx 2 to 4 where Q has the range 0-127
-            bpm_target = (vp8_bits_per_mb[cm->frame_type][Q] * (Q + 192)) / 128; // Approx * 1.5 to 2.5 where Q has range 0-127
-
-            for (i = Q; i > 0; i--)
-            {
-                if (bpm_target <= vp8_bits_per_mb[cm->frame_type][i])
-                    break;
-            }
-
-            cpi->active_best_quality = i;
+            cpi->active_best_quality = inter_minq[Q];
         }
 
         // If CBR and the buffer is as full then it is reasonable to allow higher quality on the frames
@@ -4952,9 +5003,39 @@ static void encode_frame_to_data_rate
         vp8_clear_system_state();  //__asm emms;
 
         if (cpi->total_coded_error_left != 0.0)
-            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality,  cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left, (double)cpi->bits_left / cpi->total_coded_error_left,  cpi->tot_recode_hits);
+            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
+                       "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+                       "%10.3f %8ld\n",
+                       cpi->common.current_video_frame, cpi->this_frame_target,
+                       cpi->projected_frame_size,
+                       (cpi->projected_frame_size - cpi->this_frame_target),
+                       (int)cpi->total_target_vs_actual,
+                       (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
+                       (int)cpi->total_actual_bits, cm->base_qindex,
+                       cpi->active_best_quality, cpi->active_worst_quality,
+                       cpi->avg_frame_qindex, cpi->zbin_over_quant,
+                       cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+                       cm->frame_type, cpi->gfu_boost,
+                       cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
+                       cpi->total_coded_error_left,
+                       (double)cpi->bits_left / cpi->total_coded_error_left,
+                       cpi->tot_recode_hits);
         else
-            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld %6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f %8ld\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), (int)cpi->total_target_vs_actual, (cpi->oxcf.starting_buffer_level - cpi->bits_off_target), (int)cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality,  cpi->avg_frame_qindex, cpi->zbin_over_quant, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->est_max_qcorrection_factor, (int)cpi->bits_left, cpi->total_coded_error_left,   cpi->tot_recode_hits);
+            fprintf(f, "%10d %10d %10d %10d %10d %10d %10d %10d %6ld %6ld"
+                       "%6ld %6ld %5ld %5ld %5ld %8ld %8.2f %10d %10.3f"
+                       "%8ld\n",
+                       cpi->common.current_video_frame,
+                       cpi->this_frame_target, cpi->projected_frame_size,
+                       (cpi->projected_frame_size - cpi->this_frame_target),
+                       (int)cpi->total_target_vs_actual,
+                       (cpi->oxcf.starting_buffer_level-cpi->bits_off_target),
+                       (int)cpi->total_actual_bits, cm->base_qindex,
+                       cpi->active_best_quality, cpi->active_worst_quality,
+                       cpi->avg_frame_qindex, cpi->zbin_over_quant,
+                       cm->refresh_golden_frame, cm->refresh_alt_ref_frame,
+                       cm->frame_type, cpi->gfu_boost,
+                       cpi->est_max_qcorrection_factor, (int)cpi->bits_left,
+                       cpi->total_coded_error_left, cpi->tot_recode_hits);
 
         fclose(f);
 
@@ -4962,7 +5043,10 @@ static void encode_frame_to_data_rate
             FILE *fmodes = fopen("Modes.stt", "a");
             int i;
 
-            fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame, cm->frame_type, cm->refresh_golden_frame, cm->refresh_alt_ref_frame);
+            fprintf(fmodes, "%6d:%1d:%1d:%1d ",
+                        cpi->common.current_video_frame,
+                        cm->frame_type, cm->refresh_golden_frame,
+                        cm->refresh_alt_ref_frame);
 
             for (i = 0; i < MAX_MODES; i++)
                 fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);

From 0e7c45b39148ad385f40fed37d4c0c75ba8fcb95 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Wed, 29 Sep 2010 13:03:07 +0100
Subject: [PATCH 192/307] Moved row-specific computation of MV bounds out of
 col loop

Moved the bounds computation on vertical MV component out
of the loop that processes MBs within a MB row.
---
 vp8/encoder/encodeframe.c | 26 ++++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 8c40a18e8..efbe2365b 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -400,21 +400,31 @@ void encode_mb_row(VP8_COMP *cpi,
     cpi->tplist[mb_row].start = *tp;
     //printf("Main mb_row = %d\n", mb_row);
 
+    // Distance of Mb to the top & bottom edges, specified in 1/8th pel
+    // units as they are always compared to values that are in 1/8th pel units
+    xd->mb_to_top_edge = -((mb_row * 16) << 3);
+    xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
+
+    // Set up limit values for vertical motion vector components
+    // to prevent them extending beyond the UMV borders
+    x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
+    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) 
+                        + (VP8BORDERINPIXELS - 16);
+
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
     {
-        // Distance of Mb to the various image edges.
-        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+        // Distance of Mb to the left & right edges, specified in 
+        // 1/8th pel units as they are always compared to values 
+        // that are in 1/8th pel units
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
-        xd->mb_to_top_edge = -((mb_row * 16) << 3);
-        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;
 
-        // Set up limit values for motion vectors used to prevent them extending outside the UMV borders
+        // Set up limit values for horizontal motion vector components
+        // to prevent them extending beyond the UMV borders
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
-        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
-        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-        x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) 
+                            + (VP8BORDERINPIXELS - 16);
 
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
         xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;

From 7288cdf79dd179d5bbf927db6240e3b9a4da412b Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Wed, 29 Sep 2010 13:22:05 +0100
Subject: [PATCH 193/307] Change to coefficient optimization rules.

Allow coefficient optimization for good quality speed 0.

Change-Id: Id0cb363df6823c6798671584fbba097916a7df2c
---
 vp8/encoder/onyx_if.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ef4c69249..18ea7b819 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -661,7 +661,6 @@ void vp8_set_speed_features(VP8_COMP *cpi)
         break;
     case 1:
     case 3:
-        sf->optimize_coefficients = 0;
         sf->thresh_mult[THR_NEARESTMV] = 0;
         sf->thresh_mult[THR_ZEROMV   ] = 0;
         sf->thresh_mult[THR_DC       ] = 0;
@@ -722,6 +721,9 @@ void vp8_set_speed_features(VP8_COMP *cpi)
 
         if (Speed > 0)
         {
+            // Disable coefficient optimization above speed 0
+            sf->optimize_coefficients = 0;
+
             cpi->mode_check_freq[THR_SPLITG] = 4;
             cpi->mode_check_freq[THR_SPLITA] = 4;
             cpi->mode_check_freq[THR_SPLITMV] = 2;

From b9be7a464f065c3700f937eea01d9298002b37eb Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 29 Sep 2010 13:04:04 -0400
Subject: [PATCH 194/307] Fix loopfilter delta zero transitions

Loopfilter deltas are initialized to zero on keyframes in the decoder.
The values then persist from the previous frame unless an update bit
is set in the bitstream. This data is not included in the entropy
data saved by the 'refresh entropy' bit in the bitstream, so it is
effectively an additional contextual element beyond the 3 ref-frames
and the entropy data.

The encoder was treating this delta update bit as update-if-nonzero,
meaning that the value would be refreshed even if it hadn't changed,
and more significantly, if the correct value for the delta changed
to zero, the update wouldn't be sent, and the decoder would preserve
the last (presumably non-zero) value.

This patch updates the encoder to send an update only if the value
has changed from the previously transmitted value. It also forces the
value to be transmitted in error resilient mode, to account for lost
context in the event of lost frames.

Change-Id: I56671d5b42965d0166ac226765dbfce3e5301868
---
 vp8/common/blockd.h     |  4 ++--
 vp8/encoder/bitstream.c | 14 ++++++++++----
 vp8/encoder/onyx_if.c   | 20 ++++++++------------
 3 files changed, 20 insertions(+), 18 deletions(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 4b7f1a359..75dd4f79d 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -262,9 +262,9 @@ typedef struct
     unsigned char mode_ref_lf_delta_update;
 
     // Delta values have the range +/- MAX_LOOP_FILTER
-    //char ref_lf_deltas[MAX_REF_LF_DELTAS];                      // 0 = Intra, Last, GF, ARF
-    //char mode_lf_deltas[MAX_MODE_LF_DELTAS];                            // 0 = BPRED, ZERO_MV, MV, SPLIT
+    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                // 0 = Intra, Last, GF, ARF
     signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     // 0 = Intra, Last, GF, ARF
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      // 0 = BPRED, ZERO_MV, MV, SPLIT
     signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           // 0 = BPRED, ZERO_MV, MV, SPLIT
 
     // Distance of MB away from frame edges
diff --git a/vp8/encoder/bitstream.c b/vp8/encoder/bitstream.c
index 929c17841..412542d10 100644
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -1490,9 +1490,11 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
     if (xd->mode_ref_lf_delta_enabled)
     {
         // Do the deltas need to be updated
-        vp8_write_bit(bc, (xd->mode_ref_lf_delta_update) ? 1 : 0);
+        int send_update = xd->mode_ref_lf_delta_update
+                          || cpi->oxcf.error_resilient_mode;
 
-        if (xd->mode_ref_lf_delta_update)
+        vp8_write_bit(bc, send_update);
+        if (send_update)
         {
             int Data;
 
@@ -1502,8 +1504,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
                 Data = xd->ref_lf_deltas[i];
 
                 // Frame level data
-                if (Data)
+                if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i]
+                    || cpi->oxcf.error_resilient_mode)
                 {
+                    xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i];
                     vp8_write_bit(bc, 1);
 
                     if (Data > 0)
@@ -1527,8 +1531,10 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned long *size)
             {
                 Data = xd->mode_lf_deltas[i];
 
-                if (Data)
+                if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i]
+                    || cpi->oxcf.error_resilient_mode)
                 {
+                    xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i];
                     vp8_write_bit(bc, 1);
 
                     if (Data > 0)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 18ea7b819..7ea7884e4 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -272,6 +272,8 @@ static void setup_features(VP8_COMP *cpi)
     cpi->mb.e_mbd.mode_ref_lf_delta_update = 0;
     vpx_memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
     vpx_memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
+    vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
+    vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
 
     // jbb trial !
     mode_ref_lf_test_function(cpi);
@@ -4093,6 +4095,9 @@ static void encode_frame_to_data_rate
     {
         int i;
 
+        // Reset the loop filter deltas and segmentation map
+        setup_features(cpi);
+
         // If segmentation is enabled force a map update for key frames
         if (cpi->mb.e_mbd.segmentation_enabled)
         {
@@ -4100,12 +4105,6 @@ static void encode_frame_to_data_rate
             cpi->mb.e_mbd.update_mb_segmentation_data = 1;
         }
 
-        // If mode or reference frame based loop filter deltas are enabled then force an update for key frames.
-        if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled)
-        {
-            cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
-        }
-
         // The alternate reference frame cannot be active for a key frame
         cpi->source_alt_ref_active = FALSE;
 
@@ -4527,6 +4526,9 @@ static void encode_frame_to_data_rate
                 // Clear the Alt reference frame active flag when we have a key frame
                 cpi->source_alt_ref_active = FALSE;
 
+                // Reset the loop filter deltas and segmentation map
+                setup_features(cpi);
+
                 // If segmentation is enabled force a map update for key frames
                 if (cpi->mb.e_mbd.segmentation_enabled)
                 {
@@ -4534,12 +4536,6 @@ static void encode_frame_to_data_rate
                     cpi->mb.e_mbd.update_mb_segmentation_data = 1;
                 }
 
-                // If mode or reference frame based loop filter deltas are enabled then force an update for key frames.
-                if (cpi->mb.e_mbd.mode_ref_lf_delta_enabled)
-                {
-                    cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
-                }
-
                 vp8_restore_coding_context(cpi);
 
                 Q = vp8_regulate_q(cpi, cpi->this_frame_target);

From 7e5e31516c19494630da1cb2e49e3963f7f1fc61 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 29 Sep 2010 13:53:08 -0400
Subject: [PATCH 195/307] Rename mode_ref_lf_test_function

This function graduated from being a test func to something that's on
by default. Rename it and remove some spurious comments that confuse
its status.

Change-Id: I689695a3ad29c35e9a72a43ec93766733ac6c20b
---
 vp8/encoder/onyx_if.c | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7ea7884e4..f09777a95 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -73,7 +73,7 @@ int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const
 int vp8_calc_low_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest, const vp8_variance_rtcd_vtable_t *rtcd);
 
 
-static void mode_ref_lf_test_function(VP8_COMP *cpi);
+static void set_default_lf_deltas(VP8_COMP *cpi);
 
 extern const int vp8_gf_interval_table[101];
 
@@ -275,8 +275,7 @@ static void setup_features(VP8_COMP *cpi)
     vpx_memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas));
     vpx_memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas));
 
-    // jbb trial !
-    mode_ref_lf_test_function(cpi);
+    set_default_lf_deltas(cpi);
 
 }
 
@@ -534,7 +533,7 @@ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment)
 
 }
 
-static void mode_ref_lf_test_function(VP8_COMP *cpi)
+static void set_default_lf_deltas(VP8_COMP *cpi)
 {
     cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1;
     cpi->mb.e_mbd.mode_ref_lf_delta_update = 1;
@@ -2184,9 +2183,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     // Test function for segmentation
     //segmentation_test_function((VP8_PTR) cpi);
 
-    // Loop filter mode / ref deltas test function
-    //mode_ref_lf_test_function(cpi);
-
 #ifdef ENTROPY_STATS
     init_context_counters();
 #endif

From 8ee7284d604a25838e71636b9cd4a16d53a2555e Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Thu, 30 Sep 2010 10:06:09 +0100
Subject: [PATCH 196/307] Changed defaults & range checking for AltRef params

Modified the range checking of parameters used in the
AltRef temporal filter (arnr-max-frames, arnr-strength,
arnr-type) and default values for each of them.

Change-Id: Ib261028d501b9523f6e44cb4790cc52167b6e92b
---
 vp8/vp8_cx_iface.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index f8a4de85b..a6cb27b93 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -63,9 +63,9 @@ static const struct extraconfig_map extracfg_map[] =
             0,                          /* Sharpness */
             0,                          /* static_thresh */
             VP8_ONE_TOKENPARTITION,     /* token_partitions */
-            0, /* arnr_max_frames */
-            0, /* arnr_strength */
-            0, /* arnr_type*/
+            0,                          /* arnr_max_frames */
+            3,                          /* arnr_strength */
+            3,                          /* arnr_type*/
         }
     }
 };
@@ -183,9 +183,9 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 
     RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
     RANGE_CHECK_HI(vp8_cfg, Sharpness,       7);
-    RANGE_CHECK_HI(vp8_cfg, arnr_max_frames, 15);
-    RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
-    RANGE_CHECK_HI(vp8_cfg, arnr_type,       0xffffffff);
+    RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
+    RANGE_CHECK(vp8_cfg, arnr_strength,   1, 6);
+    RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
 
     if (cfg->g_pass == VPX_RC_LAST_PASS)
     {

From a465076e02aece158093ebe55f30c3420ab1ad14 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Thu, 30 Sep 2010 20:40:45 -0700
Subject: [PATCH 197/307] Fix valgrind errors in the NEON loop filters.

Like the ARMv6 code, these functions were accessing values below
 the stack pointer, which can be corrupted by signal delivery at
 any time.
---
 vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm | 7 +++----
 vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm  | 8 ++++----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
index 33cd55e1c..044b3a3a8 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -67,9 +67,10 @@
 
     sub         sp, sp, #32
     vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vst1.u8     {q3}, [sp]!
+    mov         r12, sp
+    vst1.u8     {q3}, [r12]!
+    vst1.u8     {q10}, [r12]!
     ldr         r12, _mbvlfuv_coeff_
-    vst1.u8     {q10}, [sp]!
 
     ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
@@ -165,8 +166,6 @@
 
     vld1.u8     {d5}, [r12]!                ;#27
 
-    sub         sp, sp, #32
-
     vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
index f51fd0bd4..e0716625c 100644
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -63,11 +63,12 @@
     vtrn.8      q7, q8
     vtrn.8      q9, q10
 
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vst1.u8     {q3}, [sp]!
     vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    mov         r12, sp
+    vst1.u8     {q3}, [r12]!
+    vst1.u8     {q10}, [r12]!
     ldr         r12, _mbvlfy_coeff_
-    vst1.u8     {q10}, [sp]!
 
     ; vp8_filter_mask
     vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
@@ -150,7 +151,6 @@
 
     vld1.u8     {d6}, [r12]!                ;#18
     sub         r0, r0, r1, lsl #4
-    sub         sp, sp, #32
 
     add         r2, r0, r1
 

From 999bc003018877a992ce59c1e26cbd6bb45b7991 Mon Sep 17 00:00:00 2001
From: Adrian Grange <agrange@google.com>
Date: Fri, 1 Oct 2010 10:14:01 +0100
Subject: [PATCH 198/307] Made temporal filter default to use centered mode

If temporal filtering is enabled but a filter type is not specified
centered filter mode is used by default.

Change-Id: I87306f267c1390074c806c506a69b4ba914d92a2
---
 vp8/encoder/onyx_if.c | 24 +-----------------------
 1 file changed, 1 insertion(+), 23 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index f09777a95..46c283d68 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3916,6 +3916,7 @@ static void vp8cx_temp_filter_c
         break;
 
     case 3:
+    default:
         /////////////////////////////////////////
         // Center Blur
         frames_to_blur_forward = num_frames_forward;
@@ -3936,29 +3937,6 @@ static void vp8cx_temp_filter_c
 
         frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
         break;
-
-    default:
-        /////////////////////////////////////////
-        // At most 4 frames forward Blur
-        frames_to_blur_forward = 4;
-        frames_to_blur_backward = num_frames_backward;
-
-        if (max_frames > 5)
-        {
-            if ((frames_to_blur_backward + frames_to_blur_forward) >= max_frames)
-            {
-                frames_to_blur_backward
-                    = max_frames - frames_to_blur_forward - 1;
-            }
-        }
-        else
-        {
-            frames_to_blur_forward = max_frames - 1;
-            frames_to_blur_backward = 0;
-        }
-
-        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
-        break;
     }
 
     start_frame = (cpi->last_alt_ref_sei

From dcd29e369f35055fd39208a230bf64397217dfa6 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Thu, 30 Sep 2010 20:41:37 -0700
Subject: [PATCH 199/307] enable trellis quantization for 2nd order blocks

Experimented with different value for Y2_RD_MULT ranging f[1, 32],
without adapting the value to MB coding mode/frame type/Q value,
4 works out best among all values, providing overall 0.1% coding
gain on the test set.

Change-Id: I6b2583a8aa5db5e7e5c65c646301909c0c58f876
---
 vp8/encoder/encodemb.c | 40 ++++++++++++++++++++++++++--------------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index e10b5159a..63cdf3c1a 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -242,7 +242,20 @@ struct vp8_token_state{
   short         qc;
 };
 
-void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
+// TODO: experiments to find optimal multiple numbers
+#define Y1_RD_MULT 1
+#define UV_RD_MULT 1
+#define Y2_RD_MULT 4
+
+static const int plane_rd_mult[4]=
+{
+    Y1_RD_MULT,
+    Y2_RD_MULT,
+    UV_RD_MULT,
+    Y1_RD_MULT
+};
+
+void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
                     ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l,
                     const VP8_ENCODER_RTCD *rtcd)
 {
@@ -275,9 +288,11 @@ void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
     int best;
     int band;
     int pt;
+    int i;
+    int err_mult = plane_rd_mult[type];
 
-    b = &mb->block[i];
-    d = &mb->e_mbd.block[i];
+    b = &mb->block[ib];
+    d = &mb->e_mbd.block[ib];
 
     /* Enable this to test the effect of RDO as a replacement for the dynamic
      *  zero bin instead of an augmentation of it.
@@ -295,7 +310,7 @@ void vp8_optimize_b(MACROBLOCK *mb, int i, int type,
 
     /* Now set up a Viterbi trellis to evaluate alternative roundings. */
     /* TODO: These should vary with the block type, since the quantizer does. */
-    rdmult = mb->rdmult << 2;
+    rdmult = (mb->rdmult << 2)*err_mult;
     rddiv = mb->rddiv;
     best_mask[0] = best_mask[1] = 0;
     /* Initialize the sentinel node of the trellis. */
@@ -523,14 +538,12 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
     }
 
 
-    /*
     if (has_2nd_order)
     {
-        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
-            x->e_mbd.left_context[Y2CONTEXT], 1);
-        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+        b=24;
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
-    */
 }
 
 
@@ -595,14 +608,13 @@ void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
         ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
 
-    /*
+
     if (has_2nd_order)
     {
-        vp8_setup_temp_context(&t, x->e_mbd.above_context[Y2CONTEXT],
-            x->e_mbd.left_context[Y2CONTEXT], 1);
-        vp8_optimize_b(x, 24, 1, t.a, t.l, rtcd);
+        b=24;
+        vp8_optimize_b(x, b, vp8_block2type[b],
+            ta + vp8_block2above[b], tl + vp8_block2left[b], rtcd);
     }
-    */
 }
 
 void vp8_optimize_mbuv(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)

From 788c0eb54ef741153557953c22fe3f14bceb13d3 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Sat, 2 Oct 2010 17:31:46 +0100
Subject: [PATCH 200/307] Tune effect of motion on KF/GF boost in two pass;

This code adjust the impact of the amount and speed of motion
on GF and KF boost.

Sections with lots of slow motion will tend to have a
somewhat bigger boost and sections with fast motion may
have less.

There is a knock on effect to the selection of the active
quantizer range.

This will likely require further tuning but helps with a couple
of particularly bad edge cases.

Change-Id: Ic2449cda7305672b69acf42fc0a845b77ac98d40
---
 vp8/encoder/firstpass.c | 84 ++++++++++++++++++++++++++++++-----------
 vp8/encoder/onyx_if.c   |  4 +-
 2 files changed, 63 insertions(+), 25 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 0a6e8c50e..13633e9a4 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1218,10 +1218,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     double mv_accumulator_rabs  = 0.0;
     double mv_accumulator_cabs  = 0.0;
-    double this_mv_rabs;
-    double this_mv_cabs;
     double mv_ratio_accumulator = 0.0;
-    double distance_factor = 0.0;
     double decay_accumulator = 1.0;
 
     double boost_factor = IIFACTOR;
@@ -1270,9 +1267,10 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     while (((i < cpi->max_gf_interval) || ((cpi->frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->frames_to_key))
     {
         double r;
-        double motion_factor;
         double this_frame_mvr_ratio;
         double this_frame_mvc_ratio;
+        double motion_decay;
+        double motion_pct = next_frame.pcnt_motion;
 
         i++;                                                    // Increment the loop counter
 
@@ -1287,12 +1285,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             break;
 
         // Accumulate motion stats.
-        motion_factor = next_frame.pcnt_motion;
-        this_mv_rabs = fabs(next_frame.mvr_abs * motion_factor);
-        this_mv_cabs = fabs(next_frame.mvc_abs * motion_factor);
-
-        mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_factor);
-        mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_factor);
+        mv_accumulator_rabs += fabs(next_frame.mvr_abs * motion_pct);
+        mv_accumulator_cabs += fabs(next_frame.mvc_abs * motion_pct);
 
         //Accumulate Motion In/Out of frame stats
         this_frame_mv_in_out = next_frame.mv_in_out_count * next_frame.pcnt_motion;
@@ -1300,13 +1294,23 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion);
 
         // If there is a significant amount of motion
-        if (motion_factor > 0.05)
+        if (motion_pct > 0.05)
         {
-            this_frame_mvr_ratio = fabs(next_frame.mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr));
-            this_frame_mvc_ratio = fabs(next_frame.mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc));
+            this_frame_mvr_ratio = fabs(next_frame.mvr_abs) /
+                                   DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVr));
 
-            mv_ratio_accumulator += (this_frame_mvr_ratio < next_frame.mvr_abs) ? (this_frame_mvr_ratio * motion_factor) : next_frame.mvr_abs * motion_factor;
-            mv_ratio_accumulator += (this_frame_mvc_ratio < next_frame.mvc_abs) ? (this_frame_mvc_ratio * motion_factor) : next_frame.mvc_abs * motion_factor;
+            this_frame_mvc_ratio = fabs(next_frame.mvc_abs) /
+                                   DOUBLE_DIVIDE_CHECK(fabs(next_frame.MVc));
+
+            mv_ratio_accumulator +=
+                (this_frame_mvr_ratio < next_frame.mvr_abs)
+                    ? (this_frame_mvr_ratio * motion_pct)
+                    : next_frame.mvr_abs * motion_pct;
+
+            mv_ratio_accumulator +=
+                (this_frame_mvc_ratio < next_frame.mvc_abs)
+                    ? (this_frame_mvc_ratio * motion_pct)
+                    : next_frame.mvc_abs * motion_pct;
         }
         else
         {
@@ -1334,14 +1338,26 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         loop_decay_rate = next_frame.pcnt_inter;
 
         // High % motion -> somewhat higher decay rate
-        if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate)
-            loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0));
+        motion_decay = (1.0 - (motion_pct / 20.0));
+        if (motion_decay < loop_decay_rate)
+            loop_decay_rate = motion_decay;
 
-        distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + (this_mv_cabs * this_mv_cabs)) / 300.0;
-        distance_factor = ((distance_factor > 1.0) ? 0.0 : (1.0 - distance_factor));
+        // Adjustment to decay rate based on speed of motion
+        {
+            double this_mv_rabs;
+            double this_mv_cabs;
+            double distance_factor;
 
-        if (distance_factor < loop_decay_rate)
-            loop_decay_rate = distance_factor;
+            this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
+            this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
+
+            distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
+                                   (this_mv_cabs * this_mv_cabs)) / 250.0;
+            distance_factor = ((distance_factor > 1.0)
+                                    ? 0.0 : (1.0 - distance_factor));
+            if (distance_factor < loop_decay_rate)
+                loop_decay_rate = distance_factor;
+        }
 
         // Cumulative effect of decay
         decay_accumulator = decay_accumulator * loop_decay_rate;
@@ -2281,6 +2297,8 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     for (i = 0 ; i < cpi->frames_to_key ; i++)
     {
         double r;
+        double motion_decay;
+        double motion_pct = next_frame.pcnt_motion;
 
         if (EOF == vp8_input_stats(cpi, &next_frame))
             break;
@@ -2294,10 +2312,30 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         //if ( next_frame.pcnt_inter < loop_decay_rate )
         loop_decay_rate = next_frame.pcnt_inter;
 
-        if ((1.0 - (next_frame.pcnt_motion / 10.0)) < loop_decay_rate)
-            loop_decay_rate = (1.0 - (next_frame.pcnt_motion / 10.0));
+        // High % motion -> somewhat higher decay rate
+        motion_decay = (1.0 - (motion_pct / 20.0));
+        if (motion_decay < loop_decay_rate)
+            loop_decay_rate = motion_decay;
+
+        // Adjustment to decay rate based on speed of motion
+        {
+            double this_mv_rabs;
+            double this_mv_cabs;
+            double distance_factor;
+
+            this_mv_rabs = fabs(next_frame.mvr_abs * motion_pct);
+            this_mv_cabs = fabs(next_frame.mvc_abs * motion_pct);
+
+            distance_factor = sqrt((this_mv_rabs * this_mv_rabs) +
+                                   (this_mv_cabs * this_mv_cabs)) / 250.0;
+            distance_factor = ((distance_factor > 1.0)
+                                    ? 0.0 : (1.0 - distance_factor));
+            if (distance_factor < loop_decay_rate)
+                loop_decay_rate = distance_factor;
+        }
 
         decay_accumulator = decay_accumulator * loop_decay_rate;
+        decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator;
 
         boost_score += (decay_accumulator * r);
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 46c283d68..6b7333f1d 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4247,7 +4247,7 @@ static void encode_frame_to_data_rate
                 if (cpi->avg_frame_qindex < cpi->active_worst_quality)
                     Q = cpi->avg_frame_qindex;
 
-                if ( cpi->gfu_boost > 1000 )
+               if ( cpi->gfu_boost > 1000 )
                     cpi->active_best_quality = gf_low_motion_minq[Q];
                 else if ( cpi->gfu_boost < 400 )
                     cpi->active_best_quality = gf_high_motion_minq[Q];
@@ -4265,7 +4265,7 @@ static void encode_frame_to_data_rate
            // KEY FRAMES
            else
            {
-               if (cpi->gfu_boost > 1000)
+               if (cpi->gfu_boost > 600)
                    cpi->active_best_quality = kf_low_motion_minq[Q];
                else
                    cpi->active_best_quality = kf_high_motion_minq[Q];

From 49fdb7c41ed9184d74c05264c8610a5223d07018 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Mon, 4 Oct 2010 10:58:42 -0700
Subject: [PATCH 201/307] fixed a typo that mis-used Y plane stride for UV
 blocks.

Raised by Lei Yang, the Y plane stride was used for UV blocks.
This is clearly a typo. But as the comments in the code suggested
that this port of code has not been used yet, so the typo should
not have created any damage yet.

Change-Id: Iea895edc17469a51c803a8cc6d0fce65a1a7fc2f
---
 vp8/common/reconinter.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index ffdc660c2..2a7f12908 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -669,11 +669,15 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
                 if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                 {
-                    x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
+                    x->subpixel_predict8x4(ptr, d0->pre_stride,
+                        d0->bmi.mv.as_mv.col & 7,
+                        d0->bmi.mv.as_mv.row & 7,
+                        dst_ptr, x->dst.uv_stride);
                 }
                 else
                 {
-                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
+                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
+                        d0->pre_stride, dst_ptr, x->dst.uv_stride);
                 }
             }
             else

From e114f699f654235e05fb1bbd605c35708890d8b8 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Mon, 4 Oct 2010 23:19:33 +0200
Subject: [PATCH 202/307] nasm: match instruction length (movd/movq) to
 parameters

nasm requires the instruction length (movd/movq) to match to its
parameters. I find it more clear to really use 64bit instructions when
we use 64bit registers in the assembly.

Provide nasm compatibility. No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu. Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: Id9b1a5cdfb1bc05697e523c317a296df43d42a91
---
 vp8/common/x86/iwalsh_mmx.asm          |  2 +-
 vp8/decoder/x86/dequantize_mmx.asm     |  2 +-
 vp8/encoder/x86/encodeopt.asm          | 12 ++++-----
 vp8/encoder/x86/quantize_mmx.asm       |  6 ++---
 vp8/encoder/x86/sad_mmx.asm            | 10 +++----
 vp8/encoder/x86/sad_sse2.asm           | 16 +++++------
 vp8/encoder/x86/sad_sse3.asm           |  4 +--
 vp8/encoder/x86/variance_impl_mmx.asm  |  2 +-
 vp8/encoder/x86/variance_impl_sse2.asm |  4 +--
 vpx_ports/x86_abi_support.asm          | 37 ++++++++++++++++++++++++++
 10 files changed, 66 insertions(+), 29 deletions(-)

diff --git a/vp8/common/x86/iwalsh_mmx.asm b/vp8/common/x86/iwalsh_mmx.asm
index 3f0671c58..10b5274dc 100644
--- a/vp8/common/x86/iwalsh_mmx.asm
+++ b/vp8/common/x86/iwalsh_mmx.asm
@@ -69,7 +69,7 @@ sym(vp8_short_inv_walsh4x4_mmx):
     movq    mm2, [rsi + 16]       ;ip[8]
     movq    mm3, [rsi + 24]       ;ip[12]
 
-    movd    mm7, rax
+    movq    mm7, rax
     movq    mm4, mm0
 
     punpcklwd mm7, mm7          ;0003000300030003h
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index 150d090b6..eb9d1f8aa 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -288,7 +288,7 @@ sym(vp8_dequant_dc_idct_add_mmx):
         psrlq       mm0,    16
         movzx       rcx,    word ptr arg(6) ;Dc
         psllq       mm0,    16
-        movd        mm7,    rcx
+        movq        mm7,    rcx
         por         mm0,    mm7
 
         movsxd      rax,            dword ptr arg(4) ;pitch
diff --git a/vp8/encoder/x86/encodeopt.asm b/vp8/encoder/x86/encodeopt.asm
index 413d74d61..c0f06bbbb 100644
--- a/vp8/encoder/x86/encodeopt.asm
+++ b/vp8/encoder/x86/encodeopt.asm
@@ -50,7 +50,7 @@ sym(vp8_block_error_xmm):
         psrldq      xmm0,       8
         paddd       xmm0,       xmm3
 
-        movd        rax,        xmm0
+        movq        rax,        xmm0
 
     pop rdi
     pop rsi
@@ -115,7 +115,7 @@ sym(vp8_block_error_mmx):
         psrlq       mm1,        32
         paddd       mm0,        mm1
 
-        movd        rax,        mm0
+        movq        rax,        mm0
 
     pop rdi
     pop rsi
@@ -192,7 +192,7 @@ mberror_loop_mmx:
         psrlq       mm2,        32
 
         paddd       mm0,        mm2
-        movd        rax,        mm0
+        movq        rax,        mm0
 
     pop rdi
     pop rsi
@@ -260,7 +260,7 @@ mberror_loop:
         psrldq      xmm0,       8
 
         paddd       xmm0,       xmm1
-        movd        rax,        xmm0
+        movq        rax,        xmm0
 
     pop rdi
     pop rsi
@@ -317,7 +317,7 @@ mbuverror_loop_mmx:
         psrlq           mm7,        32
 
         paddd           mm0,        mm7
-        movd            rax,        mm0
+        movq            rax,        mm0
 
     pop rdi
     pop rsi
@@ -374,7 +374,7 @@ mbuverror_loop:
         psrldq      xmm1,           8
         paddd       xmm1,           xmm2
 
-        movd            rax,            xmm1
+        movq            rax,            xmm1
 
     pop rdi
     pop rsi
diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index a867409b5..51cd94078 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -249,7 +249,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
         paddd           mm0,        mm5
 
         ; eob adjustment begins here
-        movd            rcx,        mm0
+        movq            rcx,        mm0
         and             rcx,        0xffff
 
         xor             rdx,        rdx
@@ -262,7 +262,7 @@ sym(vp8_fast_quantize_b_impl_mmx):
         and             rax,        rdx
         ; Substitute the sse assembly for the old mmx mixed assembly/C. The
         ; following is kept as reference
-        ;    movd            rcx,        mm0
+        ;    movq            rcx,        mm0
         ;    bsr             rax,        rcx
         ;
         ;    mov             eob,        rax
@@ -418,7 +418,7 @@ sym(vp8_fast_quantize_b_impl_sse):
         psrldq          xmm0,       4
         paddd           xmm1,       xmm0
 
-        movd            rcx,        xmm1
+        movq            rcx,        xmm1
         and             rcx,        0xffff
 
         xor             rdx,        rdx
diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index ad9658bf6..19041d49f 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -100,7 +100,7 @@ x16x16sad_mmx_loop:
         psrlq           mm0,        32
         paddw           mm7,        mm0
 
-        movd            rax,        mm7
+        movq            rax,        mm7
 
     pop rdi
     pop rsi
@@ -172,7 +172,7 @@ x8x16sad_mmx_loop:
         psrlq           mm0,        32
 
         paddw           mm7,        mm0
-        movd            rax,        mm7
+        movq            rax,        mm7
 
     pop rdi
     pop rsi
@@ -242,7 +242,7 @@ x8x8sad_mmx_loop:
         psrlq           mm0,        32
 
         paddw           mm7,        mm0
-        movd            rax,        mm7
+        movq            rax,        mm7
 
     pop rdi
     pop rsi
@@ -331,7 +331,7 @@ sym(vp8_sad4x4_mmx):
         psrlq           mm0,        32
         paddw           mm0,        mm1
 
-        movd            rax,        mm0
+        movq            rax,        mm0
 
     pop rdi
     pop rsi
@@ -418,7 +418,7 @@ x16x8sad_mmx_loop:
         psrlq           mm0,        32
 
         paddw           mm7,        mm0
-        movd            rax,        mm7
+        movq            rax,        mm7
 
     pop rdi
     pop rsi
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 9f34a7ac4..0f6c5d9c4 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -75,7 +75,7 @@ x16x16sad_wmt_loop:
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            rax,        xmm0
+        movq            rax,        xmm0
 
     ; begin epilog
     pop rdi
@@ -113,7 +113,7 @@ sym(vp8_sad8x16_wmt):
 
 x8x16sad_wmt_loop:
 
-        movd            rax,        mm7
+        movq            rax,        mm7
         cmp             rax,        arg(4)
         jg              x8x16sad_wmt_early_exit
 
@@ -135,7 +135,7 @@ x8x16sad_wmt_loop:
         cmp             rsi,        rcx
         jne             x8x16sad_wmt_loop
 
-        movd            rax,        mm7
+        movq            rax,        mm7
 
 x8x16sad_wmt_early_exit:
 
@@ -174,7 +174,7 @@ sym(vp8_sad8x8_wmt):
 
 x8x8sad_wmt_loop:
 
-        movd            rax,        mm7
+        movq            rax,        mm7
         cmp             rax,        arg(4)
         jg              x8x8sad_wmt_early_exit
 
@@ -190,7 +190,7 @@ x8x8sad_wmt_loop:
         cmp             rsi,        rcx
         jne             x8x8sad_wmt_loop
 
-        movd            rax,        mm7
+        movq            rax,        mm7
 x8x8sad_wmt_early_exit:
 
     ; begin epilog
@@ -246,7 +246,7 @@ sym(vp8_sad4x4_wmt):
         psadbw          mm4,        mm5
 
         paddw           mm0,        mm4
-        movd            rax,        mm0
+        movq            rax,        mm0
 
     ; begin epilog
     pop rdi
@@ -283,7 +283,7 @@ sym(vp8_sad16x8_wmt):
 
 x16x8sad_wmt_loop:
 
-        movd            rax,        mm7
+        movq            rax,        mm7
         cmp             rax,        arg(4)
         jg              x16x8sad_wmt_early_exit
 
@@ -317,7 +317,7 @@ x16x8sad_wmt_loop:
         cmp             rsi,        rcx
         jne             x16x8sad_wmt_loop
 
-        movd            rax,        mm7
+        movq            rax,        mm7
 
 x16x8sad_wmt_early_exit:
 
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index c2a1ae70a..b12c81562 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -530,7 +530,7 @@ sym(vp8_sad16x16_sse3):
 
 vp8_sad16x16_sse3_loop:
 
-        movd            rax,        mm7
+        movq            rax,        mm7
         cmp             rax,        arg(4)
         jg              vp8_sad16x16_early_exit
 
@@ -564,7 +564,7 @@ vp8_sad16x16_sse3_loop:
         cmp             rsi,        rcx
         jne             vp8_sad16x16_sse3_loop
 
-        movd            rax,        mm7
+        movq            rax,        mm7
 
 vp8_sad16x16_early_exit:
 
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index 173238e24..d4ec63bd6 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -498,7 +498,7 @@ sym(vp8_get4x4sse_cs_mmx):
         psrlq       mm7,    32
 
         paddd       mm0,    mm7
-        movd        rax,    mm0
+        movq        rax,    mm0
 
 
     ; begin epilog
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index f47d9ccdd..38b3f33ee 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -58,7 +58,7 @@ NEXTROW:
         movdqa      xmm3,xmm4
         psrldq      xmm4,4
         paddd       xmm4,xmm3
-        movd        rax,xmm4
+        movq        rax,xmm4
 
 
     ; begin epilog
@@ -471,7 +471,7 @@ sym(vp8_get8x8var_sse2):
         mov         rax,            arg(5) ;[Sum]
         mov         rdi,            arg(4) ;[SSE]
 
-        movd        rdx,            xmm7
+        movq        rdx,            xmm7
         movsx       rcx,            dx
 
         mov  dword ptr [rax],       ecx
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index dc9e2d92c..470c58a6d 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -36,6 +36,43 @@
 %define rsp esp
 %define rbp ebp
 %define movsxd mov
+%macro movq 2
+  %ifidn %1,eax
+    movd %1,%2
+  %elifidn %2,eax
+    movd %1,%2
+  %elifidn %1,ebx
+    movd %1,%2
+  %elifidn %2,ebx
+    movd %1,%2
+  %elifidn %1,ecx
+    movd %1,%2
+  %elifidn %2,ecx
+    movd %1,%2
+  %elifidn %1,edx
+    movd %1,%2
+  %elifidn %2,edx
+    movd %1,%2
+  %elifidn %1,esi
+    movd %1,%2
+  %elifidn %2,esi
+    movd %1,%2
+  %elifidn %1,edi
+    movd %1,%2
+  %elifidn %2,edi
+    movd %1,%2
+  %elifidn %1,esp
+    movd %1,%2
+  %elifidn %2,esp
+    movd %1,%2
+  %elifidn %1,ebp
+    movd %1,%2
+  %elifidn %2,ebp
+    movd %1,%2
+  %else
+    movq %1,%2
+  %endif
+%endmacro
 %endif
 
 

From 5cdc3a4c29c5da43a4c346d57932c1c46068abec Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Mon, 4 Oct 2010 23:18:58 +0200
Subject: [PATCH 203/307] nasm: address labels 'rel label' vice 'wrt rip'

nasm does not support `label wrt rip', it requires `rel label'. It is
still fully compatible with yasm.

Provide nasm compatibility. No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu. Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: I488773a4e930a56e43b0cc72d867ee5291215f50
---
 vp8/common/x86/idctllm_mmx.asm         |  24 +--
 vp8/common/x86/idctllm_sse2.asm        |  44 +++---
 vp8/common/x86/loopfilter_mmx.asm      | 208 ++++++++++++-------------
 vp8/common/x86/loopfilter_sse2.asm     | 106 ++++++-------
 vp8/common/x86/postproc_mmx.asm        |  10 +-
 vp8/common/x86/postproc_sse2.asm       |  10 +-
 vp8/common/x86/subpixel_mmx.asm        |  42 ++---
 vp8/common/x86/subpixel_sse2.asm       |  48 +++---
 vp8/common/x86/subpixel_ssse3.asm      | 140 ++++++++---------
 vp8/decoder/x86/dequantize_mmx.asm     |  40 ++---
 vp8/encoder/x86/dct_mmx.asm            |   4 +-
 vp8/encoder/x86/dct_sse2.asm           |  30 ++--
 vp8/encoder/x86/variance_impl_mmx.asm  |  18 +--
 vp8/encoder/x86/variance_impl_sse2.asm |   6 +-
 vpx_ports/x86_abi_support.asm          |   8 +-
 15 files changed, 369 insertions(+), 369 deletions(-)

diff --git a/vp8/common/x86/idctllm_mmx.asm b/vp8/common/x86/idctllm_mmx.asm
index 99e09a50e..43735bc4b 100644
--- a/vp8/common/x86/idctllm_mmx.asm
+++ b/vp8/common/x86/idctllm_mmx.asm
@@ -58,11 +58,11 @@ sym(vp8_short_idct4x4llm_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL]        ;
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)]       ;
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL]    ;
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]   ;
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -70,10 +70,10 @@ sym(vp8_short_idct4x4llm_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
@@ -113,11 +113,11 @@ sym(vp8_short_idct4x4llm_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL]         ;
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)]        ;
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL]    ;
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)]   ;
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -125,16 +125,16 @@ sym(vp8_short_idct4x4llm_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
-        paddw       mm0,            [fours GLOBAL]
+        paddw       mm0,            [GLOBAL(fours)]
 
-        paddw       mm2,            [fours GLOBAL]
+        paddw       mm2,            [GLOBAL(fours)]
         movq        mm6,            mm2             ; a1
 
         movq        mm4,            mm0             ; b1
@@ -196,7 +196,7 @@ sym(vp8_short_idct4x4llm_1_mmx):
         mov         rax,            arg(0) ;input
         movd        mm0,            [rax]
 
-        paddw       mm0,            [fours GLOBAL]
+        paddw       mm0,            [GLOBAL(fours)]
         mov         rdx,            arg(1) ;output
 
         psraw       mm0,            3
@@ -239,7 +239,7 @@ sym(vp8_dc_only_idct_add_mmx):
 
         movd        mm5,            arg(0) ;input_dc
 
-        paddw       mm5,            [fours GLOBAL]
+        paddw       mm5,            [GLOBAL(fours)]
 
         psraw       mm5,            3
 
diff --git a/vp8/common/x86/idctllm_sse2.asm b/vp8/common/x86/idctllm_sse2.asm
index ac941851b..edee1578e 100644
--- a/vp8/common/x86/idctllm_sse2.asm
+++ b/vp8/common/x86/idctllm_sse2.asm
@@ -51,7 +51,7 @@ sym(idct_dequant_0_2x_sse2):
         pshufhw     xmm4,           xmm4,       00000000b
 
         mov         rax,            arg(2) ; pre
-        paddw       xmm4,           [fours GLOBAL]
+        paddw       xmm4,           [GLOBAL(fours)]
 
         movsxd      rcx,            dword ptr arg(5) ; blk_stride
         psraw       xmm4,           3
@@ -160,11 +160,11 @@ sym(idct_dequant_full_2x_sse2):
         movdqa      xmm5,           xmm1
         paddw       xmm2,           xmm0        ; a1 = 0+2
 
-        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_s1sqr2)]
         paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
 
         movdqa      xmm7,           xmm3
-        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm7,           [GLOBAL(x_c1sqr2less1)]
 
         paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
         psubw       xmm7,           xmm5        ; c1
@@ -172,10 +172,10 @@ sym(idct_dequant_full_2x_sse2):
         movdqa      xmm5,           xmm1
         movdqa      xmm4,           xmm3
 
-        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_c1sqr2less1)]
         paddw       xmm5,           xmm1
 
-        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm3,           [GLOBAL(x_s1sqr2)]
         paddw       xmm3,           xmm4
 
         paddw       xmm3,           xmm5        ; d1
@@ -229,11 +229,11 @@ sym(idct_dequant_full_2x_sse2):
         movdqa      xmm5,           xmm1
         paddw       xmm2,           xmm0            ; a1 = 0+2
 
-        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_s1sqr2)]
         paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
 
         movdqa      xmm7,           xmm3
-        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm7,           [GLOBAL(x_c1sqr2less1)]
 
         paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
         psubw       xmm7,           xmm5            ; c1
@@ -241,16 +241,16 @@ sym(idct_dequant_full_2x_sse2):
         movdqa      xmm5,           xmm1
         movdqa      xmm4,           xmm3
 
-        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_c1sqr2less1)]
         paddw       xmm5,           xmm1
 
-        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm3,           [GLOBAL(x_s1sqr2)]
         paddw       xmm3,           xmm4
 
         paddw       xmm3,           xmm5            ; d1
-        paddw       xmm0,           [fours GLOBAL]
+        paddw       xmm0,           [GLOBAL(fours)]
 
-        paddw       xmm2,           [fours GLOBAL]
+        paddw       xmm2,           [GLOBAL(fours)]
         movdqa      xmm6,           xmm2            ; a1
 
         movdqa      xmm4,           xmm0            ; b1
@@ -394,7 +394,7 @@ sym(idct_dequant_dc_0_2x_sse2):
         punpckldq   xmm4,           xmm4
 
     ; Rounding to dequant and downshift
-        paddw       xmm4,           [fours GLOBAL]
+        paddw       xmm4,           [GLOBAL(fours)]
         psraw       xmm4,           3
 
     ; Predict buffer needs to be expanded from bytes to words
@@ -505,11 +505,11 @@ sym(idct_dequant_dc_full_2x_sse2):
         movdqa      xmm5,           xmm1
         paddw       xmm2,           xmm0        ; a1 = 0+2
 
-        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_s1sqr2)]
         paddw       xmm5,           xmm1        ; ip1 * sin(pi/8) * sqrt(2)
 
         movdqa      xmm7,           xmm3
-        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm7,           [GLOBAL(x_c1sqr2less1)]
 
         paddw       xmm7,           xmm3        ; ip3 * cos(pi/8) * sqrt(2)
         psubw       xmm7,           xmm5        ; c1
@@ -517,10 +517,10 @@ sym(idct_dequant_dc_full_2x_sse2):
         movdqa      xmm5,           xmm1
         movdqa      xmm4,           xmm3
 
-        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_c1sqr2less1)]
         paddw       xmm5,           xmm1
 
-        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm3,           [GLOBAL(x_s1sqr2)]
         paddw       xmm3,           xmm4
 
         paddw       xmm3,           xmm5        ; d1
@@ -574,11 +574,11 @@ sym(idct_dequant_dc_full_2x_sse2):
         movdqa      xmm5,           xmm1
         paddw       xmm2,           xmm0            ; a1 = 0+2
 
-        pmulhw      xmm5,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_s1sqr2)]
         paddw       xmm5,           xmm1            ; ip1 * sin(pi/8) * sqrt(2)
 
         movdqa      xmm7,           xmm3
-        pmulhw      xmm7,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm7,           [GLOBAL(x_c1sqr2less1)]
 
         paddw       xmm7,           xmm3            ; ip3 * cos(pi/8) * sqrt(2)
         psubw       xmm7,           xmm5            ; c1
@@ -586,16 +586,16 @@ sym(idct_dequant_dc_full_2x_sse2):
         movdqa      xmm5,           xmm1
         movdqa      xmm4,           xmm3
 
-        pmulhw      xmm5,           [x_c1sqr2less1 GLOBAL]
+        pmulhw      xmm5,           [GLOBAL(x_c1sqr2less1)]
         paddw       xmm5,           xmm1
 
-        pmulhw      xmm3,           [x_s1sqr2 GLOBAL]
+        pmulhw      xmm3,           [GLOBAL(x_s1sqr2)]
         paddw       xmm3,           xmm4
 
         paddw       xmm3,           xmm5            ; d1
-        paddw       xmm0,           [fours GLOBAL]
+        paddw       xmm0,           [GLOBAL(fours)]
 
-        paddw       xmm2,           [fours GLOBAL]
+        paddw       xmm2,           [GLOBAL(fours)]
         movdqa      xmm6,           xmm2            ; a1
 
         movdqa      xmm4,           xmm0            ; b1
diff --git a/vp8/common/x86/loopfilter_mmx.asm b/vp8/common/x86/loopfilter_mmx.asm
index 0b39e627d..c6c215c3c 100644
--- a/vp8/common/x86/loopfilter_mmx.asm
+++ b/vp8/common/x86/loopfilter_mmx.asm
@@ -111,7 +111,7 @@ next8_h:
         psubusb     mm3, mm2              ; q1-=p1
         psubusb     mm2, mm4              ; p1-=q1
         por         mm2, mm3              ; abs(p1-q1)
-        pand        mm2, [tfe GLOBAL]     ; set lsb of each byte to zero
+        pand        mm2, [GLOBAL(tfe)]    ; set lsb of each byte to zero
         psrlw       mm2, 1                ; abs(p1-q1)/2
 
         movq        mm6, mm5              ; p0
@@ -150,12 +150,12 @@ next8_h:
         ; start work on filters
         movq        mm2, [rsi+2*rax]      ; p1
         movq        mm7, [rdi]            ; q1
-        pxor        mm2, [t80 GLOBAL]     ; p1 offset to convert to signed values
-        pxor        mm7, [t80 GLOBAL]     ; q1 offset to convert to signed values
+        pxor        mm2, [GLOBAL(t80)]    ; p1 offset to convert to signed values
+        pxor        mm7, [GLOBAL(t80)]    ; q1 offset to convert to signed values
         psubsb      mm2, mm7              ; p1 - q1
         pand        mm2, mm4              ; high var mask (hvm)(p1 - q1)
-        pxor        mm6, [t80 GLOBAL]     ; offset to convert to signed values
-        pxor        mm0, [t80 GLOBAL]     ; offset to convert to signed values
+        pxor        mm6, [GLOBAL(t80)]    ; offset to convert to signed values
+        pxor        mm0, [GLOBAL(t80)]    ; offset to convert to signed values
         movq        mm3, mm0              ; q0
         psubsb      mm0, mm6              ; q0 - p0
         paddsb      mm2, mm0              ; 1 * (q0 - p0) + hvm(p1 - q1)
@@ -163,8 +163,8 @@ next8_h:
         paddsb      mm2, mm0              ; 3 * (q0 - p0) + hvm(p1 - q1)
         pand        mm1, mm2                  ; mask filter values we don't care about
         movq        mm2, mm1
-        paddsb      mm1, [t4 GLOBAL]      ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-        paddsb      mm2, [t3 GLOBAL]      ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+        paddsb      mm1, [GLOBAL(t4)]     ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+        paddsb      mm2, [GLOBAL(t3)]     ; 3* (q0 - p0) + hvm(p1 - q1) + 3
 
         pxor        mm0, mm0             ;
         pxor        mm5, mm5
@@ -185,29 +185,29 @@ next8_h:
         movq        mm5, mm0              ; save results
 
         packsswb    mm0, mm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      mm5, [ones GLOBAL]
-        paddsw      mm1, [ones GLOBAL]
+        paddsw      mm5, [GLOBAL(ones)]
+        paddsw      mm1, [GLOBAL(ones)]
         psraw       mm5, 1                ; partial shifted one more time for 2nd tap
         psraw       mm1, 1                ; partial shifted one more time for 2nd tap
         packsswb    mm5, mm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4
         pandn       mm4, mm5              ; high edge variance additive
 
         paddsb      mm6, mm2              ; p0+= p0 add
-        pxor        mm6, [t80 GLOBAL]     ; unoffset
+        pxor        mm6, [GLOBAL(t80)]    ; unoffset
         movq        [rsi+rax], mm6        ; write back
 
         movq        mm6, [rsi+2*rax]      ; p1
-        pxor        mm6, [t80 GLOBAL]     ; reoffset
+        pxor        mm6, [GLOBAL(t80)]    ; reoffset
         paddsb      mm6, mm4              ; p1+= p1 add
-        pxor        mm6, [t80 GLOBAL]     ; unoffset
+        pxor        mm6, [GLOBAL(t80)]    ; unoffset
         movq        [rsi+2*rax], mm6      ; write back
 
         psubsb      mm3, mm0              ; q0-= q0 add
-        pxor        mm3, [t80 GLOBAL]     ; unoffset
+        pxor        mm3, [GLOBAL(t80)]    ; unoffset
         movq        [rsi], mm3            ; write back
 
         psubsb      mm7, mm4              ; q1-= q1 add
-        pxor        mm7, [t80 GLOBAL]     ; unoffset
+        pxor        mm7, [GLOBAL(t80)]    ; unoffset
         movq        [rdi], mm7            ; write back
 
         add         rsi,8
@@ -403,7 +403,7 @@ next8_v:
         psubusb     mm5,        mm1                         ; q1-=p1
         psubusb     mm1,        mm2                         ; p1-=q1
         por         mm5,        mm1                         ; abs(p1-q1)
-        pand        mm5,        [tfe GLOBAL]                ; set lsb of each byte to zero
+        pand        mm5,        [GLOBAL(tfe)]               ; set lsb of each byte to zero
         psrlw       mm5,        1                           ; abs(p1-q1)/2
 
         mov         rdx,        arg(2) ;flimit                      ;
@@ -455,14 +455,14 @@ next8_v:
         movq        mm6,        [rdx+8]         ; p0
         movq        mm0,        [rdx+16]        ; q0
 
-        pxor        mm2,        [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        mm7,        [t80 GLOBAL]    ; q1 offset to convert to signed values
+        pxor        mm2,        [GLOBAL(t80)]   ; p1 offset to convert to signed values
+        pxor        mm7,        [GLOBAL(t80)]   ; q1 offset to convert to signed values
 
         psubsb      mm2,        mm7             ; p1 - q1
         pand        mm2,        mm4             ; high var mask (hvm)(p1 - q1)
 
-        pxor        mm6,        [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        mm0,        [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        mm6,        [GLOBAL(t80)]   ; offset to convert to signed values
+        pxor        mm0,        [GLOBAL(t80)]   ; offset to convert to signed values
 
         movq        mm3,        mm0             ; q0
         psubsb      mm0,        mm6             ; q0 - p0
@@ -474,9 +474,9 @@ next8_v:
         pand       mm1,        mm2              ; mask filter values we don't care about
 
         movq        mm2,        mm1
-        paddsb      mm1,        [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+        paddsb      mm1,        [GLOBAL(t4)]      ; 3* (q0 - p0) + hvm(p1 - q1) + 4
 
-        paddsb      mm2,        [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+        paddsb      mm2,        [GLOBAL(t3)]      ; 3* (q0 - p0) + hvm(p1 - q1) + 3
         pxor        mm0,        mm0          ;
 
         pxor        mm5,        mm5
@@ -503,9 +503,9 @@ next8_v:
         movq        mm5,        mm0              ; save results
 
         packsswb    mm0,        mm1           ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      mm5,        [ones GLOBAL]
+        paddsw      mm5,        [GLOBAL(ones)]
 
-        paddsw      mm1,        [ones GLOBAL]
+        paddsw      mm1,        [GLOBAL(ones)]
         psraw       mm5,        1                 ; partial shifted one more time for 2nd tap
 
         psraw       mm1,        1                 ; partial shifted one more time for 2nd tap
@@ -514,22 +514,22 @@ next8_v:
         pandn       mm4,        mm5             ; high edge variance additive
 
         paddsb      mm6,        mm2             ; p0+= p0 add
-        pxor        mm6,        [t80 GLOBAL]    ; unoffset
+        pxor        mm6,        [GLOBAL(t80)]   ; unoffset
 
         ; mm6=p0                               ;
         movq        mm1,        [rdx]           ; p1
-        pxor        mm1,        [t80 GLOBAL]    ; reoffset
+        pxor        mm1,        [GLOBAL(t80)]   ; reoffset
 
         paddsb      mm1,        mm4                 ; p1+= p1 add
-        pxor        mm1,        [t80 GLOBAL]        ; unoffset
+        pxor        mm1,        [GLOBAL(t80)]       ; unoffset
         ; mm6 = p0 mm1 = p1
 
         psubsb      mm3,        mm0                 ; q0-= q0 add
-        pxor        mm3,        [t80 GLOBAL]        ; unoffset
+        pxor        mm3,        [GLOBAL(t80)]       ; unoffset
 
         ; mm3 = q0
         psubsb      mm7,        mm4                 ; q1-= q1 add
-        pxor        mm7,        [t80 GLOBAL]        ; unoffset
+        pxor        mm7,        [GLOBAL(t80)]       ; unoffset
         ; mm7 = q1
 
         ; tranpose and write back
@@ -708,7 +708,7 @@ next8_mbh:
         psubusb     mm3, mm2              ; q1-=p1
         psubusb     mm2, mm4              ; p1-=q1
         por         mm2, mm3              ; abs(p1-q1)
-        pand        mm2, [tfe GLOBAL]     ; set lsb of each byte to zero
+        pand        mm2, [GLOBAL(tfe)]    ; set lsb of each byte to zero
         psrlw       mm2, 1                ; abs(p1-q1)/2
 
         movq        mm6, mm5              ; p0
@@ -753,12 +753,12 @@ next8_mbh:
         ; start work on filters
         movq        mm2, [rsi+2*rax]      ; p1
         movq        mm7, [rdi]            ; q1
-        pxor        mm2, [t80 GLOBAL]     ; p1 offset to convert to signed values
-        pxor        mm7, [t80 GLOBAL]     ; q1 offset to convert to signed values
+        pxor        mm2, [GLOBAL(t80)]    ; p1 offset to convert to signed values
+        pxor        mm7, [GLOBAL(t80)]    ; q1 offset to convert to signed values
         psubsb      mm2, mm7              ; p1 - q1
 
-        pxor        mm6, [t80 GLOBAL]     ; offset to convert to signed values
-        pxor        mm0, [t80 GLOBAL]     ; offset to convert to signed values
+        pxor        mm6, [GLOBAL(t80)]    ; offset to convert to signed values
+        pxor        mm0, [GLOBAL(t80)]    ; offset to convert to signed values
         movq        mm3, mm0              ; q0
         psubsb      mm0, mm6              ; q0 - p0
         paddsb      mm2, mm0              ; 1 * (q0 - p0) + (p1 - q1)
@@ -772,7 +772,7 @@ next8_mbh:
         pand        mm2, mm4;             ; Filter2 = vp8_filter & hev
 
         movq        mm5,        mm2       ;
-        paddsb      mm5,        [t3 GLOBAL];
+        paddsb      mm5,        [GLOBAL(t3)];
 
         pxor        mm0, mm0              ; 0
         pxor        mm7, mm7              ; 0
@@ -785,7 +785,7 @@ next8_mbh:
 
         movq        mm5, mm0              ; Filter2
 
-        paddsb      mm2, [t4 GLOBAL]      ; vp8_signed_char_clamp(Filter2 + 4)
+        paddsb      mm2, [GLOBAL(t4)]     ; vp8_signed_char_clamp(Filter2 + 4)
         pxor        mm0, mm0              ; 0
         pxor        mm7, mm7              ; 0
 
@@ -818,10 +818,10 @@ next8_mbh:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s27 GLOBAL]
-        pmulhw      mm2, [s27 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s27)]
+        pmulhw      mm2, [GLOBAL(s27)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
@@ -829,8 +829,8 @@ next8_mbh:
         psubsb      mm3, mm1
         paddsb      mm6, mm1
 
-        pxor        mm3, [t80 GLOBAL]
-        pxor        mm6, [t80 GLOBAL]
+        pxor        mm3, [GLOBAL(t80)]
+        pxor        mm6, [GLOBAL(t80)]
         movq        [rsi+rax], mm6
         movq        [rsi],     mm3
 
@@ -844,10 +844,10 @@ next8_mbh:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s18 GLOBAL]
-        pmulhw      mm2, [s18 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s18)]
+        pmulhw      mm2, [GLOBAL(s18)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
@@ -855,14 +855,14 @@ next8_mbh:
         movq        mm3, [rdi]
         movq        mm6, [rsi+rax*2]       ; p1
 
-        pxor        mm3, [t80 GLOBAL]
-        pxor        mm6, [t80 GLOBAL]
+        pxor        mm3, [GLOBAL(t80)]
+        pxor        mm6, [GLOBAL(t80)]
 
         paddsb      mm6, mm1
         psubsb      mm3, mm1
 
-        pxor        mm6, [t80 GLOBAL]
-        pxor        mm3, [t80 GLOBAL]
+        pxor        mm6, [GLOBAL(t80)]
+        pxor        mm3, [GLOBAL(t80)]
         movq        [rdi], mm3
         movq        [rsi+rax*2], mm6
 
@@ -876,10 +876,10 @@ next8_mbh:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s9 GLOBAL]
-        pmulhw      mm2, [s9 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s9)]
+        pmulhw      mm2, [GLOBAL(s9)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
@@ -889,14 +889,14 @@ next8_mbh:
         neg         rax
         movq        mm3, [rdi+rax  ]
 
-        pxor        mm6, [t80 GLOBAL]
-        pxor        mm3, [t80 GLOBAL]
+        pxor        mm6, [GLOBAL(t80)]
+        pxor        mm3, [GLOBAL(t80)]
 
         paddsb      mm6, mm1
         psubsb      mm3, mm1
 
-        pxor        mm6, [t80 GLOBAL]
-        pxor        mm3, [t80 GLOBAL]
+        pxor        mm6, [GLOBAL(t80)]
+        pxor        mm3, [GLOBAL(t80)]
         movq        [rdi+rax  ], mm3
         neg         rax
         movq        [rdi+rax*4], mm6
@@ -1105,7 +1105,7 @@ next8_mbv:
         psubusb     mm5,        mm1                         ; q1-=p1
         psubusb     mm1,        mm2                         ; p1-=q1
         por         mm5,        mm1                         ; abs(p1-q1)
-        pand        mm5,        [tfe GLOBAL]                ; set lsb of each byte to zero
+        pand        mm5,        [GLOBAL(tfe)]               ; set lsb of each byte to zero
         psrlw       mm5,        1                           ; abs(p1-q1)/2
 
         mov         rdx,        arg(2) ;flimit                      ;
@@ -1155,14 +1155,14 @@ next8_mbv:
         ; start work on filters
         movq        mm2, [rdx+16]         ; p1
         movq        mm7, [rdx+40]         ; q1
-        pxor        mm2, [t80 GLOBAL]     ; p1 offset to convert to signed values
-        pxor        mm7, [t80 GLOBAL]     ; q1 offset to convert to signed values
+        pxor        mm2, [GLOBAL(t80)]    ; p1 offset to convert to signed values
+        pxor        mm7, [GLOBAL(t80)]    ; q1 offset to convert to signed values
         psubsb      mm2, mm7              ; p1 - q1
 
         movq        mm6, [rdx+24]         ; p0
         movq        mm0, [rdx+32]         ; q0
-        pxor        mm6, [t80 GLOBAL]     ; offset to convert to signed values
-        pxor        mm0, [t80 GLOBAL]     ; offset to convert to signed values
+        pxor        mm6, [GLOBAL(t80)]    ; offset to convert to signed values
+        pxor        mm0, [GLOBAL(t80)]    ; offset to convert to signed values
 
         movq        mm3, mm0              ; q0
         psubsb      mm0, mm6              ; q0 - p0
@@ -1176,7 +1176,7 @@ next8_mbv:
         pand        mm2, mm4;             ; Filter2 = vp8_filter & hev
 
         movq        mm5,        mm2       ;
-        paddsb      mm5,        [t3 GLOBAL];
+        paddsb      mm5,        [GLOBAL(t3)];
 
         pxor        mm0, mm0              ; 0
         pxor        mm7, mm7              ; 0
@@ -1189,7 +1189,7 @@ next8_mbv:
 
         movq        mm5, mm0              ; Filter2
 
-        paddsb      mm2, [t4 GLOBAL]      ; vp8_signed_char_clamp(Filter2 + 4)
+        paddsb      mm2, [GLOBAL(t4)]     ; vp8_signed_char_clamp(Filter2 + 4)
         pxor        mm0, mm0              ; 0
         pxor        mm7, mm7              ; 0
 
@@ -1222,10 +1222,10 @@ next8_mbv:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s27 GLOBAL]
-        pmulhw      mm2, [s27 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s27)]
+        pmulhw      mm2, [GLOBAL(s27)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
@@ -1233,8 +1233,8 @@ next8_mbv:
         psubsb      mm3, mm1
         paddsb      mm6, mm1
 
-        pxor        mm3, [t80 GLOBAL]
-        pxor        mm6, [t80 GLOBAL]
+        pxor        mm3, [GLOBAL(t80)]
+        pxor        mm6, [GLOBAL(t80)]
         movq        [rdx+24], mm6
         movq        [rdx+32], mm3
 
@@ -1248,24 +1248,24 @@ next8_mbv:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s18 GLOBAL]
-        pmulhw      mm2, [s18 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s18)]
+        pmulhw      mm2, [GLOBAL(s18)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
 
         movq        mm3, [rdx + 40]
         movq        mm6, [rdx + 16]       ; p1
-        pxor        mm3, [t80 GLOBAL]
-        pxor        mm6, [t80 GLOBAL]
+        pxor        mm3, [GLOBAL(t80)]
+        pxor        mm6, [GLOBAL(t80)]
 
         paddsb      mm6, mm1
         psubsb      mm3, mm1
 
-        pxor        mm6, [t80 GLOBAL]
-        pxor        mm3, [t80 GLOBAL]
+        pxor        mm6, [GLOBAL(t80)]
+        pxor        mm3, [GLOBAL(t80)]
         movq        [rdx + 40], mm3
         movq        [rdx + 16], mm6
 
@@ -1279,10 +1279,10 @@ next8_mbv:
         pxor        mm2, mm2
         punpcklbw   mm1, mm4
         punpckhbw   mm2, mm4
-        pmulhw      mm1, [s9 GLOBAL]
-        pmulhw      mm2, [s9 GLOBAL]
-        paddw       mm1, [s63 GLOBAL]
-        paddw       mm2, [s63 GLOBAL]
+        pmulhw      mm1, [GLOBAL(s9)]
+        pmulhw      mm2, [GLOBAL(s9)]
+        paddw       mm1, [GLOBAL(s63)]
+        paddw       mm2, [GLOBAL(s63)]
         psraw       mm1, 7
         psraw       mm2, 7
         packsswb    mm1, mm2
@@ -1290,14 +1290,14 @@ next8_mbv:
         movq        mm6, [rdx+ 8]
         movq        mm3, [rdx+48]
 
-        pxor        mm6, [t80 GLOBAL]
-        pxor        mm3, [t80 GLOBAL]
+        pxor        mm6, [GLOBAL(t80)]
+        pxor        mm3, [GLOBAL(t80)]
 
         paddsb      mm6, mm1
         psubsb      mm3, mm1
 
-        pxor        mm6, [t80 GLOBAL]           ; mm6 = 71 61 51 41 31 21 11 01
-        pxor        mm3, [t80 GLOBAL]           ; mm3 = 76 66 56 46 36 26 15 06
+        pxor        mm6, [GLOBAL(t80)]          ; mm6 = 71 61 51 41 31 21 11 01
+        pxor        mm3, [GLOBAL(t80)]          ; mm3 = 76 66 56 46 36 26 15 06
 
         ; tranpose and write back
         movq        mm0,    [rdx]               ; mm0 = 70 60 50 40 30 20 10 00
@@ -1432,7 +1432,7 @@ nexts8_h:
         psubusb     mm0, mm1              ; q1-=p1
         psubusb     mm1, mm4              ; p1-=q1
         por         mm1, mm0              ; abs(p1-q1)
-        pand        mm1, [tfe GLOBAL]     ; set lsb of each byte to zero
+        pand        mm1, [GLOBAL(tfe)]    ; set lsb of each byte to zero
         psrlw       mm1, 1                ; abs(p1-q1)/2
 
         movq        mm5, [rsi+rax]        ; p0
@@ -1450,12 +1450,12 @@ nexts8_h:
         pcmpeqb     mm5, mm3
 
         ; start work on filters
-        pxor        mm2, [t80 GLOBAL]     ; p1 offset to convert to signed values
-        pxor        mm7, [t80 GLOBAL]     ; q1 offset to convert to signed values
+        pxor        mm2, [GLOBAL(t80)]    ; p1 offset to convert to signed values
+        pxor        mm7, [GLOBAL(t80)]    ; q1 offset to convert to signed values
         psubsb      mm2, mm7              ; p1 - q1
 
-        pxor        mm6, [t80 GLOBAL]     ; offset to convert to signed values
-        pxor        mm0, [t80 GLOBAL]     ; offset to convert to signed values
+        pxor        mm6, [GLOBAL(t80)]    ; offset to convert to signed values
+        pxor        mm0, [GLOBAL(t80)]    ; offset to convert to signed values
         movq        mm3, mm0              ; q0
         psubsb      mm0, mm6              ; q0 - p0
         paddsb      mm2, mm0              ; p1 - q1 + 1 * (q0 - p0)
@@ -1464,7 +1464,7 @@ nexts8_h:
         pand        mm5, mm2              ; mask filter values we don't care about
 
         ; do + 4 side
-        paddsb      mm5, [t4 GLOBAL]      ; 3* (q0 - p0) + (p1 - q1) + 4
+        paddsb      mm5, [GLOBAL(t4)]     ; 3* (q0 - p0) + (p1 - q1) + 4
 
         movq        mm0, mm5              ; get a copy of filters
         psllw       mm0, 8                ; shift left 8
@@ -1477,12 +1477,12 @@ nexts8_h:
         por         mm0, mm1              ; put the two together to get result
 
         psubsb      mm3, mm0              ; q0-= q0 add
-        pxor        mm3, [t80 GLOBAL]     ; unoffset
+        pxor        mm3, [GLOBAL(t80)]    ; unoffset
         movq        [rsi], mm3            ; write back
 
 
         ; now do +3 side
-        psubsb      mm5, [t1s GLOBAL]      ; +3 instead of +4
+        psubsb      mm5, [GLOBAL(t1s)]     ; +3 instead of +4
 
         movq        mm0, mm5              ; get a copy of filters
         psllw       mm0, 8                ; shift left 8
@@ -1494,7 +1494,7 @@ nexts8_h:
 
 
         paddsb      mm6, mm0              ; p0+= p0 add
-        pxor        mm6, [t80 GLOBAL]     ; unoffset
+        pxor        mm6, [GLOBAL(t80)]    ; unoffset
         movq        [rsi+rax], mm6        ; write back
 
         add         rsi,8
@@ -1589,7 +1589,7 @@ nexts8_v:
         psubusb     mm7,        mm6                             ; q1-=p1
         psubusb     mm6,        mm3                             ; p1-=q1
         por         mm6,        mm7                             ; abs(p1-q1)
-        pand        mm6,        [tfe GLOBAL]                    ; set lsb of each byte to zero
+        pand        mm6,        [GLOBAL(tfe)]                   ; set lsb of each byte to zero
         psrlw       mm6,        1                               ; abs(p1-q1)/2
 
         movq        mm5,        mm1                             ; p0
@@ -1617,16 +1617,16 @@ nexts8_v:
         movq        t0,         mm0
         movq        t1,         mm3
 
-        pxor        mm0,        [t80 GLOBAL]                    ; p1 offset to convert to signed values
-        pxor        mm3,        [t80 GLOBAL]                    ; q1 offset to convert to signed values
+        pxor        mm0,        [GLOBAL(t80)]                   ; p1 offset to convert to signed values
+        pxor        mm3,        [GLOBAL(t80)]                   ; q1 offset to convert to signed values
 
         psubsb      mm0,        mm3                             ; p1 - q1
         movq        mm6,        mm1                             ; p0
 
         movq        mm7,        mm2                             ; q0
-        pxor        mm6,        [t80 GLOBAL]                    ; offset to convert to signed values
+        pxor        mm6,        [GLOBAL(t80)]                   ; offset to convert to signed values
 
-        pxor        mm7,        [t80 GLOBAL]                    ; offset to convert to signed values
+        pxor        mm7,        [GLOBAL(t80)]                   ; offset to convert to signed values
         movq        mm3,        mm7                             ; offseted ; q0
 
         psubsb      mm7,        mm6                             ; q0 - p0
@@ -1637,7 +1637,7 @@ nexts8_v:
 
         pand        mm5,        mm0                             ; mask filter values we don't care about
 
-        paddsb      mm5,        [t4 GLOBAL]                     ;  3* (q0 - p0) + (p1 - q1) + 4
+        paddsb      mm5,        [GLOBAL(t4)]                    ;  3* (q0 - p0) + (p1 - q1) + 4
 
         movq        mm0,        mm5                             ; get a copy of filters
         psllw       mm0,        8                               ; shift left 8
@@ -1651,10 +1651,10 @@ nexts8_v:
         por         mm0,        mm7                             ; put the two together to get result
 
         psubsb      mm3,        mm0                             ; q0-= q0sz add
-        pxor        mm3,        [t80 GLOBAL]                    ; unoffset
+        pxor        mm3,        [GLOBAL(t80)]                   ; unoffset
 
         ; now do +3 side
-        psubsb      mm5, [t1s GLOBAL]                           ; +3 instead of +4
+        psubsb      mm5, [GLOBAL(t1s)]                          ; +3 instead of +4
 
         movq        mm0, mm5                                    ; get a copy of filters
         psllw       mm0, 8                                      ; shift left 8
@@ -1666,7 +1666,7 @@ nexts8_v:
         por         mm0, mm5                                    ; put the two together to get result
 
         paddsb      mm6, mm0                                    ; p0+= p0 add
-        pxor        mm6, [t80 GLOBAL]                           ; unoffset
+        pxor        mm6, [GLOBAL(t80)]                          ; unoffset
 
 
         movq        mm0,        t0
diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index dc8167d4d..1ab6d0f39 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -126,7 +126,7 @@
         movdqa      xmm4,                   XMMWORD PTR [rdx] ; flimit
 
         movdqa      xmm3,                   xmm0              ; q0
-        pand        xmm2,                   [tfe GLOBAL]      ; set lsb of each byte to zero
+        pand        xmm2,                   [GLOBAL(tfe)]     ; set lsb of each byte to zero
 
         mov         rdx,                    arg(4)            ; hev get thresh
 
@@ -182,14 +182,14 @@
         movdqa      xmm0,                   [rdx+32]          ; q0
 %endif
 
-        pxor        xmm2,                   [t80 GLOBAL]      ; p1 offset to convert to signed values
-        pxor        xmm7,                   [t80 GLOBAL]      ; q1 offset to convert to signed values
+        pxor        xmm2,                   [GLOBAL(t80)]     ; p1 offset to convert to signed values
+        pxor        xmm7,                   [GLOBAL(t80)]     ; q1 offset to convert to signed values
 
         psubsb      xmm2,                   xmm7              ; p1 - q1
-        pxor        xmm6,                   [t80 GLOBAL]      ; offset to convert to signed values
+        pxor        xmm6,                   [GLOBAL(t80)]     ; offset to convert to signed values
 
         pand        xmm2,                   xmm4              ; high var mask (hvm)(p1 - q1)
-        pxor        xmm0,                   [t80 GLOBAL]      ; offset to convert to signed values
+        pxor        xmm0,                   [GLOBAL(t80)]     ; offset to convert to signed values
 
         movdqa      xmm3,                   xmm0              ; q0
         psubsb      xmm0,                   xmm6              ; q0 - p0
@@ -204,8 +204,8 @@
 
         movdqa      xmm2,                   xmm1
 
-        paddsb      xmm1,                   [t4 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 4
-        paddsb      xmm2,                   [t3 GLOBAL]       ; 3* (q0 - p0) + hvm(p1 - q1) + 3
+        paddsb      xmm1,                   [GLOBAL(t4)]      ; 3* (q0 - p0) + hvm(p1 - q1) + 4
+        paddsb      xmm2,                   [GLOBAL(t3)]      ; 3* (q0 - p0) + hvm(p1 - q1) + 3
 
         punpckhbw   xmm5,                   xmm2              ; axbxcxdx
         punpcklbw   xmm2,                   xmm2              ; exfxgxhx
@@ -223,9 +223,9 @@
         movdqa      xmm5,                   xmm0              ; save results
 
         packsswb    xmm0,                   xmm1              ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3
-        paddsw      xmm5,                   [ones GLOBAL]
+        paddsw      xmm5,                   [GLOBAL(ones)]
 
-        paddsw      xmm1,                   [ones GLOBAL]
+        paddsw      xmm1,                   [GLOBAL(ones)]
         psraw       xmm5,                   1                 ; partial shifted one more time for 2nd tap
 
         psraw       xmm1,                   1                 ; partial shifted one more time for 2nd tap
@@ -241,18 +241,18 @@
         movdqa      xmm1,                   [rdx]             ; p1
 %endif
         pandn       xmm4,                   xmm5              ; high edge variance additive
-        pxor        xmm6,                   [t80 GLOBAL]      ; unoffset
+        pxor        xmm6,                   [GLOBAL(t80)]     ; unoffset
 
-        pxor        xmm1,                   [t80 GLOBAL]      ; reoffset
+        pxor        xmm1,                   [GLOBAL(t80)]     ; reoffset
         psubsb      xmm3,                   xmm0              ; q0-= q0 add
 
         paddsb      xmm1,                   xmm4              ; p1+= p1 add
-        pxor        xmm3,                   [t80 GLOBAL]      ; unoffset
+        pxor        xmm3,                   [GLOBAL(t80)]     ; unoffset
 
-        pxor        xmm1,                   [t80 GLOBAL]      ; unoffset
+        pxor        xmm1,                   [GLOBAL(t80)]     ; unoffset
         psubsb      xmm7,                   xmm4              ; q1-= q1 add
 
-        pxor        xmm7,                   [t80 GLOBAL]      ; unoffset
+        pxor        xmm7,                   [GLOBAL(t80)]     ; unoffset
 %if %1 == 0
         lea         rsi,                    [rsi + rcx*2]
         lea         rdi,                    [rdi + rcx*2]
@@ -401,10 +401,10 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm0,                   [rdx+64]        ; q0
 %endif
 
-        pxor        xmm2,                   [t80 GLOBAL]    ; p1 offset to convert to signed values
-        pxor        xmm7,                   [t80 GLOBAL]    ; q1 offset to convert to signed values
-        pxor        xmm6,                   [t80 GLOBAL]    ; offset to convert to signed values
-        pxor        xmm0,                   [t80 GLOBAL]    ; offset to convert to signed values
+        pxor        xmm2,                   [GLOBAL(t80)]   ; p1 offset to convert to signed values
+        pxor        xmm7,                   [GLOBAL(t80)]   ; q1 offset to convert to signed values
+        pxor        xmm6,                   [GLOBAL(t80)]   ; offset to convert to signed values
+        pxor        xmm0,                   [GLOBAL(t80)]   ; offset to convert to signed values
 
         psubsb      xmm2,                   xmm7            ; p1 - q1
         movdqa      xmm3,                   xmm0            ; q0
@@ -431,14 +431,14 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm5,                   xmm2
 
         punpckhbw   xmm1,                   xmm4            ; Filter 2 (lo)
-        paddsb      xmm5,                   [t3 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 3)
+        paddsb      xmm5,                   [GLOBAL(t3)]    ; vp8_signed_char_clamp(Filter2 + 3)
 
-        pmulhw      xmm1,                   [s9 GLOBAL]     ; Filter 2 (lo) * 9
+        pmulhw      xmm1,                   [GLOBAL(s9)]    ; Filter 2 (lo) * 9
 
-        pmulhw      xmm0,                   [s9 GLOBAL]     ; Filter 2 (hi) * 9
+        pmulhw      xmm0,                   [GLOBAL(s9)]    ; Filter 2 (hi) * 9
 
         punpckhbw   xmm7,                   xmm5            ; axbxcxdx
-        paddsb      xmm2,                   [t4 GLOBAL]     ; vp8_signed_char_clamp(Filter2 + 4)
+        paddsb      xmm2,                   [GLOBAL(t4)]    ; vp8_signed_char_clamp(Filter2 + 4)
 
         punpcklbw   xmm5,                   xmm5            ; exfxgxhx
         psraw       xmm7,                   11              ; sign extended shift right by 3
@@ -462,9 +462,9 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm5,                   xmm0
 
         movdqa      xmm2,                   xmm5
-        paddw       xmm0,                   [s63 GLOBAL]    ; Filter 2 (hi) * 9 + 63
+        paddw       xmm0,                   [GLOBAL(s63)]   ; Filter 2 (hi) * 9 + 63
 
-        paddw       xmm1,                   [s63 GLOBAL]    ; Filter 2 (lo) * 9 + 63
+        paddw       xmm1,                   [GLOBAL(s63)]   ; Filter 2 (lo) * 9 + 63
         paddw       xmm5,                   xmm5            ; Filter 2 (hi) * 18
 
         paddw       xmm7,                   xmm7            ; Filter 2 (lo) * 18
@@ -510,26 +510,26 @@ sym(vp8_loop_filter_horizontal_edge_uv_sse2):
         movdqa      xmm7,                   XMMWORD PTR [rdx+16]    ; p2
 %endif
 
-        pxor        xmm3,                   [t80 GLOBAL]    ; *oq0 = sq^0x80
-        pxor        xmm6,                   [t80 GLOBAL]    ; *oq0 = sp^0x80
+        pxor        xmm3,                   [GLOBAL(t80)]   ; *oq0 = sq^0x80
+        pxor        xmm6,                   [GLOBAL(t80)]   ; *oq0 = sp^0x80
 
-        pxor        xmm1,                   [t80 GLOBAL]
-        pxor        xmm4,                   [t80 GLOBAL]
+        pxor        xmm1,                   [GLOBAL(t80)]
+        pxor        xmm4,                   [GLOBAL(t80)]
 
         psubsb      xmm1,                   xmm2            ; sq = vp8_signed_char_clamp(qs1 - u2)
         paddsb      xmm4,                   xmm2            ; sp = vp8_signed_char_clamp(ps1 - u2)
 
-        pxor        xmm1,                   [t80 GLOBAL]    ; *oq1 = sq^0x80;
-        pxor        xmm4,                   [t80 GLOBAL]    ; *op1 = sp^0x80;
+        pxor        xmm1,                   [GLOBAL(t80)]   ; *oq1 = sq^0x80;
+        pxor        xmm4,                   [GLOBAL(t80)]   ; *op1 = sp^0x80;
 
-        pxor        xmm7,                   [t80 GLOBAL]
-        pxor        xmm5,                   [t80 GLOBAL]
+        pxor        xmm7,                   [GLOBAL(t80)]
+        pxor        xmm5,                   [GLOBAL(t80)]
 
         paddsb      xmm7,                   xmm0            ; sp = vp8_signed_char_clamp(ps2 - u)
         psubsb      xmm5,                   xmm0            ; sq = vp8_signed_char_clamp(qs2 - u)
 
-        pxor        xmm7,                   [t80 GLOBAL]    ; *op2 = sp^0x80;
-        pxor        xmm5,                   [t80 GLOBAL]    ; *oq2 = sq^0x80;
+        pxor        xmm7,                   [GLOBAL(t80)]   ; *op2 = sp^0x80;
+        pxor        xmm5,                   [GLOBAL(t80)]   ; *oq2 = sq^0x80;
 
 %if %1 == 0
         lea         rsi,                    [rsi+rcx*2]
@@ -915,7 +915,7 @@ sym(vp8_mbloop_filter_horizontal_edge_uv_sse2):
         por         xmm5,               xmm1            ; abs(p1-q1)
         movdqa      xmm1,               xmm3            ; p0
 
-        pand        xmm5,               [tfe GLOBAL]    ; set lsb of each byte to zero
+        pand        xmm5,               [GLOBAL(tfe)]   ; set lsb of each byte to zero
         psubusb     xmm1,               xmm6            ; p0-q0
 
         psrlw       xmm5,               1               ; abs(p1-q1)/2
@@ -1415,7 +1415,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         psubusb     xmm0, xmm1              ; q1-=p1
         psubusb     xmm1, xmm4              ; p1-=q1
         por         xmm1, xmm0              ; abs(p1-q1)
-        pand        xmm1, [tfe GLOBAL]      ; set lsb of each byte to zero
+        pand        xmm1, [GLOBAL(tfe)]     ; set lsb of each byte to zero
         psrlw       xmm1, 1                 ; abs(p1-q1)/2
 
         movdqu      xmm5, [rsi+rax]         ; p0
@@ -1433,12 +1433,12 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         pcmpeqb     xmm5, xmm3
 
         ; start work on filters
-        pxor        xmm2, [t80 GLOBAL]      ; p1 offset to convert to signed values
-        pxor        xmm7, [t80 GLOBAL]      ; q1 offset to convert to signed values
+        pxor        xmm2, [GLOBAL(t80)]     ; p1 offset to convert to signed values
+        pxor        xmm7, [GLOBAL(t80)]     ; q1 offset to convert to signed values
         psubsb      xmm2, xmm7              ; p1 - q1
 
-        pxor        xmm6, [t80 GLOBAL]      ; offset to convert to signed values
-        pxor        xmm0, [t80 GLOBAL]      ; offset to convert to signed values
+        pxor        xmm6, [GLOBAL(t80)]     ; offset to convert to signed values
+        pxor        xmm0, [GLOBAL(t80)]     ; offset to convert to signed values
         movdqa      xmm3, xmm0              ; q0
         psubsb      xmm0, xmm6              ; q0 - p0
         paddsb      xmm2, xmm0              ; p1 - q1 + 1 * (q0 - p0)
@@ -1447,7 +1447,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         pand        xmm5, xmm2              ; mask filter values we don't care about
 
         ; do + 4 side
-        paddsb      xmm5, [t4 GLOBAL]       ; 3* (q0 - p0) + (p1 - q1) + 4
+        paddsb      xmm5, [GLOBAL(t4)]      ; 3* (q0 - p0) + (p1 - q1) + 4
 
         movdqa      xmm0, xmm5              ; get a copy of filters
         psllw       xmm0, 8                 ; shift left 8
@@ -1460,11 +1460,11 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
         por         xmm0, xmm1              ; put the two together to get result
 
         psubsb      xmm3, xmm0              ; q0-= q0 add
-        pxor        xmm3, [t80 GLOBAL]      ; unoffset
+        pxor        xmm3, [GLOBAL(t80)]     ; unoffset
         movdqu      [rsi], xmm3             ; write back
 
         ; now do +3 side
-        psubsb      xmm5, [t1s GLOBAL]      ; +3 instead of +4
+        psubsb      xmm5, [GLOBAL(t1s)]     ; +3 instead of +4
 
         movdqa      xmm0, xmm5              ; get a copy of filters
         psllw       xmm0, 8                 ; shift left 8
@@ -1476,7 +1476,7 @@ sym(vp8_loop_filter_simple_horizontal_edge_sse2):
 
 
         paddsb      xmm6, xmm0              ; p0+= p0 add
-        pxor        xmm6, [t80 GLOBAL]      ; unoffset
+        pxor        xmm6, [GLOBAL(t80)]     ; unoffset
         movdqu      [rsi+rax], xmm6         ; write back
 
     ; begin epilog
@@ -1596,7 +1596,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         psubusb     xmm7,       xmm0                            ; q1-=p1
         psubusb     xmm6,       xmm3                            ; p1-=q1
         por         xmm6,       xmm7                            ; abs(p1-q1)
-        pand        xmm6,       [tfe GLOBAL]                    ; set lsb of each byte to zero
+        pand        xmm6,       [GLOBAL(tfe)]                   ; set lsb of each byte to zero
         psrlw       xmm6,       1                               ; abs(p1-q1)/2
 
         movdqa      xmm5,       xmm1                            ; p0
@@ -1622,16 +1622,16 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         movdqa        t0,        xmm0
         movdqa        t1,        xmm3
 
-        pxor        xmm0,        [t80 GLOBAL]                   ; p1 offset to convert to signed values
-        pxor        xmm3,        [t80 GLOBAL]                   ; q1 offset to convert to signed values
+        pxor        xmm0,        [GLOBAL(t80)]                  ; p1 offset to convert to signed values
+        pxor        xmm3,        [GLOBAL(t80)]                  ; q1 offset to convert to signed values
 
         psubsb      xmm0,        xmm3                           ; p1 - q1
         movdqa      xmm6,        xmm1                           ; p0
 
         movdqa      xmm7,        xmm2                           ; q0
-        pxor        xmm6,        [t80 GLOBAL]                   ; offset to convert to signed values
+        pxor        xmm6,        [GLOBAL(t80)]                  ; offset to convert to signed values
 
-        pxor        xmm7,        [t80 GLOBAL]                   ; offset to convert to signed values
+        pxor        xmm7,        [GLOBAL(t80)]                  ; offset to convert to signed values
         movdqa      xmm3,        xmm7                           ; offseted ; q0
 
         psubsb      xmm7,        xmm6                           ; q0 - p0
@@ -1643,7 +1643,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         pand        xmm5,        xmm0                           ; mask filter values we don't care about
 
 
-        paddsb      xmm5,        [t4 GLOBAL]                    ;  3* (q0 - p0) + (p1 - q1) + 4
+        paddsb      xmm5,        [GLOBAL(t4)]                   ;  3* (q0 - p0) + (p1 - q1) + 4
 
         movdqa      xmm0,        xmm5                           ; get a copy of filters
         psllw       xmm0,        8                              ; shift left 8
@@ -1658,10 +1658,10 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         por         xmm0,        xmm7                           ; put the two together to get result
 
         psubsb      xmm3,        xmm0                           ; q0-= q0sz add
-        pxor        xmm3,        [t80 GLOBAL]                   ; unoffset   q0
+        pxor        xmm3,        [GLOBAL(t80)]                  ; unoffset   q0
 
         ; now do +3 side
-        psubsb      xmm5,        [t1s GLOBAL]                   ; +3 instead of +4
+        psubsb      xmm5,        [GLOBAL(t1s)]                  ; +3 instead of +4
         movdqa      xmm0,        xmm5                           ; get a copy of filters
 
         psllw       xmm0,        8                              ; shift left 8
@@ -1674,7 +1674,7 @@ sym(vp8_loop_filter_simple_vertical_edge_sse2):
         por         xmm0,        xmm5                           ; put the two together to get result
 
         paddsb      xmm6,        xmm0                           ; p0+= p0 add
-        pxor        xmm6,        [t80 GLOBAL]                   ; unoffset   p0
+        pxor        xmm6,        [GLOBAL(t80)]                  ; unoffset   p0
 
         movdqa      xmm0,        t0                             ; p1
         movdqa      xmm4,        t1                             ; q1
diff --git a/vp8/common/x86/postproc_mmx.asm b/vp8/common/x86/postproc_mmx.asm
index 349ac0d3b..787e83268 100644
--- a/vp8/common/x86/postproc_mmx.asm
+++ b/vp8/common/x86/postproc_mmx.asm
@@ -37,16 +37,16 @@ sym(vp8_post_proc_down_and_across_mmx):
 %if ABI_IS_32BIT=1 && CONFIG_PIC=1
     ; move the global rd onto the stack, since we don't have enough registers
     ; to do PIC addressing
-    movq        mm0, [rd GLOBAL]
+    movq        mm0, [GLOBAL(rd)]
     sub         rsp, 8
     movq        [rsp], mm0
 %define RD [rsp]
 %else
-%define RD [rd GLOBAL]
+%define RD [GLOBAL(rd)]
 %endif
 
         push        rbx
-        lea         rbx, [Blur GLOBAL]
+        lea         rbx, [GLOBAL(Blur)]
         movd        mm2, dword ptr arg(6) ;flimit
         punpcklwd   mm2, mm2
         punpckldq   mm2, mm2
@@ -286,7 +286,7 @@ sym(vp8_mbpost_proc_down_mmx):
 %define flimit2 [rsp+128]
 
 %if ABI_IS_32BIT=0
-    lea         r8,       [sym(vp8_rv) GLOBAL]
+    lea         r8,       [GLOBAL(sym(vp8_rv))]
 %endif
 
     ;rows +=8;
@@ -404,7 +404,7 @@ loop_row:
             and         rcx,        127
 %if ABI_IS_32BIT=1 && CONFIG_PIC=1
             push        rax
-            lea         rax,        [sym(vp8_rv) GLOBAL]
+            lea         rax,        [GLOBAL(sym(vp8_rv))]
             movq        mm4,        [rax + rcx*2] ;vp8_rv[rcx*2]
             pop         rax
 %elif ABI_IS_32BIT=0
diff --git a/vp8/common/x86/postproc_sse2.asm b/vp8/common/x86/postproc_sse2.asm
index 276f208ff..30b4bf53a 100644
--- a/vp8/common/x86/postproc_sse2.asm
+++ b/vp8/common/x86/postproc_sse2.asm
@@ -36,12 +36,12 @@ sym(vp8_post_proc_down_and_across_xmm):
     ALIGN_STACK 16, rax
     ; move the global rd onto the stack, since we don't have enough registers
     ; to do PIC addressing
-    movdqa      xmm0, [rd42 GLOBAL]
+    movdqa      xmm0, [GLOBAL(rd42)]
     sub         rsp, 16
     movdqa      [rsp], xmm0
 %define RD42 [rsp]
 %else
-%define RD42 [rd42 GLOBAL]
+%define RD42 [GLOBAL(rd42)]
 %endif
 
 
@@ -275,7 +275,7 @@ sym(vp8_mbpost_proc_down_xmm):
 %define flimit4 [rsp+128]
 
 %if ABI_IS_32BIT=0
-    lea         r8,       [sym(vp8_rv) GLOBAL]
+    lea         r8,       [GLOBAL(sym(vp8_rv))]
 %endif
 
     ;rows +=8;
@@ -393,7 +393,7 @@ loop_row:
             and         rcx,        127
 %if ABI_IS_32BIT=1 && CONFIG_PIC=1
             push        rax
-            lea         rax,        [sym(vp8_rv) GLOBAL]
+            lea         rax,        [GLOBAL(sym(vp8_rv))]
             movdqu      xmm4,       [rax + rcx*2] ;vp8_rv[rcx*2]
             pop         rax
 %elif ABI_IS_32BIT=0
@@ -579,7 +579,7 @@ nextcol4:
             punpcklwd   xmm1,   xmm0
 
             paddd       xmm1,   xmm6
-            paddd       xmm1,   [four8s GLOBAL]
+            paddd       xmm1,   [GLOBAL(four8s)]
 
             psrad       xmm1,   4
             packssdw    xmm1,   xmm0
diff --git a/vp8/common/x86/subpixel_mmx.asm b/vp8/common/x86/subpixel_mmx.asm
index 06db0c6a0..23ed4e208 100644
--- a/vp8/common/x86/subpixel_mmx.asm
+++ b/vp8/common/x86/subpixel_mmx.asm
@@ -84,7 +84,7 @@ nextrow:
         pmullw      mm5,    [rdx]            ; mm5 *= kernel 5 modifiers
         paddsw      mm3,    mm5              ; mm3 += mm5
 
-        paddsw      mm3,    [rd GLOBAL]               ; mm3 += round value
+        paddsw      mm3,    [GLOBAL(rd)]              ; mm3 += round value
         psraw       mm3,    VP8_FILTER_SHIFT     ; mm3 /= 128
         packuswb    mm3,    mm0              ; pack and unpack to saturate
         punpcklbw   mm3,    mm0              ;
@@ -136,7 +136,7 @@ sym(vp8_filter_block1d_v6_mmx):
     push        rdi
     ; end prolog
 
-        movq      mm5, [rd GLOBAL]
+        movq      mm5, [GLOBAL(rd)]
         push        rbx
         mov         rbx, arg(6) ;vp8_filter
         movq      mm1, [rbx + 16]             ; do both the negative taps first!!!
@@ -225,7 +225,7 @@ sym(vp8_filter_block1dc_v6_mmx):
     push        rdi
     ; end prolog
 
-        movq      mm5, [rd GLOBAL]
+        movq      mm5, [GLOBAL(rd)]
         push        rbx
         mov         rbx, arg(7) ;vp8_filter
         movq      mm1, [rbx + 16]             ; do both the negative taps first!!!
@@ -320,7 +320,7 @@ sym(vp8_bilinear_predict8x8_mmx):
         mov         rdi,        arg(4) ;dst_ptr           ;
 
         shl         rax,        5 ; offset * 32
-        lea         rcx,        [sym(vp8_bilinear_filters_mmx) GLOBAL]
+        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))]
 
         add         rax,        rcx ; HFilter
         mov         rsi,        arg(0) ;src_ptr              ;
@@ -363,10 +363,10 @@ sym(vp8_bilinear_predict8x8_mmx):
         paddw       mm3,        mm5                 ;
         paddw       mm4,        mm6                 ;
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         movq        mm7,        mm3                 ;
@@ -404,10 +404,10 @@ next_row_8x8:
         pmullw      mm5,        [rax]               ;
         pmullw      mm6,        [rax]               ;
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         movq        mm7,        mm3                 ;
@@ -421,10 +421,10 @@ next_row_8x8:
         paddw       mm4,        mm6                 ;
 
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         packuswb    mm3,        mm4
@@ -476,7 +476,7 @@ sym(vp8_bilinear_predict8x4_mmx):
         movsxd      rax,        dword ptr arg(2) ;xoffset
         mov         rdi,        arg(4) ;dst_ptr           ;
 
-        lea         rcx,        [sym(vp8_bilinear_filters_mmx) GLOBAL]
+        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))]
         shl         rax,        5
 
         mov         rsi,        arg(0) ;src_ptr              ;
@@ -518,10 +518,10 @@ sym(vp8_bilinear_predict8x4_mmx):
         paddw       mm3,        mm5                 ;
         paddw       mm4,        mm6                 ;
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         movq        mm7,        mm3                 ;
@@ -559,10 +559,10 @@ next_row_8x4:
         pmullw      mm5,        [rax]               ;
         pmullw      mm6,        [rax]               ;
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         movq        mm7,        mm3                 ;
@@ -576,10 +576,10 @@ next_row_8x4:
         paddw       mm4,        mm6                 ;
 
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       mm4,        [rd GLOBAL]                  ;
+        paddw       mm4,        [GLOBAL(rd)]                 ;
         psraw       mm4,        VP8_FILTER_SHIFT        ;
 
         packuswb    mm3,        mm4
@@ -631,7 +631,7 @@ sym(vp8_bilinear_predict4x4_mmx):
         movsxd      rax,        dword ptr arg(2) ;xoffset
         mov         rdi,        arg(4) ;dst_ptr           ;
 
-        lea         rcx,        [sym(vp8_bilinear_filters_mmx) GLOBAL]
+        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))]
         shl         rax,        5
 
         add         rax,        rcx ; HFilter
@@ -662,7 +662,7 @@ sym(vp8_bilinear_predict4x4_mmx):
         pmullw      mm5,        mm2                 ;
 
         paddw       mm3,        mm5                 ;
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
 
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
@@ -686,7 +686,7 @@ next_row_4x4:
         punpcklbw   mm5,        mm0                 ;
 
         pmullw      mm5,        [rax]               ;
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
 
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
         movq        mm7,        mm3                 ;
@@ -697,7 +697,7 @@ next_row_4x4:
         paddw       mm3,        mm5                 ;
 
 
-        paddw       mm3,        [rd GLOBAL]                  ; xmm3 += round value
+        paddw       mm3,        [GLOBAL(rd)]                 ; xmm3 += round value
         psraw       mm3,        VP8_FILTER_SHIFT        ; xmm3 /= 128
 
         packuswb    mm3,        mm0
diff --git a/vp8/common/x86/subpixel_sse2.asm b/vp8/common/x86/subpixel_sse2.asm
index 2385abfd0..b87cad259 100644
--- a/vp8/common/x86/subpixel_sse2.asm
+++ b/vp8/common/x86/subpixel_sse2.asm
@@ -107,7 +107,7 @@ filter_block1d8_h6_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm1
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -231,7 +231,7 @@ filter_block1d16_h6_sse2_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm1
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -284,7 +284,7 @@ filter_block1d16_h6_sse2_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm2
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -351,7 +351,7 @@ sym(vp8_filter_block1d8_v6_sse2):
         movsxd      rcx,        DWORD PTR arg(5) ;[output_height]
         pxor        xmm0,       xmm0                        ; clear xmm0
 
-        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+        movdqa      xmm7,       XMMWORD PTR [GLOBAL(rd)]
 %if ABI_IS_32BIT=0
         movsxd      r8,         dword ptr arg(2) ; dst_ptich
 %endif
@@ -489,7 +489,7 @@ vp8_filter_block1d16_v6_sse2_loop:
         pmullw      xmm5,       [rax + 80]
         pmullw      xmm6,       [rax + 80]
 
-        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+        movdqa      xmm7,       XMMWORD PTR [GLOBAL(rd)]
         pxor        xmm0,       xmm0                        ; clear xmm0
 
         paddsw      xmm1,       xmm3
@@ -608,7 +608,7 @@ filter_block1d8_h6_only_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm1
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -723,7 +723,7 @@ filter_block1d16_h6_only_sse2_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm1
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -773,7 +773,7 @@ filter_block1d16_h6_only_sse2_rowloop:
         paddsw      xmm4,       xmm6
 
         paddsw      xmm4,       xmm2
-        paddsw      xmm4,       [rd GLOBAL]
+        paddsw      xmm4,       [GLOBAL(rd)]
 
         psraw       xmm4,       7
 
@@ -832,7 +832,7 @@ sym(vp8_filter_block1d8_v6_only_sse2):
 
         pxor        xmm0,       xmm0                        ; clear xmm0
 
-        movdqa      xmm7,       XMMWORD PTR [rd GLOBAL]
+        movdqa      xmm7,       XMMWORD PTR [GLOBAL(rd)]
 %if ABI_IS_32BIT=0
         movsxd      r8,         dword ptr arg(3) ; dst_ptich
 %endif
@@ -978,7 +978,7 @@ sym(vp8_bilinear_predict16x16_sse2):
     ;const short *HFilter = bilinear_filters_mmx[xoffset]
     ;const short *VFilter = bilinear_filters_mmx[yoffset]
 
-        lea         rcx,        [sym(vp8_bilinear_filters_mmx) GLOBAL]
+        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))]
         movsxd      rax,        dword ptr arg(2) ;xoffset
 
         cmp         rax,        0      ;skip first_pass filter if xoffset=0
@@ -1033,10 +1033,10 @@ sym(vp8_bilinear_predict16x16_sse2):
         paddw       xmm3,       xmm5
         paddw       xmm4,       xmm6
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
         movdqa      xmm7,       xmm3
@@ -1074,10 +1074,10 @@ next_row:
         pmullw      xmm5,       [rax]
         pmullw      xmm6,       [rax]
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
         movdqa      xmm7,       xmm3
@@ -1089,10 +1089,10 @@ next_row:
         paddw       xmm3,       xmm5
         paddw       xmm4,       xmm6
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
         packuswb    xmm3,       xmm4
@@ -1154,10 +1154,10 @@ next_row_spo:
         paddw       xmm3,       xmm5
         paddw       xmm4,       xmm6
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
         packuswb    xmm3,       xmm4
@@ -1198,10 +1198,10 @@ next_row_fpo:
         paddw       xmm3,       xmm5
         paddw       xmm4,       xmm6
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
         packuswb    xmm3,       xmm4
@@ -1249,7 +1249,7 @@ sym(vp8_bilinear_predict8x8_sse2):
 
     ;const short *HFilter = bilinear_filters_mmx[xoffset]
     ;const short *VFilter = bilinear_filters_mmx[yoffset]
-        lea         rcx,        [sym(vp8_bilinear_filters_mmx) GLOBAL]
+        lea         rcx,        [GLOBAL(sym(vp8_bilinear_filters_mmx))]
 
         mov         rsi,        arg(0) ;src_ptr
         movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
@@ -1315,7 +1315,7 @@ sym(vp8_bilinear_predict8x8_sse2):
 
         paddw       xmm3,       xmm4
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
         movdqa      xmm7,       xmm3
@@ -1334,7 +1334,7 @@ next_row8x8:
         paddw       xmm3,       xmm4
         pmullw      xmm7,       xmm5
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
         movdqa      xmm4,       xmm3
@@ -1344,7 +1344,7 @@ next_row8x8:
 
         movdqa      xmm7,       xmm4
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT        ; xmm3 /= 128
 
         packuswb    xmm3,       xmm0
diff --git a/vp8/common/x86/subpixel_ssse3.asm b/vp8/common/x86/subpixel_ssse3.asm
index fe921c019..7f6fd93e4 100644
--- a/vp8/common/x86/subpixel_ssse3.asm
+++ b/vp8/common/x86/subpixel_ssse3.asm
@@ -48,9 +48,9 @@ sym(vp8_filter_block1d8_h6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4
 
-    movdqa      xmm7, [rd GLOBAL]
+    movdqa      xmm7, [GLOBAL(rd)]
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
     mov         rdi, arg(2)             ;output_ptr
 
@@ -80,9 +80,9 @@ filter_block1d8_h6_rowloop_ssse3:
     pmaddubsw   xmm0,   xmm4
 
     movdqa      xmm2,   xmm1
-    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
+    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]
 
-    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]
     pmaddubsw   xmm1,   xmm5
 
     lea         rdi,    [rdi + rdx]
@@ -115,8 +115,8 @@ vp8_filter_block1d8_h4_ssse3:
     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
 
-    movdqa      xmm3, XMMWORD PTR [shuf2bfrom1 GLOBAL]
-    movdqa      xmm4, XMMWORD PTR [shuf3bfrom1 GLOBAL]
+    movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)]
+    movdqa      xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)]
 
     mov         rsi, arg(0)             ;src_ptr
 
@@ -189,7 +189,7 @@ sym(vp8_filter_block1d16_h6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4      ;
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
 
     mov         rdi, arg(2)                     ;output_ptr
@@ -219,9 +219,9 @@ filter_block1d16_h6_rowloop_ssse3:
     pmaddubsw   xmm0,   xmm4
 
     movdqa      xmm2,   xmm1
-    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
+    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]
 
-    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]
     movq        xmm3,   MMWORD PTR [rsi +  6]
 
     pmaddubsw   xmm1,   xmm5
@@ -237,10 +237,10 @@ filter_block1d16_h6_rowloop_ssse3:
     paddsw      xmm0,   xmm2
 
     movdqa      xmm2,   xmm1
-    paddsw      xmm0,   [rd GLOBAL]
+    paddsw      xmm0,   [GLOBAL(rd)]
 
-    pshufb      xmm1,   [shuf2bfrom1 GLOBAL]
-    pshufb      xmm2,   [shuf3bfrom1 GLOBAL]
+    pshufb      xmm1,   [GLOBAL(shuf2bfrom1)]
+    pshufb      xmm2,   [GLOBAL(shuf3bfrom1)]
 
     psraw       xmm0,   7
     pmaddubsw   xmm1,   xmm5
@@ -253,7 +253,7 @@ filter_block1d16_h6_rowloop_ssse3:
 
     paddsw      xmm3,   xmm2
 
-    paddsw      xmm3,   [rd GLOBAL]
+    paddsw      xmm3,   [GLOBAL(rd)]
 
     psraw       xmm3,   7
 
@@ -288,18 +288,18 @@ filter_block1d16_h4_rowloop_ssse3:
     movdqu      xmm1,   XMMWORD PTR [rsi - 2]
 
     movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
-    pshufb      xmm2, [shuf3b GLOBAL]
+    pshufb      xmm1, [GLOBAL(shuf2b)]
+    pshufb      xmm2, [GLOBAL(shuf3b)]
     pmaddubsw   xmm1, xmm5
 
     movdqu      xmm3,   XMMWORD PTR [rsi + 6]
 
     pmaddubsw   xmm2, xmm6
     movdqa      xmm0, xmm3
-    pshufb      xmm3, [shuf3b GLOBAL]
-    pshufb      xmm0, [shuf2b GLOBAL]
+    pshufb      xmm3, [GLOBAL(shuf3b)]
+    pshufb      xmm0, [GLOBAL(shuf2b)]
 
-    paddsw      xmm1, [rd GLOBAL]
+    paddsw      xmm1, [GLOBAL(rd)]
     paddsw      xmm1, xmm2
 
     pmaddubsw   xmm0, xmm5
@@ -309,7 +309,7 @@ filter_block1d16_h4_rowloop_ssse3:
     packuswb    xmm1, xmm1
     lea         rsi,    [rsi + rax]
     paddsw      xmm3, xmm0
-    paddsw      xmm3, [rd GLOBAL]
+    paddsw      xmm3, [GLOBAL(rd)]
     psraw       xmm3, 7
     packuswb    xmm3, xmm3
 
@@ -353,9 +353,9 @@ sym(vp8_filter_block1d4_h6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4      ;
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
-    movdqa      xmm7, [rd GLOBAL]
+    movdqa      xmm7, [GLOBAL(rd)]
 
     cmp         esi, DWORD PTR [rax]
     je          vp8_filter_block1d4_h4_ssse3
@@ -376,12 +376,12 @@ filter_block1d4_h6_rowloop_ssse3:
     movdqu      xmm0,   XMMWORD PTR [rsi - 2]
 
     movdqa      xmm1, xmm0
-    pshufb      xmm0, [shuf1b GLOBAL]
+    pshufb      xmm0, [GLOBAL(shuf1b)]
 
     movdqa      xmm2, xmm1
-    pshufb      xmm1, [shuf2b GLOBAL]
+    pshufb      xmm1, [GLOBAL(shuf2b)]
     pmaddubsw   xmm0, xmm4
-    pshufb      xmm2, [shuf3b GLOBAL]
+    pshufb      xmm2, [GLOBAL(shuf3b)]
     pmaddubsw   xmm1, xmm5
 
 ;--
@@ -413,8 +413,8 @@ filter_block1d4_h6_rowloop_ssse3:
 vp8_filter_block1d4_h4_ssse3:
     movdqa      xmm5, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm6, XMMWORD PTR [rax+128]     ;k1_k3
-    movdqa      xmm0, XMMWORD PTR [shuf2b GLOBAL]
-    movdqa      xmm3, XMMWORD PTR [shuf3b GLOBAL]
+    movdqa      xmm0, XMMWORD PTR [GLOBAL(shuf2b)]
+    movdqa      xmm3, XMMWORD PTR [GLOBAL(shuf3b)]
 
     mov         rsi, arg(0)             ;src_ptr
     mov         rdi, arg(2)             ;output_ptr
@@ -427,8 +427,8 @@ filter_block1d4_h4_rowloop_ssse3:
     movdqu      xmm1,   XMMWORD PTR [rsi - 2]
 
     movdqa      xmm2, xmm1
-    pshufb      xmm1, xmm0 ;;[shuf2b GLOBAL]
-    pshufb      xmm2, xmm3 ;;[shuf3b GLOBAL]
+    pshufb      xmm1, xmm0 ;;[GLOBAL(shuf2b)]
+    pshufb      xmm2, xmm3 ;;[GLOBAL(shuf3b)]
     pmaddubsw   xmm1, xmm5
 
 ;--
@@ -480,7 +480,7 @@ sym(vp8_filter_block1d16_v6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4      ;
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
 
     cmp         esi, DWORD PTR [rax]
@@ -521,7 +521,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
 
     paddsw      xmm2, xmm3
     paddsw      xmm2, xmm1
-    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, [GLOBAL(rd)]
     psraw       xmm2, 7
     packuswb    xmm2, xmm2
 
@@ -548,7 +548,7 @@ vp8_filter_block1d16_v6_ssse3_loop:
 ;--
     paddsw      xmm2, xmm3
     paddsw      xmm2, xmm1
-    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, [GLOBAL(rd)]
     psraw       xmm2, 7
     packuswb    xmm2, xmm2
 
@@ -601,7 +601,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
     movq        xmm4, MMWORD PTR [rax + rdx * 2 + 8]        ;D
     movq        xmm0, MMWORD PTR [rsi + rdx * 4 + 8]        ;E
 
-    paddsw      xmm2, [rd GLOBAL]
+    paddsw      xmm2, [GLOBAL(rd)]
     paddsw      xmm2, xmm3
     psraw       xmm2, 7
     packuswb    xmm2, xmm2
@@ -612,7 +612,7 @@ vp8_filter_block1d16_v4_ssse3_loop:
     pmaddubsw   xmm1, xmm6
     pmaddubsw   xmm5, xmm7
 
-    movdqa      xmm4, [rd GLOBAL]
+    movdqa      xmm4, [GLOBAL(rd)]
     add         rsi,  rdx
     add         rax,  rdx
 ;--
@@ -665,7 +665,7 @@ sym(vp8_filter_block1d8_v6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4      ;
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
 
     movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
@@ -698,7 +698,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
     punpcklbw   xmm3, xmm0                  ;C E
 
     movq        xmm0, MMWORD PTR [rax + rdx * 4]        ;F
-    movdqa      xmm4, [rd GLOBAL]
+    movdqa      xmm4, [GLOBAL(rd)]
 
     pmaddubsw   xmm3, xmm6
     punpcklbw   xmm1, xmm0                  ;A F
@@ -735,7 +735,7 @@ vp8_filter_block1d8_v6_ssse3_loop:
 vp8_filter_block1d8_v4_ssse3:
     movdqa      xmm6, XMMWORD PTR [rax+256]     ;k2_k4
     movdqa      xmm7, XMMWORD PTR [rax+128]     ;k1_k3
-    movdqa      xmm5, [rd GLOBAL]
+    movdqa      xmm5, [GLOBAL(rd)]
 
     mov         rsi, arg(0)             ;src_ptr
 
@@ -802,7 +802,7 @@ sym(vp8_filter_block1d4_v6_ssse3):
     xor         rsi, rsi
     shl         rdx, 4      ;
 
-    lea         rax, [k0_k5 GLOBAL]
+    lea         rax, [GLOBAL(k0_k5)]
     add         rax, rdx
 
     movsxd      rdx, DWORD PTR arg(1)   ;pixels_per_line
@@ -836,7 +836,7 @@ vp8_filter_block1d4_v6_ssse3_loop:
 
     movd        mm0, DWORD PTR [rax + rdx * 4]        ;F
 
-    movq        mm4, [rd GLOBAL]
+    movq        mm4, [GLOBAL(rd)]
 
     pmaddubsw   mm3, mm6
     punpcklbw   mm1, mm0                  ;A F
@@ -873,7 +873,7 @@ vp8_filter_block1d4_v6_ssse3_loop:
 vp8_filter_block1d4_v4_ssse3:
     movq        mm6, MMWORD PTR [rax+256]     ;k2_k4
     movq        mm7, MMWORD PTR [rax+128]     ;k1_k3
-    movq        mm5, MMWORD PTR [rd GLOBAL]
+    movq        mm5, MMWORD PTR [GLOBAL(rd)]
 
     mov         rsi, arg(0)             ;src_ptr
 
@@ -938,7 +938,7 @@ sym(vp8_bilinear_predict16x16_ssse3):
     push        rdi
     ; end prolog
 
-        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+        lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)]
         movsxd      rax,        dword ptr arg(2)    ; xoffset
 
         cmp         rax,        0                   ; skip first_pass filter if xoffset=0
@@ -985,10 +985,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
         punpcklbw   xmm4,       xmm5                ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16
         pmaddubsw   xmm4,       xmm1                ; 01 03 05 07 09 11 13 15
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        paddw       xmm4,       [GLOBAL(rd)]        ; xmm4 += round value
         psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
 
         movdqa      xmm7,       xmm3
@@ -1009,10 +1009,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
         punpcklbw   xmm4,       xmm5
         pmaddubsw   xmm4,       xmm1
 
-        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        paddw       xmm6,       [GLOBAL(rd)]        ; xmm6 += round value
         psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
 
-        paddw       xmm4,       [rd GLOBAL]         ; xmm4 += round value
+        paddw       xmm4,       [GLOBAL(rd)]        ; xmm4 += round value
         psraw       xmm4,       VP8_FILTER_SHIFT    ; xmm4 /= 128
 
         packuswb    xmm6,       xmm4
@@ -1024,10 +1024,10 @@ sym(vp8_bilinear_predict16x16_ssse3):
         punpckhbw   xmm7,       xmm6
         pmaddubsw   xmm7,       xmm2
 
-        paddw       xmm5,       [rd GLOBAL]         ; xmm5 += round value
+        paddw       xmm5,       [GLOBAL(rd)]        ; xmm5 += round value
         psraw       xmm5,       VP8_FILTER_SHIFT    ; xmm5 /= 128
 
-        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        paddw       xmm7,       [GLOBAL(rd)]        ; xmm7 += round value
         psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
 
         packuswb    xmm5,       xmm7
@@ -1082,19 +1082,19 @@ b16x16_sp_only:
         punpcklbw   xmm5,       xmm6
 
         pmaddubsw   xmm3,       xmm1
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
 
         pmaddubsw   xmm5,       xmm1
-        paddw       xmm2,       [rd GLOBAL]
+        paddw       xmm2,       [GLOBAL(rd)]
 
         psraw       xmm4,       VP8_FILTER_SHIFT
         psraw       xmm2,       VP8_FILTER_SHIFT
 
         packuswb    xmm4,       xmm2
-        paddw       xmm3,       [rd GLOBAL]
+        paddw       xmm3,       [GLOBAL(rd)]
 
         movdqa      [rdi],      xmm4                ; store row 0
-        paddw       xmm5,       [rd GLOBAL]
+        paddw       xmm5,       [GLOBAL(rd)]
 
         psraw       xmm3,       VP8_FILTER_SHIFT
         psraw       xmm5,       VP8_FILTER_SHIFT
@@ -1134,7 +1134,7 @@ b16x16_fp_only:
         pmaddubsw   xmm3,       xmm1
         movq        xmm5,       [rsi]
 
-        paddw       xmm2,       [rd GLOBAL]
+        paddw       xmm2,       [GLOBAL(rd)]
         movq        xmm7,       [rsi+1]
 
         movq        xmm6,       [rsi+8]
@@ -1143,7 +1143,7 @@ b16x16_fp_only:
         punpcklbw   xmm5,       xmm7
         movq        xmm7,       [rsi+9]
 
-        paddw       xmm3,       [rd GLOBAL]
+        paddw       xmm3,       [GLOBAL(rd)]
         pmaddubsw   xmm5,       xmm1
 
         psraw       xmm3,       VP8_FILTER_SHIFT
@@ -1153,12 +1153,12 @@ b16x16_fp_only:
         pmaddubsw   xmm6,       xmm1
 
         movdqa      [rdi],      xmm2                ; store the results in the destination
-        paddw       xmm5,       [rd GLOBAL]
+        paddw       xmm5,       [GLOBAL(rd)]
 
         lea         rdi,        [rdi + rdx]         ; dst_pitch
         psraw       xmm5,       VP8_FILTER_SHIFT
 
-        paddw       xmm6,       [rd GLOBAL]
+        paddw       xmm6,       [GLOBAL(rd)]
         psraw       xmm6,       VP8_FILTER_SHIFT
 
         packuswb    xmm5,       xmm6
@@ -1204,7 +1204,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
     ALIGN_STACK 16, rax
     sub         rsp, 144                         ; reserve 144 bytes
 
-        lea         rcx,        [vp8_bilinear_filters_ssse3 GLOBAL]
+        lea         rcx,        [GLOBAL(vp8_bilinear_filters_ssse3)]
 
         mov         rsi,        arg(0) ;src_ptr
         movsxd      rdx,        dword ptr arg(1) ;src_pixels_per_line
@@ -1269,7 +1269,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
         punpcklbw   xmm3,       xmm5                ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08
         pmaddubsw   xmm3,       xmm0                ; 00 02 04 06 08 10 12 14
 
-        paddw       xmm3,       [rd GLOBAL]         ; xmm3 += round value
+        paddw       xmm3,       [GLOBAL(rd)]        ; xmm3 += round value
         psraw       xmm3,       VP8_FILTER_SHIFT    ; xmm3 /= 128
 
         movdqa      xmm7,       xmm3
@@ -1286,7 +1286,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
         punpcklbw   xmm6,       xmm5
         pmaddubsw   xmm6,       xmm0
 
-        paddw       xmm6,       [rd GLOBAL]         ; xmm6 += round value
+        paddw       xmm6,       [GLOBAL(rd)]        ; xmm6 += round value
         psraw       xmm6,       VP8_FILTER_SHIFT    ; xmm6 /= 128
 
         packuswb    xmm6,       xmm6
@@ -1294,7 +1294,7 @@ sym(vp8_bilinear_predict8x8_ssse3):
         punpcklbw   xmm7,       xmm6
         pmaddubsw   xmm7,       xmm1
 
-        paddw       xmm7,       [rd GLOBAL]         ; xmm7 += round value
+        paddw       xmm7,       [GLOBAL(rd)]        ; xmm7 += round value
         psraw       xmm7,       VP8_FILTER_SHIFT    ; xmm7 /= 128
 
         packuswb    xmm7,       xmm7
@@ -1347,21 +1347,21 @@ b8x8_sp_only:
         punpcklbw   xmm6,       xmm7
 
         pmaddubsw   xmm6,       xmm0
-        paddw       xmm1,       [rd GLOBAL]
+        paddw       xmm1,       [GLOBAL(rd)]
 
-        paddw       xmm2,       [rd GLOBAL]
+        paddw       xmm2,       [GLOBAL(rd)]
         psraw       xmm1,       VP8_FILTER_SHIFT
 
-        paddw       xmm3,       [rd GLOBAL]
+        paddw       xmm3,       [GLOBAL(rd)]
         psraw       xmm2,       VP8_FILTER_SHIFT
 
-        paddw       xmm4,       [rd GLOBAL]
+        paddw       xmm4,       [GLOBAL(rd)]
         psraw       xmm3,       VP8_FILTER_SHIFT
 
-        paddw       xmm5,       [rd GLOBAL]
+        paddw       xmm5,       [GLOBAL(rd)]
         psraw       xmm4,       VP8_FILTER_SHIFT
 
-        paddw       xmm6,       [rd GLOBAL]
+        paddw       xmm6,       [GLOBAL(rd)]
         psraw       xmm5,       VP8_FILTER_SHIFT
 
         psraw       xmm6,       VP8_FILTER_SHIFT
@@ -1395,10 +1395,10 @@ b8x8_sp_only:
         punpcklbw   xmm1,       xmm2
 
         pmaddubsw   xmm1,       xmm0
-        paddw       xmm7,       [rd GLOBAL]
+        paddw       xmm7,       [GLOBAL(rd)]
 
         psraw       xmm7,       VP8_FILTER_SHIFT
-        paddw       xmm1,       [rd GLOBAL]
+        paddw       xmm1,       [GLOBAL(rd)]
 
         psraw       xmm1,       VP8_FILTER_SHIFT
         packuswb    xmm7,       xmm7
@@ -1447,16 +1447,16 @@ b8x8_fp_only:
         punpcklbw   xmm7,       xmm2
         pmaddubsw   xmm7,       xmm0
 
-        paddw       xmm1,       [rd GLOBAL]
+        paddw       xmm1,       [GLOBAL(rd)]
         psraw       xmm1,       VP8_FILTER_SHIFT
 
-        paddw       xmm3,       [rd GLOBAL]
+        paddw       xmm3,       [GLOBAL(rd)]
         psraw       xmm3,       VP8_FILTER_SHIFT
 
-        paddw       xmm5,       [rd GLOBAL]
+        paddw       xmm5,       [GLOBAL(rd)]
         psraw       xmm5,       VP8_FILTER_SHIFT
 
-        paddw       xmm7,       [rd GLOBAL]
+        paddw       xmm7,       [GLOBAL(rd)]
         psraw       xmm7,       VP8_FILTER_SHIFT
 
         packuswb    xmm1,       xmm1
diff --git a/vp8/decoder/x86/dequantize_mmx.asm b/vp8/decoder/x86/dequantize_mmx.asm
index eb9d1f8aa..0d6133a46 100644
--- a/vp8/decoder/x86/dequantize_mmx.asm
+++ b/vp8/decoder/x86/dequantize_mmx.asm
@@ -98,11 +98,11 @@ sym(vp8_dequant_idct_add_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL];
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL];
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -110,10 +110,10 @@ sym(vp8_dequant_idct_add_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
@@ -153,11 +153,11 @@ sym(vp8_dequant_idct_add_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL];
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL];
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -165,16 +165,16 @@ sym(vp8_dequant_idct_add_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
-        paddw       mm0,            [fours GLOBAL]
+        paddw       mm0,            [GLOBAL(fours)]
 
-        paddw       mm2,            [fours GLOBAL]
+        paddw       mm2,            [GLOBAL(fours)]
         movq        mm6,            mm2             ; a1
 
         movq        mm4,            mm0             ; b1
@@ -300,11 +300,11 @@ sym(vp8_dequant_dc_idct_add_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL];
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL];
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -312,10 +312,10 @@ sym(vp8_dequant_dc_idct_add_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
@@ -355,11 +355,11 @@ sym(vp8_dequant_dc_idct_add_mmx):
         movq        mm5,            mm1
         paddw       mm2,            mm0             ; a1 =0+2
 
-        pmulhw      mm5,            [x_s1sqr2 GLOBAL];
+        pmulhw      mm5,            [GLOBAL(x_s1sqr2)];
         paddw       mm5,            mm1             ; ip1 * sin(pi/8) * sqrt(2)
 
         movq        mm7,            mm3             ;
-        pmulhw      mm7,            [x_c1sqr2less1 GLOBAL];
+        pmulhw      mm7,            [GLOBAL(x_c1sqr2less1)];
 
         paddw       mm7,            mm3             ; ip3 * cos(pi/8) * sqrt(2)
         psubw       mm7,            mm5             ; c1
@@ -367,16 +367,16 @@ sym(vp8_dequant_dc_idct_add_mmx):
         movq        mm5,            mm1
         movq        mm4,            mm3
 
-        pmulhw      mm5,            [x_c1sqr2less1 GLOBAL]
+        pmulhw      mm5,            [GLOBAL(x_c1sqr2less1)]
         paddw       mm5,            mm1
 
-        pmulhw      mm3,            [x_s1sqr2 GLOBAL]
+        pmulhw      mm3,            [GLOBAL(x_s1sqr2)]
         paddw       mm3,            mm4
 
         paddw       mm3,            mm5             ; d1
-        paddw       mm0,            [fours GLOBAL]
+        paddw       mm0,            [GLOBAL(fours)]
 
-        paddw       mm2,            [fours GLOBAL]
+        paddw       mm2,            [GLOBAL(fours)]
         movq        mm6,            mm2             ; a1
 
         movq        mm4,            mm0             ; b1
diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index b6cfc5ce0..5acaca875 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -35,7 +35,7 @@ sym(vp8_short_fdct4x4_mmx):
         mov     rsi,    arg(0) ;input
         mov     rdi,    arg(1) ;output
 
-        lea     rdx,    [dct_const_mmx GLOBAL]
+        lea     rdx,    [GLOBAL(dct_const_mmx)]
         movsxd  rax,    dword ptr arg(2) ;pitch
 
         lea     rcx,    [rsi + rax*2]
@@ -243,7 +243,7 @@ sym(vp8_short_fdct8x4_wmt):
         mov         rsi,    arg(0) ;input
         mov         rdi,    arg(1) ;output
 
-        lea         rdx,    [dct_const_xmm GLOBAL]
+        lea         rdx,    [GLOBAL(dct_const_xmm)]
         movsxd      rax,    dword ptr arg(2) ;pitch
 
         lea         rcx,    [rsi + rax*2]
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index f7a18432d..723a78d76 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -52,14 +52,14 @@ sym(vp8_short_fdct4x4_sse2):
     psllw       xmm0, 3                         ;b1 <<= 3 a1 <<= 3
     psllw       xmm3, 3                         ;c1 <<= 3 d1 <<= 3
     movdqa      xmm1, xmm0
-    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL]     ;a1 + b1
-    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL]     ;a1 - b1
+    pmaddwd     xmm0, XMMWORD PTR[GLOBAL(_mult_add)]    ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[GLOBAL(_mult_sub)]    ;a1 - b1
     movdqa      xmm4, xmm3
-    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
-    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
+    pmaddwd     xmm3, XMMWORD PTR[GLOBAL(_5352_2217)]   ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352
 
-    paddd       xmm3, XMMWORD PTR[_14500 GLOBAL]
-    paddd       xmm4, XMMWORD PTR[_7500 GLOBAL]
+    paddd       xmm3, XMMWORD PTR[GLOBAL(_14500)]
+    paddd       xmm4, XMMWORD PTR[GLOBAL(_7500)]
     psrad       xmm3, 12            ;(c1 * 2217 + d1 * 5352 +  14500)>>12
     psrad       xmm4, 12            ;(d1 * 2217 - c1 * 5352 +   7500)>>12
 
@@ -80,7 +80,7 @@ sym(vp8_short_fdct4x4_sse2):
     punpcklwd   xmm0, xmm3                      ;13 12 11 10 03 02 01 00
     punpckhwd   xmm2, xmm3                      ;33 32 31 30 23 22 21 20
 
-    movdqa      xmm5, XMMWORD PTR[_7 GLOBAL]
+    movdqa      xmm5, XMMWORD PTR[GLOBAL(_7)]
     pshufd      xmm2, xmm2, 04eh
     movdqa      xmm3, xmm0
     paddw       xmm0, xmm2                      ;b1 b1 b1 b1 a1 a1 a1 a1
@@ -94,8 +94,8 @@ sym(vp8_short_fdct4x4_sse2):
     pshufhw     xmm0, xmm0, 0d8h                ;b1 a1 b1 a1 b1 a1 b1 a1
     pshufhw     xmm3, xmm3, 0d8h                ;c1 d1 c1 d1 c1 d1 c1 d1
     movdqa      xmm1, xmm0
-    pmaddwd     xmm0, XMMWORD PTR[_mult_add GLOBAL] ;a1 + b1
-    pmaddwd     xmm1, XMMWORD PTR[_mult_sub GLOBAL] ;a1 - b1
+    pmaddwd     xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1
+    pmaddwd     xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1
 
     pxor        xmm4, xmm4                      ;zero out for compare
     paddd       xmm0, xmm5
@@ -103,14 +103,14 @@ sym(vp8_short_fdct4x4_sse2):
     pcmpeqw     xmm2, xmm4
     psrad       xmm0, 4                         ;(a1 + b1 + 7)>>4
     psrad       xmm1, 4                         ;(a1 - b1 + 7)>>4
-    pandn       xmm2, XMMWORD PTR[_cmp_mask GLOBAL] ;clear upper,
-                                                    ;and keep bit 0 of lower
+    pandn       xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper,
+                                                     ;and keep bit 0 of lower
 
     movdqa      xmm4, xmm3
-    pmaddwd     xmm3, XMMWORD PTR[_5352_2217 GLOBAL]    ;c1*2217 + d1*5352
-    pmaddwd     xmm4, XMMWORD PTR[_2217_neg5352 GLOBAL] ;d1*2217 - c1*5352
-    paddd       xmm3, XMMWORD PTR[_12000 GLOBAL]
-    paddd       xmm4, XMMWORD PTR[_51000 GLOBAL]
+    pmaddwd     xmm3, XMMWORD PTR[GLOBAL(_5352_2217)]    ;c1*2217 + d1*5352
+    pmaddwd     xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - c1*5352
+    paddd       xmm3, XMMWORD PTR[GLOBAL(_12000)]
+    paddd       xmm4, XMMWORD PTR[GLOBAL(_51000)]
     packssdw    xmm0, xmm1                      ;op[8] op[0]
     psrad       xmm3, 16                ;(c1 * 2217 + d1 * 5352 +  12000)>>16
     psrad       xmm4, 16                ;(d1 * 2217 - c1 * 5352 +  51000)>>16
diff --git a/vp8/encoder/x86/variance_impl_mmx.asm b/vp8/encoder/x86/variance_impl_mmx.asm
index d4ec63bd6..67a9b4d3e 100644
--- a/vp8/encoder/x86/variance_impl_mmx.asm
+++ b/vp8/encoder/x86/variance_impl_mmx.asm
@@ -556,7 +556,7 @@ sym(vp8_filter_block2d_bil4x4_var_mmx):
         pmullw          mm3,            [rax+8]             ;
 
         paddw           mm1,            mm3                 ;
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
 
         psraw           mm1,            mmx_filter_shift    ;
         movq            mm5,            mm1
@@ -580,7 +580,7 @@ filter_block2d_bil4x4_var_mmx_loop:
         pmullw          mm3,            [rax+8]             ;
 
         paddw           mm1,            mm3                 ;
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
 
         psraw           mm1,            mmx_filter_shift    ;
         movq            mm3,            mm5                 ;
@@ -592,7 +592,7 @@ filter_block2d_bil4x4_var_mmx_loop:
         paddw           mm1,            mm3                 ;
 
 
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
         psraw           mm1,            mmx_filter_shift    ;
 
         movd            mm3,            [rdi]               ;
@@ -710,10 +710,10 @@ sym(vp8_filter_block2d_bil_var_mmx):
         paddw           mm1,            mm3                 ;
 
         paddw           mm2,            mm4                 ;
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
 
         psraw           mm1,            mmx_filter_shift    ;
-        paddw           mm2,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm2,            [GLOBAL(mmx_bi_rd)] ;
 
         psraw           mm2,            mmx_filter_shift    ;
         movq            mm5,            mm1
@@ -749,10 +749,10 @@ filter_block2d_bil_var_mmx_loop:
         paddw           mm1,            mm3                 ;
         paddw           mm2,            mm4                 ;
 
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
         psraw           mm1,            mmx_filter_shift    ;
 
-        paddw           mm2,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm2,            [GLOBAL(mmx_bi_rd)] ;
         psraw           mm2,            mmx_filter_shift    ;
 
         movq            mm3,            mm5                 ;
@@ -773,8 +773,8 @@ filter_block2d_bil_var_mmx_loop:
         paddw           mm1,            mm3                 ;
         paddw           mm2,            mm4                 ;
 
-        paddw           mm1,            [mmx_bi_rd GLOBAL]  ;
-        paddw           mm2,            [mmx_bi_rd GLOBAL]  ;
+        paddw           mm1,            [GLOBAL(mmx_bi_rd)] ;
+        paddw           mm2,            [GLOBAL(mmx_bi_rd)] ;
 
         psraw           mm1,            mmx_filter_shift    ;
         psraw           mm2,            mmx_filter_shift    ;
diff --git a/vp8/encoder/x86/variance_impl_sse2.asm b/vp8/encoder/x86/variance_impl_sse2.asm
index 38b3f33ee..cefa0a956 100644
--- a/vp8/encoder/x86/variance_impl_sse2.asm
+++ b/vp8/encoder/x86/variance_impl_sse2.asm
@@ -532,7 +532,7 @@ sym(vp8_filter_block2d_bil_var_sse2):
         pmullw          xmm3,           [rax+16]             ;
         paddw           xmm1,           xmm3                 ;
 
-        paddw           xmm1,           [xmm_bi_rd GLOBAL]   ;
+        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
         psraw           xmm1,           xmm_filter_shift    ;
 
         movdqa          xmm5,           xmm1
@@ -554,7 +554,7 @@ filter_block2d_bil_var_sse2_loop:
         pmullw          xmm3,           [rax+16]             ;
 
         paddw           xmm1,           xmm3                 ;
-        paddw           xmm1,           [xmm_bi_rd GLOBAL]   ;
+        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
 
         psraw           xmm1,           xmm_filter_shift    ;
         movdqa          xmm3,           xmm5                 ;
@@ -565,7 +565,7 @@ filter_block2d_bil_var_sse2_loop:
         pmullw          xmm1,           [rdx+16]             ;
         paddw           xmm1,           xmm3                 ;
 
-        paddw           xmm1,           [xmm_bi_rd GLOBAL]   ;
+        paddw           xmm1,           [GLOBAL(xmm_bi_rd)]  ;
         psraw           xmm1,           xmm_filter_shift    ;
 
         movq            xmm3,           QWORD PTR [rdi]               ;
diff --git a/vpx_ports/x86_abi_support.asm b/vpx_ports/x86_abi_support.asm
index 470c58a6d..a872b280e 100644
--- a/vpx_ports/x86_abi_support.asm
+++ b/vpx_ports/x86_abi_support.asm
@@ -160,7 +160,7 @@
       ret
       %%exitGG:
       %undef GLOBAL
-      %define GLOBAL + %1 wrt ..gotoff
+      %define GLOBAL(x) x + %1 wrt ..gotoff
       %undef RESTORE_GOT
       %define RESTORE_GOT pop %1
     %endmacro
@@ -176,7 +176,7 @@
       ret
       %%exitGG:
       %undef GLOBAL
-      %define GLOBAL + %1 - fake_got
+      %define GLOBAL(x) x + %1 - fake_got
       %undef RESTORE_GOT
       %define RESTORE_GOT pop %1
     %endmacro
@@ -186,7 +186,7 @@
 %else
   %macro GET_GOT 1
   %endmacro
-  %define GLOBAL wrt rip
+  %define GLOBAL(x) rel x
   %ifidn __OUTPUT_FORMAT__,elf64
     %define WRT_PLT wrt ..plt
     %define HIDDEN_DATA(x) x:data hidden
@@ -197,7 +197,7 @@
 %ifnmacro GET_GOT
     %macro GET_GOT 1
     %endmacro
-    %define GLOBAL
+    %define GLOBAL(x) x
 %endif
 %ifndef RESTORE_GOT
 %define RESTORE_GOT

From fc2b06c625fc7d417379f4849d6d2f32ad99d5e4 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Sat, 31 Jul 2010 17:12:32 +0200
Subject: [PATCH 204/307] nasm: avoid relative include paths

nasm does not automatically assume the source's directory also for its
include files.

Provide nasm compatibility.  No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu.  Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id:	I386efa0cca5d401193416c11bd7363a283541645
---
 vpx_ports/emms.asm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpx_ports/emms.asm b/vpx_ports/emms.asm
index 87eece84e..306e235ce 100644
--- a/vpx_ports/emms.asm
+++ b/vpx_ports/emms.asm
@@ -9,7 +9,7 @@
 ;
 
 
-%include "x86_abi_support.asm"
+%include "vpx_ports/x86_abi_support.asm"
 
 section .text
     global sym(vpx_reset_mmx_state)

From 1fc294116a285d2399e06cc21edd0466ca2c51b2 Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Mon, 4 Oct 2010 23:20:38 +0200
Subject: [PATCH 205/307] nasm: movhps compatibility QWORD->MMWORD

Filed for nasm as:
https://sourceforge.net/tracker/?func=detail&atid=106208&aid=3081103&group_id=6208

nasm just does not accept any size parameter for movhps:
1.asm:2: error: mismatch in operand sizes

Some parts of libvpx already use MMWORD for movhps and MMWORD is
defined-out so it is compatible both with yasm and nasm.

Provide nasm compatibility. No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu.

Change-Id: I4008a317ca87ec07c9ada958fcdc10a0cb589bbc
---
 vp8/common/x86/loopfilter_sse2.asm | 32 +++++++++++++++---------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/vp8/common/x86/loopfilter_sse2.asm b/vp8/common/x86/loopfilter_sse2.asm
index 1ab6d0f39..849133dc4 100644
--- a/vp8/common/x86/loopfilter_sse2.asm
+++ b/vp8/common/x86/loopfilter_sse2.asm
@@ -1184,22 +1184,22 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 %endmacro
 
 %macro MBV_WRITEBACK_1 0
-        movq        QWORD PTR [rsi],    xmm0
-        movhps      QWORD PTR [rdi],    xmm0
+        movq        QWORD  PTR [rsi],   xmm0
+        movhps      MMWORD PTR [rdi],   xmm0
 
-        movq        QWORD PTR [rsi+2*rax], xmm6
-        movhps      QWORD PTR [rdi+2*rax], xmm6
+        movq        QWORD  PTR [rsi+2*rax], xmm6
+        movhps      MMWORD PTR [rdi+2*rax], xmm6
 
         movdqa      xmm0,               xmm3                ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40
         punpckldq   xmm0,               xmm7                ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40
 
         punpckhdq   xmm3,               xmm7                ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60
 
-        movq        QWORD PTR [rsi+4*rax], xmm0
-        movhps      QWORD PTR [rdi+4*rax], xmm0
+        movq        QWORD  PTR [rsi+4*rax], xmm0
+        movhps      MMWORD PTR [rdi+4*rax], xmm0
 
-        movq        QWORD PTR [rsi+2*rcx], xmm3
-        movhps      QWORD PTR [rdi+2*rcx], xmm3
+        movq        QWORD  PTR [rsi+2*rcx], xmm3
+        movhps      MMWORD PTR [rdi+2*rcx], xmm3
 
         movdqa      xmm2,               [rdx+64]            ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04
         punpckhbw   xmm2,               [rdx+80]            ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84
@@ -1217,21 +1217,21 @@ sym(vp8_loop_filter_vertical_edge_uv_sse2):
 %endmacro
 
 %macro MBV_WRITEBACK_2 0
-        movq        QWORD PTR [rsi],    xmm1
-        movhps      QWORD PTR [rdi],    xmm1
+        movq        QWORD  PTR [rsi],   xmm1
+        movhps      MMWORD PTR [rdi],   xmm1
 
-        movq        QWORD PTR [rsi+2*rax], xmm5
-        movhps      QWORD PTR [rdi+2*rax], xmm5
+        movq        QWORD  PTR [rsi+2*rax], xmm5
+        movhps      MMWORD PTR [rdi+2*rax], xmm5
 
         movdqa      xmm1,               xmm4                ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0
         punpckldq   xmm1,               xmm2                ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 c3 c2 c1 c0
         punpckhdq   xmm4,               xmm2                ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0
 
-        movq        QWORD PTR [rsi+4*rax], xmm1
-        movhps      QWORD PTR [rdi+4*rax], xmm1
+        movq        QWORD  PTR [rsi+4*rax], xmm1
+        movhps      MMWORD PTR [rdi+4*rax], xmm1
 
-        movq        QWORD PTR [rsi+2*rcx], xmm4
-        movhps      QWORD PTR [rdi+2*rcx], xmm4
+        movq        QWORD  PTR [rsi+2*rcx], xmm4
+        movhps      MMWORD PTR [rdi+2*rcx], xmm4
 %endmacro
 
 

From 7be093ea4d50c8d38438f88cb9fa817c1c9de8dd Mon Sep 17 00:00:00 2001
From: Jan Kratochvil <jan.kratochvil@redhat.com>
Date: Tue, 5 Oct 2010 19:15:08 +0200
Subject: [PATCH 206/307] nasm: add configure support

yasm has to be preferred as currently nasm produces marginally less
efficient code (longer opcodes). Filed for nasm as:
https://sourceforge.net/tracker/?func=detail&atid=106208&aid=3037462&group_id=6208

OTOH package should be built always the same, no matter which additional
packages are / are not present on the system. As the package should be
built with nasm (as yasm may not be available) we should not use yasm
even if it is possibly available.

nasm >= approx. 2.09 is required for the nasm compilation as the former
versions had a section alignment bug.

Provide nasm compatibility. No binary change by this patch with yasm on
{x86_64,i686}-fedora13-linux-gnu. Few longer opcodes with nasm on
{x86_64,i686}-fedora13-linux-gnu have been checked as safe.

Change-Id: Icb0fe39c64bbcc3bcd7972e392fd03f3273340df
---
 build/make/configure.sh | 43 ++++++++++++++++++++++++++++++++++++-----
 configure               |  2 ++
 2 files changed, 40 insertions(+), 5 deletions(-)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index cfac5391c..e20f0d133 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -255,9 +255,10 @@ TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
 TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
 TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
 TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
+TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"
 
 clean_temp_files() {
-    rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X}
+    rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
 }
 
 #
@@ -322,6 +323,21 @@ check_add_ldflags() {
     add_ldflags "$@"
 }
 
+check_asm_align() {
+    log check_asm_align "$@"
+    cat >${TMP_ASM} <<EOF
+section .rodata
+align 16
+EOF
+    log_file ${TMP_ASM}
+    check_cmd ${AS} ${ASFLAGS} -o ${TMP_O} ${TMP_ASM}
+    readelf -WS ${TMP_O} >${TMP_X}
+    log_file ${TMP_X}
+    if ! grep -q '\.rodata .* 16$' ${TMP_X}; then
+        die "${AS} ${ASFLAGS} does not support section alignment (nasm <=2.08?)"
+    fi
+}
+
 write_common_config_banner() {
     echo '# This file automatically generated by configure. Do not edit!' > config.mk
     echo "TOOLCHAIN := ${toolchain}" >> config.mk
@@ -440,13 +456,18 @@ process_common_cmdline() {
         disable builtin_libc
         alt_libc="${optval}"
         ;;
+        --as=*)
+        [ "${optval}" = yasm -o "${optval}" = nasm -o "${optval}" = auto ] \
+            || die "Must be yasm, nasm or auto: ${optval}"
+        alt_as="${optval}"
+        ;;
         --prefix=*)
         prefix="${optval}"
         ;;
         --libdir=*)
         libdir="${optval}"
         ;;
-        --libc|--prefix|--libdir)
+        --libc|--as|--prefix|--libdir)
         die "Option ${opt} requires argument"
         ;;
         --help|-h) show_help
@@ -802,6 +823,7 @@ process_common_toolchain() {
                 ;;
         esac
 
+        AS="${alt_as:-${AS:-auto}}"
         case  ${tgt_cc} in
             icc*)
                 CC=${CC:-icc}
@@ -830,7 +852,16 @@ process_common_toolchain() {
                 ;;
         esac
 
-        AS=yasm
+        case "${AS}" in
+            auto|"")
+                which nasm >/dev/null 2>&1 && AS=nasm
+                which yasm >/dev/null 2>&1 && AS=yasm
+                [ "${AS}" = auto -o -z "${AS}" ] \
+                    && die "Neither yasm nor nasm have been found"
+                ;;
+        esac
+        log_echo "  using $AS"
+        [ "${AS##*/}" = nasm ] && add_asflags -Ox
         AS_SFX=.asm
         case  ${tgt_os} in
             win*)
@@ -839,7 +870,9 @@ process_common_toolchain() {
             ;;
             linux*|solaris*)
                 add_asflags -f elf${bits}
-                enabled debug && add_asflags -g dwarf2
+                enabled debug && [ "${AS}" = yasm ] && add_asflags -g dwarf2
+                enabled debug && [ "${AS}" = nasm ] && add_asflags -g
+                [ "${AS##*/}" = nasm ] && check_asm_align
             ;;
             darwin*)
                 add_asflags -f macho${bits}
@@ -852,7 +885,7 @@ process_common_toolchain() {
                 # enabled icc && ! enabled pic && add_cflags -fno-pic -mdynamic-no-pic
                 enabled icc && ! enabled pic && add_cflags -fno-pic
             ;;
-            *) log "Warning: Unknown os $tgt_os while setting up yasm flags"
+            *) log "Warning: Unknown os $tgt_os while setting up $AS flags"
             ;;
         esac
     ;;
diff --git a/configure b/configure
index b3cd0c72b..0321e1abf 100755
--- a/configure
+++ b/configure
@@ -23,6 +23,7 @@ Advanced options:
   ${toggle_libs}                  don't build libraries
   ${toggle_examples}              don't build examples
   --libc=PATH                     path to alternate libc
+  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
   ${toggle_fast_unaligned}        don't use unaligned accesses, even when
                                   supported by hardware [auto]
   ${toggle_codec_srcs}            in/exclude codec library source code
@@ -266,6 +267,7 @@ CMDLINE_SELECT="
     libs
     examples
     libc
+    as
     fast_unaligned
     codec_srcs
     debug_libs

From d338d14c6bcf1bd9f9d028cf7ee177503076da47 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 6 Oct 2010 13:28:36 -0700
Subject: [PATCH 207/307] optimize fast_quantizer c version

As the zbin and rounding constants are normalized, rounding effectively
does the zbinning, therefore the zbin operation can be removed. In
addition, the memset on the two arrays are no longer necessary.

Change-Id: If39c353c42d7e052296cb65322e5218810b5cc4c
---
 vp8/encoder/quantize.c | 23 +++++++----------------
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 20ec9d11b..d06b04896 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -125,38 +125,29 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     int zbin;
     int x, y, z, sz;
     short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
 
-    vpx_memset(qcoeff_ptr, 0, 32);
-    vpx_memset(dqcoeff_ptr, 0, 32);
-
     eob = -1;
-
     for (i = 0; i < 16; i++)
     {
         rc   = vp8_default_zig_zag1d[i];
         z    = coeff_ptr[rc];
-        zbin = zbin_ptr[rc] ;
 
         sz = (z >> 31);                                 // sign of z
         x  = (z ^ sz) - sz;                             // x = abs(z)
 
-        if (x >= zbin)
-        {
-            y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-            x  = (y ^ sz) - sz;                         // get the sign back
-            qcoeff_ptr[rc] = x;                          // write to destination
-            dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+        x  = (y ^ sz) - sz;                         // get the sign back
+        qcoeff_ptr[rc] = x;                          // write to destination
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
 
-            if (y)
-            {
-                eob = i;                                // last nonzero coeffs
-            }
+        if (y)
+        {
+            eob = i;                                // last nonzero coeffs
         }
     }
     d->eob = eob + 1;

From d860f685b85ffafb32dfc20da53aaa81cb62c5c5 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Thu, 7 Oct 2010 11:43:19 -0400
Subject: [PATCH 208/307] Added vp8_fast_quantize_b_sse2

Moved vp8_fast_quantize_b_sse from quantize_mmx.asm into
quantize_sse2.asm and renamed.  Updated the assembly code to
match the C version.

Change-Id: I1766d9e1ca60e173f65badc0ca0c160c2b51b200
---
 vp8/encoder/x86/quantize_mmx.asm       | 153 -------------------------
 vp8/encoder/x86/quantize_sse2.asm      | 134 ++++++++++++++++++++++
 vp8/encoder/x86/x86_csystemdependent.c |  15 +--
 3 files changed, 142 insertions(+), 160 deletions(-)

diff --git a/vp8/encoder/x86/quantize_mmx.asm b/vp8/encoder/x86/quantize_mmx.asm
index 51cd94078..f29a54ecd 100644
--- a/vp8/encoder/x86/quantize_mmx.asm
+++ b/vp8/encoder/x86/quantize_mmx.asm
@@ -284,156 +284,3 @@ sym(vp8_fast_quantize_b_impl_mmx):
     UNSHADOW_ARGS
     pop         rbp
     ret
-
-
-;int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
-;                           short *qcoeff_ptr,short *dequant_ptr,
-;                           short *scan_mask, short *round_ptr,
-;                           short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_sse)
-sym(vp8_fast_quantize_b_impl_sse):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 8
-    push rsi
-    push rdi
-    ; end prolog
-
-
-        mov             rsi,        arg(0) ;coeff_ptr
-        movdqa          xmm0,       [rsi]
-
-        mov             rax,        arg(1) ;zbin_ptr
-        movdqa          xmm1,       [rax]
-
-        movdqa          xmm3,       xmm0
-        psraw           xmm0,       15
-
-        pxor            xmm3,       xmm0
-        psubw           xmm3,       xmm0            ; abs
-
-        movdqa          xmm2,       xmm3
-        pcmpgtw         xmm1,       xmm2
-
-        pandn           xmm1,       xmm2
-        movdqa          xmm3,       xmm1
-
-        mov             rdx,        arg(6) ; quant_ptr
-        movdqa          xmm1,       [rdx]
-
-        mov             rcx,        arg(5) ; round_ptr
-        movdqa          xmm2,       [rcx]
-
-        paddw           xmm3,       xmm2
-        pmulhuw         xmm3,       xmm1
-
-        pxor            xmm3,       xmm0
-        psubw           xmm3,       xmm0        ;gain the sign back
-
-        mov             rdi,        arg(2) ;qcoeff_ptr
-        movdqa          xmm0,       xmm3
-
-        movdqa          [rdi],      xmm3
-
-        mov             rax,        arg(3) ;dequant_ptr
-        movdqa          xmm2,       [rax]
-
-        pmullw          xmm3,       xmm2
-        mov             rax,        arg(7) ;dqcoeff_ptr
-
-        movdqa          [rax],      xmm3
-
-        ; next 8
-        movdqa          xmm4,       [rsi+16]
-
-        mov             rax,        arg(1) ;zbin_ptr
-        movdqa          xmm5,       [rax+16]
-
-        movdqa          xmm7,       xmm4
-        psraw           xmm4,       15
-
-        pxor            xmm7,       xmm4
-        psubw           xmm7,       xmm4            ; abs
-
-        movdqa          xmm6,       xmm7
-        pcmpgtw         xmm5,       xmm6
-
-        pandn           xmm5,       xmm6
-        movdqa          xmm7,       xmm5
-
-        movdqa          xmm5,       [rdx+16]
-        movdqa          xmm6,       [rcx+16]
-
-
-        paddw           xmm7,       xmm6
-        pmulhuw         xmm7,       xmm5
-
-        pxor            xmm7,       xmm4
-        psubw           xmm7,       xmm4;gain the sign back
-
-        mov             rdi,        arg(2) ;qcoeff_ptr
-
-        movdqa          xmm1,       xmm7
-        movdqa          [rdi+16],   xmm7
-
-        mov             rax,        arg(3) ;dequant_ptr
-        movdqa          xmm6,       [rax+16]
-
-        pmullw          xmm7,       xmm6
-        mov             rax,        arg(7) ;dqcoeff_ptr
-
-        movdqa          [rax+16],   xmm7
-        mov             rdi,        arg(4) ;scan_mask
-
-        pxor            xmm7,       xmm7
-        movdqa          xmm2,       [rdi]
-
-        movdqa          xmm3,       [rdi+16];
-        pcmpeqw         xmm0,       xmm7
-
-        pcmpeqw         xmm1,       xmm7
-        pcmpeqw         xmm6,       xmm6
-
-        pxor            xmm0,       xmm6
-        pxor            xmm1,       xmm6
-
-        psrlw           xmm0,       15
-        psrlw           xmm1,       15
-
-        pmaddwd         xmm0,       xmm2
-        pmaddwd         xmm1,       xmm3
-
-        movq            xmm2,       xmm0
-        movq            xmm3,       xmm1
-
-        psrldq          xmm0,       8
-        psrldq          xmm1,       8
-
-        paddd           xmm0,       xmm1
-        paddd           xmm2,       xmm3
-
-        paddd           xmm0,       xmm2
-        movq            xmm1,       xmm0
-
-        psrldq          xmm0,       4
-        paddd           xmm1,       xmm0
-
-        movq            rcx,        xmm1
-        and             rcx,        0xffff
-
-        xor             rdx,        rdx
-        sub             rdx,        rcx
-
-        bsr             rax,        rcx
-        inc             rax
-
-        sar             rdx,        31
-        and             rax,        rdx
-
-
-    ; begin epilog
-    pop rdi
-    pop rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index a1b1c40cb..324881337 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -252,3 +252,137 @@ rq_zigzag_1c:
     UNSHADOW_ARGS
     pop         rbp
     ret
+
+
+;int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
+;                           short *qcoeff_ptr,short *dequant_ptr,
+;                           short *scan_mask, short *round_ptr,
+;                           short *quant_ptr, short *dqcoeff_ptr);
+global sym(vp8_fast_quantize_b_impl_ssse2)
+sym(vp8_fast_quantize_b_impl_ssse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 7
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
+
+    ALIGN_STACK 16, rax
+
+    %define save_xmm6  0
+    %define save_xmm7 16
+
+    %define vp8_fastquantizeb_stack_size save_xmm7 + 16
+
+    sub         rsp, vp8_fastquantizeb_stack_size
+
+    movdqa      XMMWORD PTR[rsp + save_xmm6], xmm6
+    movdqa      XMMWORD PTR[rsp + save_xmm7], xmm7
+
+    mov         rdx, arg(0)                 ;coeff_ptr
+    mov         rcx, arg(2)                 ;dequant_ptr
+    mov         rax, arg(3)                 ;scan_mask
+    mov         rdi, arg(4)                 ;round_ptr
+    mov         rsi, arg(5)                 ;quant_ptr
+
+    movdqa      xmm0, XMMWORD PTR[rdx]
+    movdqa      xmm4, XMMWORD PTR[rdx + 16]
+
+    movdqa      xmm6, XMMWORD PTR[rdi]      ;round lo
+    movdqa      xmm7, XMMWORD PTR[rdi + 16] ;round hi
+
+    movdqa      xmm1, xmm0
+    movdqa      xmm5, xmm4
+
+    psraw       xmm0, 15                    ;sign of z (aka sz)
+    psraw       xmm4, 15                    ;sign of z (aka sz)
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+    psubw       xmm1, xmm0                  ;x = abs(z)
+    psubw       xmm5, xmm4                  ;x = abs(z)
+
+    paddw       xmm1, xmm6
+    paddw       xmm5, xmm7
+
+    pmulhw      xmm1, XMMWORD PTR[rsi]
+    pmulhw      xmm5, XMMWORD PTR[rsi + 16]
+
+    mov         rdi, arg(1)                 ;qcoeff_ptr
+    mov         rsi, arg(6)                 ;dqcoeff_ptr
+
+    movdqa      xmm6, XMMWORD PTR[rcx]
+    movdqa      xmm7, XMMWORD PTR[rcx + 16]
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+    psubw       xmm1, xmm0
+    psubw       xmm5, xmm4
+
+    movdqa      XMMWORD PTR[rdi], xmm1
+    movdqa      XMMWORD PTR[rdi + 16], xmm5
+
+    pmullw      xmm6, xmm1
+    pmullw      xmm7, xmm5
+
+    movdqa      xmm2, XMMWORD PTR[rax]
+    movdqa      xmm3, XMMWORD PTR[rax+16];
+
+    pxor        xmm4, xmm4            ;clear all bits
+    pcmpeqw     xmm1, xmm4
+    pcmpeqw     xmm5, xmm4
+
+    pcmpeqw     xmm4, xmm4            ;set all bits
+    pxor        xmm1, xmm4
+    pxor        xmm5, xmm4
+
+    psrlw       xmm1, 15
+    psrlw       xmm5, 15
+
+    pmaddwd     xmm1, xmm2
+    pmaddwd     xmm5, xmm3
+
+    movq        xmm2, xmm1
+    movq        xmm3, xmm5
+
+    psrldq      xmm1, 8
+    psrldq      xmm5, 8
+
+    paddd       xmm1, xmm5
+    paddd       xmm2, xmm3
+
+    paddd       xmm1, xmm2
+    movq        xmm5, xmm1
+
+    psrldq      xmm1, 4
+    paddd       xmm5, xmm1
+
+    movq        rcx,  xmm5
+    and         rcx,  0xffff
+
+    xor         rdx,  rdx
+    sub         rdx,  rcx
+
+    bsr         rax,  rcx
+    inc         rax
+
+    sar         rdx,  31
+    and         rax,  rdx
+
+    movdqa      XMMWORD PTR[rsi], xmm6        ;store dqcoeff
+    movdqa      XMMWORD PTR[rsi + 16], xmm7   ;store dqcoeff
+
+    movdqa      xmm6, XMMWORD PTR[rsp + save_xmm6]
+    movdqa      xmm7, XMMWORD PTR[rsp + save_xmm7]
+
+    add         rsp, vp8_fastquantizeb_stack_size
+    pop         rsp
+
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 18dc49cd4..7490a8add 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -88,24 +88,22 @@ void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
     vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
 }
 
-int vp8_fast_quantize_b_impl_sse(short *coeff_ptr, short *zbin_ptr,
+int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
                                  short *scan_mask, short *round_ptr,
                                  short *quant_ptr, short *dqcoeff_ptr);
-void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d)
+void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
 {
     short *scan_mask    = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
     short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
     short *round_ptr  = &b->round[0][0];
     short *quant_ptr  = &b->quant[0][0];
     short *qcoeff_ptr = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
 
-    d->eob = vp8_fast_quantize_b_impl_sse(
+    d->eob = vp8_fast_quantize_b_impl_ssse2(
                  coeff_ptr,
-                 zbin_ptr,
                  qcoeff_ptr,
                  dequant_ptr,
                  scan_mask,
@@ -116,6 +114,7 @@ void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d)
              );
 }
 
+
 int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
                                short *qcoeff_ptr,short *dequant_ptr,
                                const int *default_zig_zag, short *round_ptr,
@@ -285,8 +284,10 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
         /* cpi->rtcd.encodemb.sub* not implemented for wmt */
 
-        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse;
-        cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
+        /*cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
+
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
+
     }
 
 #endif

From 7e6f7b579a4e52bb2f81d5e3529e8b7f7d00bea5 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 7 Oct 2010 12:08:08 -0400
Subject: [PATCH 209/307] Remove unused file in encoder

Remove vp8/encoder/x86/csystemdependent.c

Change-Id: I7c590dcd07b68704d463a1452f62f29ffb1402f4
---
 vp8/encoder/x86/csystemdependent.c | 310 -----------------------------
 1 file changed, 310 deletions(-)
 delete mode 100644 vp8/encoder/x86/csystemdependent.c

diff --git a/vp8/encoder/x86/csystemdependent.c b/vp8/encoder/x86/csystemdependent.c
deleted file mode 100644
index 9fb67613d..000000000
--- a/vp8/encoder/x86/csystemdependent.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "variance.h"
-#include "onyx_int.h"
-
-SADFunction *vp8_sad16x16;
-SADFunction *vp8_sad16x8;
-SADFunction *vp8_sad8x16;
-SADFunction *vp8_sad8x8;
-SADFunction *vp8_sad4x4;
-
-variance_function *vp8_variance4x4;
-variance_function *vp8_variance8x8;
-variance_function *vp8_variance8x16;
-variance_function *vp8_variance16x8;
-variance_function *vp8_variance16x16;
-
-
-variance_function *vp8_mse16x16;
-
-sub_pixel_variance_function *vp8_sub_pixel_variance4x4;
-sub_pixel_variance_function *vp8_sub_pixel_variance8x8;
-sub_pixel_variance_function *vp8_sub_pixel_variance8x16;
-sub_pixel_variance_function *vp8_sub_pixel_variance16x8;
-sub_pixel_variance_function *vp8_sub_pixel_variance16x16;
-
-int (*vp8_block_error)(short *, short *);
-int (*vp8_mbblock_error)(MACROBLOCK *mb, int dc);
-void (*vp8_subtract_mby)(short *diff, unsigned char *src, unsigned char *pred, int stride);
-
-extern void vp8_subtract_mby_c(short *diff, unsigned char *src, unsigned char *pred, int stride);
-extern void vp8_subtract_mby_mmx(short *diff, unsigned char *src, unsigned char *pred, int stride);
-
-extern int vp8_block_error_c(short *, short *);
-extern int vp8_mbblock_error_c(MACROBLOCK *x, int dc);
-
-extern int vp8_block_error_mmx(short *, short *);
-extern int vp8_mbblock_error_mmx(MACROBLOCK *x, int dc);
-
-extern int vp8_block_error_xmm(short *, short *);
-extern int vp8_mbblock_error_xmm(MACROBLOCK *x, int dc);
-
-
-
-int (*vp8_mbuverror)(MACROBLOCK *mb);
-unsigned int (*vp8_get_mb_ss)(short *);
-void (*vp8_short_fdct4x4)(short *input, short *output, int pitch);
-void (*vp8_short_fdct8x4)(short *input, short *output, int pitch);
-void (*vp8_fast_fdct4x4)(short *input, short *output, int pitch);
-void (*vp8_fast_fdct8x4)(short *input, short *output, int pitch);
-
-void (*vp8_subtract_b)(BLOCK *be, BLOCKD *bd, int pitch);
-void (*vp8_subtract_mbuv)(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
-unsigned int (*vp8_get16x16pred_error)(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-unsigned int (*vp8_get8x8var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-unsigned int (*vp8_get16x16var)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-unsigned int (*vp8_get4x4sse_cs)(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
-
-// c imports
-extern int vp8_mbuverror_c(MACROBLOCK *mb);
-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern void vp8_short_fdct4x4_c(short *input, short *output, int pitch);
-extern void vp8_short_fdct8x4_c(short *input, short *output, int pitch);
-extern void vp8_fast_fdct4x4_c(short *input, short *output, int pitch);
-extern void vp8_fast_fdct8x4_c(short *input, short *output, int pitch);
-
-
-extern void vp8_subtract_b_c(BLOCK *be, BLOCKD *bd, int pitch);
-extern void vp8_subtract_mbuv_c(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-extern void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d);
-
-extern SADFunction vp8_sad16x16_c;
-extern SADFunction vp8_sad16x8_c;
-extern SADFunction vp8_sad8x16_c;
-extern SADFunction vp8_sad8x8_c;
-extern SADFunction vp8_sad4x4_c;
-
-extern SADFunction vp8_sad16x16_wmt;
-extern SADFunction vp8_sad16x8_wmt;
-extern SADFunction vp8_sad8x16_wmt;
-extern SADFunction vp8_sad8x8_wmt;
-extern SADFunction vp8_sad4x4_wmt;
-
-extern SADFunction vp8_sad16x16_mmx;
-extern SADFunction vp8_sad16x8_mmx;
-extern SADFunction vp8_sad8x16_mmx;
-extern SADFunction vp8_sad8x8_mmx;
-extern SADFunction vp8_sad4x4_mmx;
-
-extern variance_function vp8_variance16x16_c;
-extern variance_function vp8_variance8x16_c;
-extern variance_function vp8_variance16x8_c;
-extern variance_function vp8_variance8x8_c;
-extern variance_function vp8_variance4x4_c;
-extern variance_function vp8_mse16x16_c;
-
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_c;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_c;
-
-extern unsigned int vp8_get_mb_ss_c(short *);
-extern unsigned int vp8_get16x16pred_error_c(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get8x8var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get4x4sse_cs_c(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
-
-// mmx imports
-extern int vp8_mbuverror_mmx(MACROBLOCK *mb);
-extern void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d);
-extern void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch);
-extern void vp8_subtract_mbuv_mmx(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride);
-extern void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch);
-extern void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch);
-extern void vp8_fast_fdct8x4_mmx(short *input, short *output, int pitch);
-extern void vp8_fast_fdct4x4_mmx(short *input, short *output, int pitch);
-extern variance_function vp8_variance4x4_mmx;
-extern variance_function vp8_variance8x8_mmx;
-extern variance_function vp8_variance8x16_mmx;
-extern variance_function vp8_variance16x8_mmx;
-extern variance_function vp8_variance16x16_mmx;
-
-extern variance_function vp8_mse16x16_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_mmx;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_mmx;
-
-extern unsigned int vp8_get16x16pred_error_mmx(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get_mb_ss_mmx(short *);
-extern unsigned int vp8_get8x8var_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get4x4sse_cs_mmx(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride);
-
-
-// wmt imports
-extern int vp8_mbuverror_xmm(MACROBLOCK *mb);
-extern void vp8_fast_quantize_b_sse(BLOCK *b, BLOCKD *d);
-extern void vp8_fast_fdct8x4_wmt(short *input, short *output, int pitch);
-extern variance_function vp8_variance4x4_wmt;
-extern variance_function vp8_variance8x8_wmt;
-extern variance_function vp8_variance8x16_wmt;
-extern variance_function vp8_variance16x8_wmt;
-extern variance_function vp8_variance16x16_wmt;
-
-extern variance_function vp8_mse16x16_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance4x4_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x8_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance8x16_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x8_wmt;
-extern sub_pixel_variance_function vp8_sub_pixel_variance16x16_wmt;
-extern unsigned int vp8_get16x16pred_error_sse2(unsigned char *src_ptr, int src_stride, unsigned char *ref_ptr, int ref_stride);
-extern unsigned int vp8_get_mb_ss_sse2(short *src_ptr);
-extern unsigned int vp8_get8x8var_sse2(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-extern unsigned int vp8_get16x16var_sse2(unsigned char *src_ptr, int  source_stride, unsigned char *ref_ptr, int  recon_stride, unsigned int *SSE, int *Sum);
-
-extern void vpx_get_processor_flags(int *mmx_enabled, int *xmm_enabled, int *wmt_enabled);
-
-void vp8_cmachine_specific_config(void)
-{
-    int mmx_enabled;
-    int xmm_enabled;
-    int wmt_enabled;
-
-    vpx_get_processor_flags(&mmx_enabled, &xmm_enabled, &wmt_enabled);
-
-    if (wmt_enabled)         // Willamette
-    {
-        // Willamette instruction set available:
-        vp8_mbuverror                = vp8_mbuverror_xmm;
-        /* The sse quantizer has not been updated to match the new exact
-         * quantizer introduced in commit e04e2935
-         */
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
-#if 0 //new fdct
-        vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
-        vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_short_fdct8x4_wmt;
-#else
-        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
-        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
-        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
-#endif
-        vp8_subtract_b                = vp8_subtract_b_mmx;
-        vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
-        vp8_variance4x4              = vp8_variance4x4_mmx;
-        vp8_variance8x8              = vp8_variance8x8_mmx;
-        vp8_variance8x16             = vp8_variance8x16_wmt;
-        vp8_variance16x8             = vp8_variance16x8_wmt;
-        vp8_variance16x16            = vp8_variance16x16_wmt;
-        vp8_mse16x16                 = vp8_mse16x16_wmt;
-        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_wmt;
-        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_wmt;
-        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_wmt;
-        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_wmt;
-        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_wmt;
-        vp8_get_mb_ss                  = vp8_get_mb_ss_sse2;
-        vp8_get16x16pred_error        = vp8_get16x16pred_error_sse2;
-        vp8_get8x8var                = vp8_get8x8var_sse2;
-        vp8_get16x16var              = vp8_get16x16var_sse2;
-        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_mmx;
-        vp8_sad16x16                 = vp8_sad16x16_wmt;
-        vp8_sad16x8                  = vp8_sad16x8_wmt;
-        vp8_sad8x16                  = vp8_sad8x16_wmt;
-        vp8_sad8x8                   = vp8_sad8x8_wmt;
-        vp8_sad4x4                   = vp8_sad4x4_wmt;
-        vp8_block_error               = vp8_block_error_xmm;
-        vp8_mbblock_error             = vp8_mbblock_error_xmm;
-        vp8_subtract_mby              = vp8_subtract_mby_mmx;
-
-    }
-    else if (mmx_enabled)
-    {
-        // MMX instruction set available:
-        vp8_mbuverror                = vp8_mbuverror_mmx;
-        /* The mmx quantizer has not been updated to match the new exact
-         * quantizer introduced in commit e04e2935
-         */
-        vp8_fast_quantize_b            = vp8_fast_quantize_b_c;
-#if 0 // new fdct
-        vp8_short_fdct4x4             = vp8_short_fdct4x4_mmx;
-        vp8_short_fdct8x4             = vp8_short_fdct8x4_mmx;
-        vp8_fast_fdct4x4              = vp8_short_fdct4x4_mmx;
-        vp8_fast_fdct8x4              = vp8_short_fdct8x4_mmx;
-#else
-        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
-        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
-        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
-#endif
-        vp8_subtract_b                = vp8_subtract_b_mmx;
-        vp8_subtract_mbuv             = vp8_subtract_mbuv_mmx;
-        vp8_variance4x4              = vp8_variance4x4_mmx;
-        vp8_variance8x8              = vp8_variance8x8_mmx;
-        vp8_variance8x16             = vp8_variance8x16_mmx;
-        vp8_variance16x8             = vp8_variance16x8_mmx;
-        vp8_variance16x16            = vp8_variance16x16_mmx;
-        vp8_mse16x16                 = vp8_mse16x16_mmx;
-        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_mmx;
-        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_mmx;
-        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_mmx;
-        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_mmx;
-        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_mmx;
-        vp8_get_mb_ss                  = vp8_get_mb_ss_mmx;
-        vp8_get16x16pred_error        = vp8_get16x16pred_error_mmx;
-        vp8_get8x8var                = vp8_get8x8var_mmx;
-        vp8_get16x16var              = vp8_get16x16var_mmx;
-        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_mmx;
-        vp8_sad16x16                 = vp8_sad16x16_mmx;
-        vp8_sad16x8                  = vp8_sad16x8_mmx;
-        vp8_sad8x16                  = vp8_sad8x16_mmx;
-        vp8_sad8x8                   = vp8_sad8x8_mmx;
-        vp8_sad4x4                   = vp8_sad4x4_mmx;
-        vp8_block_error               = vp8_block_error_mmx;
-        vp8_mbblock_error             = vp8_mbblock_error_mmx;
-        vp8_subtract_mby              = vp8_subtract_mby_mmx;
-
-    }
-    else
-    {
-        // Pure C:
-        vp8_mbuverror                = vp8_mbuverror_c;
-        vp8_fast_quantize_b          = vp8_fast_quantize_b_c;
-        vp8_short_fdct4x4             = vp8_short_fdct4x4_c;
-        vp8_short_fdct8x4             = vp8_short_fdct8x4_c;
-        vp8_fast_fdct4x4              = vp8_short_fdct4x4_c;
-        vp8_fast_fdct8x4              = vp8_fast_fdct8x4_c;
-        vp8_subtract_b                = vp8_subtract_b_c;
-        vp8_subtract_mbuv             = vp8_subtract_mbuv_c;
-        vp8_variance4x4              = vp8_variance4x4_c;
-        vp8_variance8x8              = vp8_variance8x8_c;
-        vp8_variance8x16             = vp8_variance8x16_c;
-        vp8_variance16x8             = vp8_variance16x8_c;
-        vp8_variance16x16            = vp8_variance16x16_c;
-        vp8_mse16x16                 = vp8_mse16x16_c;
-        vp8_sub_pixel_variance4x4      = vp8_sub_pixel_variance4x4_c;
-        vp8_sub_pixel_variance8x8      = vp8_sub_pixel_variance8x8_c;
-        vp8_sub_pixel_variance8x16     = vp8_sub_pixel_variance8x16_c;
-        vp8_sub_pixel_variance16x8     = vp8_sub_pixel_variance16x8_c;
-        vp8_sub_pixel_variance16x16    = vp8_sub_pixel_variance16x16_c;
-        vp8_get_mb_ss                  = vp8_get_mb_ss_c;
-        vp8_get16x16pred_error        = vp8_get16x16pred_error_c;
-        vp8_get8x8var                = vp8_get8x8var_c;
-        vp8_get16x16var              = vp8_get16x16var_c;
-        vp8_get4x4sse_cs             = vp8_get4x4sse_cs_c;
-        vp8_sad16x16                 = vp8_sad16x16_c;
-        vp8_sad16x8                  = vp8_sad16x8_c;
-        vp8_sad8x16                  = vp8_sad8x16_c;
-        vp8_sad8x8                   = vp8_sad8x8_c;
-        vp8_sad4x4                   = vp8_sad4x4_c;
-        vp8_block_error               = vp8_block_error_c;
-        vp8_mbblock_error             = vp8_mbblock_error_c;
-        vp8_subtract_mby              = vp8_subtract_mby_c;
-    }
-
-}

From a31a58d19a89a75e15f17ec48235b9ac742d87bc Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Thu, 7 Oct 2010 14:13:36 -0400
Subject: [PATCH 210/307] configure is not in src

one comment in the README said the configure script was in src.
it's not. pointed out by Aaron Sherman

Change-Id: Ife0b53e096856d46669a99eefd71ac23d0351f65
---
 README | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README b/README
index f0625d3d7..c1a76687f 100644
--- a/README
+++ b/README
@@ -89,7 +89,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
   toolchain, the following command could be used (note, POSIX SH syntax, adapt
   to your shell as necessary):
 
-    $ CROSS=mipsel-linux-uclibc- ../libvpx/src/configure
+    $ CROSS=mipsel-linux-uclibc- ../libvpx/configure
 
   In addition, the executables to be invoked can be overridden by specifying the
   environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be

From 78f2d3edb74c43375afd0dcb7e131b19641456e0 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 11 Oct 2010 09:41:14 -0400
Subject: [PATCH 211/307] Remove ivfenc usage message leading underscores

An earlier automatic transform changed eg '\nOptions' to '\n_options'
which is incorrect in these printfs. Fix these.

Change-Id: I7e0f37931ef82b79fadddd7058ce0df5572e2ca1
---
 ivfdec.c |  2 +-
 ivfenc.c | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/ivfdec.c b/ivfdec.c
index 3919d6bb2..8065e70be 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -113,7 +113,7 @@ static void usage_exit()
             "Options:\n", exec_name);
     arg_show_usage(stderr, all_args);
 #if CONFIG_VP8_DECODER
-    fprintf(stderr, "\nvp8 Postprocessing Options:\n");
+    fprintf(stderr, "\nVP8 Postprocessing Options:\n");
     arg_show_usage(stderr, vp8_pp_args);
 #endif
     fprintf(stderr, "\nIncluded decoders:\n\n");
diff --git a/ivfenc.c b/ivfenc.c
index ad172927b..3fea5b5cc 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -562,18 +562,18 @@ static void usage_exit()
 
     fprintf(stderr, "Usage: %s <options> src_filename dst_filename\n", exec_name);
 
-    fprintf(stderr, "\n_options:\n");
+    fprintf(stderr, "\nOptions:\n");
     arg_show_usage(stdout, main_args);
-    fprintf(stderr, "\n_encoder Global Options:\n");
+    fprintf(stderr, "\nEncoder Global Options:\n");
     arg_show_usage(stdout, global_args);
-    fprintf(stderr, "\n_rate Control Options:\n");
+    fprintf(stderr, "\nRate Control Options:\n");
     arg_show_usage(stdout, rc_args);
-    fprintf(stderr, "\n_twopass Rate Control Options:\n");
+    fprintf(stderr, "\nTwopass Rate Control Options:\n");
     arg_show_usage(stdout, rc_twopass_args);
-    fprintf(stderr, "\n_keyframe Placement Options:\n");
+    fprintf(stderr, "\nKeyframe Placement Options:\n");
     arg_show_usage(stdout, kf_args);
 #if CONFIG_VP8_ENCODER
-    fprintf(stderr, "\n_vp8 Specific Options:\n");
+    fprintf(stderr, "\nVP8 Specific Options:\n");
     arg_show_usage(stdout, vp8_args);
 #endif
     fprintf(stderr, "\n"

From dd08db9315d77b183ba563c8a83cc4e0aee439d6 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 11 Oct 2010 09:34:48 -0700
Subject: [PATCH 212/307] Remove INTRARDOPT #define and intra_rd_opt option.

This is just eliminating some cruft.
Although a number of variables are declared only when INTRARDOPT
 is defined, they are used elsewhere without that protection, and
 no longer just for intra RDO.
The intra_rd_opt flag was hard-coded to 1 and never checked.

Change-Id: I83a81554ecee8053e7b4ccd8aa04e18fa60f8e4f
---
 vp8/encoder/onyx_if.c  | 5 -----
 vp8/encoder/onyx_int.h | 4 ----
 2 files changed, 9 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 6b7333f1d..34af6ea8e 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2188,11 +2188,6 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 #endif
 
 
-#ifdef INTRARDOPT
-    cpi->intra_rd_opt = 1;
-
-#endif
-
     cpi->frames_since_key = 8;        // Give a sensible default for the first frame.
     cpi->key_frame_frequency = cpi->oxcf.key_freq;
 
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 0ea6a3b47..cb768c018 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -29,7 +29,6 @@
 #include "vpx/internal/vpx_codec_internal.h"
 #include "mcomp.h"
 
-#define INTRARDOPT
 //#define SPEEDSTATS 1
 #define MIN_GF_INTERVAL             4
 #define DEFAULT_GF_INTERVAL         7
@@ -310,15 +309,12 @@ typedef struct
     int subseqblockweight;
     int errthresh;
 
-#ifdef INTRARDOPT
     int RDMULT;
     int RDDIV ;
 
     TOKENEXTRA *rdtok;
-    int intra_rd_opt;
     vp8_writer rdbc;
     int intra_mode_costs[10];
-#endif
 
 
     CODING_CONTEXT coding_context;

From 82c433988583e9c2a09ebf19bb9fa20c4de001ea Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 11 Oct 2010 13:49:52 -0700
Subject: [PATCH 213/307] Move vp8_strict_quantize_b inside EXACT_QUANT
 #define.

There is currently no inexact version of this function, so do not
 even compile it without EXACT_QUANT.
This will prevent someone from inadvertently trying to use it without
 the proper EXACT_QUANT setup.

Change-Id: Ia13491e0128afb281c05c9222ee5987101e4010d
---
 vp8/encoder/quantize.c | 117 +++++++++++++++++++++--------------------
 1 file changed, 59 insertions(+), 58 deletions(-)

diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index d06b04896..1a8c8ebb7 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -118,6 +118,65 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
     d->eob = eob + 1;
 }
+
+/* Perform regular quantization, with unbiased rounding and no zero bin. */
+void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
+{
+    int i;
+    int rc;
+    int eob;
+    int x;
+    int y;
+    int z;
+    int sz;
+    short *coeff_ptr;
+    short *quant_ptr;
+    short *quant_shift_ptr;
+    short *qcoeff_ptr;
+    short *dqcoeff_ptr;
+    short *dequant_ptr;
+
+    coeff_ptr = &b->coeff[0];
+    quant_ptr = &b->quant[0][0];
+    quant_shift_ptr = &b->quant_shift[0][0];
+    qcoeff_ptr = d->qcoeff;
+    dqcoeff_ptr = d->dqcoeff;
+    dequant_ptr = &d->dequant[0][0];
+    eob = - 1;
+    vpx_memset(qcoeff_ptr, 0, 32);
+    vpx_memset(dqcoeff_ptr, 0, 32);
+    for (i = 0; i < 16; i++)
+    {
+        int dq;
+        int round;
+
+        /*TODO: These arrays should be stored in zig-zag order.*/
+        rc = vp8_default_zig_zag1d[i];
+        z = coeff_ptr[rc];
+        dq = dequant_ptr[rc];
+        round = dq >> 1;
+        /* Sign of z. */
+        sz = -(z < 0);
+        x = (z + sz) ^ sz;
+        x += round;
+        if (x >= dq)
+        {
+            /* Quantize x. */
+            y  = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
+            /* Put the sign back. */
+            x = (y + sz) ^ sz;
+            /* Save the coefficient and its dequantized value. */
+            qcoeff_ptr[rc] = x;
+            dqcoeff_ptr[rc] = x * dq;
+            /* Remember the last non-zero coefficient. */
+            if (y)
+                eob = i;
+        }
+    }
+
+    d->eob = eob + 1;
+}
+
 #else
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
@@ -207,64 +266,6 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 
 #endif
 
-/* Perform regular quantization, with unbiased rounding and no zero bin. */
-void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
-{
-    int i;
-    int rc;
-    int eob;
-    int x;
-    int y;
-    int z;
-    int sz;
-    short *coeff_ptr;
-    short *quant_ptr;
-    short *quant_shift_ptr;
-    short *qcoeff_ptr;
-    short *dqcoeff_ptr;
-    short *dequant_ptr;
-
-    coeff_ptr = &b->coeff[0];
-    quant_ptr = &b->quant[0][0];
-    quant_shift_ptr = &b->quant_shift[0][0];
-    qcoeff_ptr = d->qcoeff;
-    dqcoeff_ptr = d->dqcoeff;
-    dequant_ptr = &d->dequant[0][0];
-    eob = - 1;
-    vpx_memset(qcoeff_ptr, 0, 32);
-    vpx_memset(dqcoeff_ptr, 0, 32);
-    for (i = 0; i < 16; i++)
-    {
-        int dq;
-        int round;
-
-        /*TODO: These arrays should be stored in zig-zag order.*/
-        rc = vp8_default_zig_zag1d[i];
-        z = coeff_ptr[rc];
-        dq = dequant_ptr[rc];
-        round = dq >> 1;
-        /* Sign of z. */
-        sz = -(z < 0);
-        x = (z + sz) ^ sz;
-        x += round;
-        if (x >= dq)
-        {
-            /* Quantize x. */
-            y  = (((x * quant_ptr[rc]) >> 16) + x) >> quant_shift_ptr[rc];
-            /* Put the sign back. */
-            x = (y + sz) ^ sz;
-            /* Save the coefficient and its dequantized value. */
-            qcoeff_ptr[rc] = x;
-            dqcoeff_ptr[rc] = x * dq;
-            /* Remember the last non-zero coefficient. */
-            if (y)
-                eob = i;
-        }
-    }
-
-    d->eob = eob + 1;
-}
-
 void vp8_quantize_mby(MACROBLOCK *x)
 {
     int i;

From f4a8594492b7edd854ede663eb23be97efd56b6b Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Mon, 11 Oct 2010 14:01:23 -0700
Subject: [PATCH 214/307] Add const qualifiers to variance/SAD functions.

These functions should never change their input, and there's no
 reason not to declare that.
This allows them to be passed static const data.

Change-Id: Ia49fe4b01e80e9afcb24b4844817694d4da5995c
---
 vp8/encoder/pickinter.c         |   8 +-
 vp8/encoder/sad_c.c             |  54 +++++++-------
 vp8/encoder/variance.h          |  24 +++---
 vp8/encoder/variance_c.c        |  68 ++++++++---------
 vp8/encoder/x86/variance_mmx.c  |  92 +++++++++++------------
 vp8/encoder/x86/variance_sse2.c | 126 ++++++++++++++++----------------
 6 files changed, 186 insertions(+), 186 deletions(-)

diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index eeeddcce9..8821b3a0b 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -92,9 +92,9 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v
 
 unsigned int vp8_get16x16pred_error_c
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_stride,
     int max_sad
 )
@@ -125,9 +125,9 @@ unsigned int vp8_get16x16pred_error_c
 
 unsigned int vp8_get4x4sse_cs_c
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     int max_sad
 )
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index e63be2bda..2ff122f68 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -12,9 +12,9 @@
 #include <stdlib.h>
 
 unsigned int vp8_sad16x16_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int max_sad)
 {
@@ -39,9 +39,9 @@ unsigned int vp8_sad16x16_c(
 
 static __inline
 unsigned int sad_mx_n_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int m,
     int n)
@@ -66,9 +66,9 @@ unsigned int sad_mx_n_c(
 
 
 unsigned int vp8_sad8x8_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int max_sad)
 {
@@ -78,9 +78,9 @@ unsigned int vp8_sad8x8_c(
 
 
 unsigned int vp8_sad16x8_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int max_sad)
 {
@@ -91,9 +91,9 @@ unsigned int vp8_sad16x8_c(
 
 
 unsigned int vp8_sad8x16_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int max_sad)
 {
@@ -103,9 +103,9 @@ unsigned int vp8_sad8x16_c(
 
 
 unsigned int vp8_sad4x4_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     int max_sad)
 {
@@ -114,9 +114,9 @@ unsigned int vp8_sad4x4_c(
 }
 
 void vp8_sad16x16x3_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     unsigned int *sad_array
 )
@@ -127,9 +127,9 @@ void vp8_sad16x16x3_c(
 }
 
 void vp8_sad16x8x3_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     unsigned int *sad_array
 )
@@ -140,9 +140,9 @@ void vp8_sad16x8x3_c(
 }
 
 void vp8_sad8x8x3_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     unsigned int *sad_array
 )
@@ -153,9 +153,9 @@ void vp8_sad8x8x3_c(
 }
 
 void vp8_sad8x16x3_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     unsigned int *sad_array
 )
@@ -166,9 +166,9 @@ void vp8_sad8x16x3_c(
 }
 
 void vp8_sad4x4x3_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  ref_stride,
     unsigned int *sad_array
 )
@@ -179,7 +179,7 @@ void vp8_sad4x4x3_c(
 }
 
 void vp8_sad16x16x4d_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
     unsigned char *ref_ptr[],
     int  ref_stride,
@@ -193,7 +193,7 @@ void vp8_sad16x16x4d_c(
 }
 
 void vp8_sad16x8x4d_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
     unsigned char *ref_ptr[],
     int  ref_stride,
@@ -207,7 +207,7 @@ void vp8_sad16x8x4d_c(
 }
 
 void vp8_sad8x8x4d_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
     unsigned char *ref_ptr[],
     int  ref_stride,
@@ -221,7 +221,7 @@ void vp8_sad8x8x4d_c(
 }
 
 void vp8_sad8x16x4d_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
     unsigned char *ref_ptr[],
     int  ref_stride,
@@ -235,7 +235,7 @@ void vp8_sad8x16x4d_c(
 }
 
 void vp8_sad4x4x4d_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  src_stride,
     unsigned char *ref_ptr[],
     int  ref_stride,
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 0341fbd9f..3c9ae987c 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -15,9 +15,9 @@
 #define prototype_sad(sym)\
     unsigned int (sym)\
     (\
-     unsigned char *src_ptr, \
+     const unsigned char *src_ptr, \
      int source_stride, \
-     unsigned char *ref_ptr, \
+     const unsigned char *ref_ptr, \
      int  ref_stride, \
      int max_sad\
     )
@@ -25,9 +25,9 @@
 #define prototype_sad_multi_same_address(sym)\
     void (sym)\
     (\
-     unsigned char *src_ptr, \
+     const unsigned char *src_ptr, \
      int source_stride, \
-     unsigned char *ref_ptr, \
+     const unsigned char *ref_ptr, \
      int  ref_stride, \
      unsigned int *sad_array\
     )
@@ -35,7 +35,7 @@
 #define prototype_sad_multi_dif_address(sym)\
     void (sym)\
     (\
-     unsigned char *src_ptr, \
+     const unsigned char *src_ptr, \
      int source_stride, \
      unsigned char *ref_ptr[4], \
      int  ref_stride, \
@@ -45,9 +45,9 @@
 #define prototype_variance(sym) \
     unsigned int (sym) \
     (\
-     unsigned char *src_ptr, \
+     const unsigned char *src_ptr, \
      int source_stride, \
-     unsigned char *ref_ptr, \
+     const unsigned char *ref_ptr, \
      int  ref_stride, \
      unsigned int *sse\
     )
@@ -55,9 +55,9 @@
 #define prototype_variance2(sym) \
     unsigned int (sym) \
     (\
-     unsigned char *src_ptr, \
+     const unsigned char *src_ptr, \
      int source_stride, \
-     unsigned char *ref_ptr, \
+     const unsigned char *ref_ptr, \
      int  ref_stride, \
      unsigned int *sse,\
      int *sum\
@@ -66,17 +66,17 @@
 #define prototype_subpixvariance(sym) \
     unsigned int (sym) \
     ( \
-      unsigned char  *src_ptr, \
+      const unsigned char  *src_ptr, \
       int  source_stride, \
       int  xoffset, \
       int  yoffset, \
-      unsigned char *ref_ptr, \
+      const unsigned char *ref_ptr, \
       int Refstride, \
       unsigned int *sse \
     );
 
 
-#define prototype_getmbss(sym) unsigned int (sym)(short *)
+#define prototype_getmbss(sym) unsigned int (sym)(const short *)
 
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/variance_x86.h"
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 179cd0d8e..177414351 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -41,7 +41,7 @@ const int vp8_bilinear_taps[8][2] =
 
 unsigned int vp8_get_mb_ss_c
 (
-    short *src_ptr
+    const short *src_ptr
 )
 {
     unsigned int i = 0, sum = 0;
@@ -58,9 +58,9 @@ unsigned int vp8_get_mb_ss_c
 
 
 void  vp8_variance(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     int  w,
     int  h,
@@ -90,9 +90,9 @@ void  vp8_variance(
 unsigned int
 vp8_get8x8var_c
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *SSE,
     int *Sum
@@ -106,9 +106,9 @@ vp8_get8x8var_c
 unsigned int
 vp8_get16x16var_c
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *SSE,
     int *Sum
@@ -123,9 +123,9 @@ vp8_get16x16var_c
 
 
 unsigned int vp8_variance16x16_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -139,9 +139,9 @@ unsigned int vp8_variance16x16_c(
 }
 
 unsigned int vp8_variance8x16_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -155,9 +155,9 @@ unsigned int vp8_variance8x16_c(
 }
 
 unsigned int vp8_variance16x8_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -172,9 +172,9 @@ unsigned int vp8_variance16x8_c(
 
 
 unsigned int vp8_variance8x8_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -188,9 +188,9 @@ unsigned int vp8_variance8x8_c(
 }
 
 unsigned int vp8_variance4x4_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -205,9 +205,9 @@ unsigned int vp8_variance4x4_c(
 
 
 unsigned int vp8_mse16x16_c(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -250,7 +250,7 @@ unsigned int vp8_mse16x16_c(
  ****************************************************************************/
 void vp8e_filter_block2d_bil_first_pass
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     unsigned short *output_ptr,
     unsigned int src_pixels_per_line,
     int pixel_step,
@@ -308,7 +308,7 @@ void vp8e_filter_block2d_bil_first_pass
  ****************************************************************************/
 void vp8e_filter_block2d_bil_second_pass
 (
-    unsigned short *src_ptr,
+    const unsigned short *src_ptr,
     unsigned char  *output_ptr,
     unsigned int  src_pixels_per_line,
     unsigned int  pixel_step,
@@ -366,7 +366,7 @@ void vp8e_filter_block2d_bil_second_pass
  ****************************************************************************/
 void vp8e_filter_block2d_bil
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     unsigned char *output_ptr,
     unsigned int src_pixels_per_line,
     int  *HFilter,
@@ -387,11 +387,11 @@ void vp8e_filter_block2d_bil
 
 unsigned int vp8_sub_pixel_variance4x4_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -415,11 +415,11 @@ unsigned int vp8_sub_pixel_variance4x4_c
 
 unsigned int vp8_sub_pixel_variance8x8_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -439,11 +439,11 @@ unsigned int vp8_sub_pixel_variance8x8_c
 
 unsigned int vp8_sub_pixel_variance16x16_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -463,11 +463,11 @@ unsigned int vp8_sub_pixel_variance16x16_c
 
 unsigned int vp8_sub_pixel_mse16x16_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -478,11 +478,11 @@ unsigned int vp8_sub_pixel_mse16x16_c
 
 unsigned int vp8_sub_pixel_variance16x8_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -502,11 +502,11 @@ unsigned int vp8_sub_pixel_variance16x8_c
 
 unsigned int vp8_sub_pixel_variance8x16_c
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index 2600ce96b..a5a89d6de 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -15,7 +15,7 @@
 
 extern void filter_block1d_h6_mmx
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     unsigned short *output_ptr,
     unsigned int src_pixels_per_line,
     unsigned int pixel_step,
@@ -25,7 +25,7 @@ extern void filter_block1d_h6_mmx
 );
 extern void filter_block1d_v6_mmx
 (
-    short *src_ptr,
+    const short *src_ptr,
     unsigned char *output_ptr,
     unsigned int pixels_per_line,
     unsigned int pixel_step,
@@ -37,34 +37,34 @@ extern void filter_block1d_v6_mmx
 extern unsigned int vp8_get_mb_ss_mmx(short *src_ptr);
 extern unsigned int vp8_get8x8var_mmx
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *SSE,
     int *Sum
 );
 extern unsigned int vp8_get4x4var_mmx
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *SSE,
     int *Sum
 );
 extern unsigned int vp8_get4x4sse_cs_mmx
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride
 );
 extern void vp8_filter_block2d_bil4x4_var_mmx
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     const short *HFilter,
     const short *VFilter,
@@ -73,9 +73,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
 );
 extern void vp8_filter_block2d_bil_var_mmx
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
     const short *HFilter,
@@ -126,9 +126,9 @@ void vp8_test_get_mb_ss(void)
 
 
 unsigned int vp8_get16x16var_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned *SSE,
     unsigned *SUM
@@ -157,9 +157,9 @@ unsigned int vp8_get16x16var_mmx(
 
 
 unsigned int vp8_variance4x4_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -173,9 +173,9 @@ unsigned int vp8_variance4x4_mmx(
 }
 
 unsigned int vp8_variance8x8_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -190,9 +190,9 @@ unsigned int vp8_variance8x8_mmx(
 }
 
 unsigned int vp8_mse16x16_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -212,9 +212,9 @@ unsigned int vp8_mse16x16_mmx(
 
 
 unsigned int vp8_variance16x16_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     int *sse)
 {
@@ -234,9 +234,9 @@ unsigned int vp8_variance16x16_mmx(
 }
 
 unsigned int vp8_variance16x8_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -255,9 +255,9 @@ unsigned int vp8_variance16x8_mmx(
 
 
 unsigned int vp8_variance8x16_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -296,11 +296,11 @@ DECLARE_ALIGNED(16, const short, vp8_vp7_bilinear_filters_mmx[8][8]) =
 
 unsigned int vp8_sub_pixel_variance4x4_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse)
 
@@ -320,11 +320,11 @@ unsigned int vp8_sub_pixel_variance4x4_mmx
 
 unsigned int vp8_sub_pixel_variance8x8_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -344,11 +344,11 @@ unsigned int vp8_sub_pixel_variance8x8_mmx
 
 unsigned int vp8_sub_pixel_variance16x16_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -383,11 +383,11 @@ unsigned int vp8_sub_pixel_variance16x16_mmx
 }
 
 unsigned int vp8_sub_pixel_mse16x16_mmx(
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -398,11 +398,11 @@ unsigned int vp8_sub_pixel_mse16x16_mmx(
 
 unsigned int vp8_sub_pixel_variance16x8_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -435,11 +435,11 @@ unsigned int vp8_sub_pixel_variance16x8_mmx
 
 unsigned int vp8_sub_pixel_variance8x16_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     int *sse
 )
@@ -457,9 +457,9 @@ unsigned int vp8_sub_pixel_variance8x16_mmx
 }
 
 unsigned int vp8_i_variance16x16_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -480,9 +480,9 @@ unsigned int vp8_i_variance16x16_mmx(
 }
 
 unsigned int vp8_i_variance8x16_mmx(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -501,11 +501,11 @@ unsigned int vp8_i_variance8x16_mmx(
 
 unsigned int vp8_i_sub_pixel_variance16x16_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -560,11 +560,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_mmx
 
 unsigned int vp8_i_sub_pixel_variance8x16_mmx
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index 5e750ba2f..fb0bac1cb 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -13,16 +13,16 @@
 #include "pragmas.h"
 #include "vpx_ports/mem.h"
 
-extern void filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d_v6_mmx(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
-extern void filter_block1d8_v6_sse2(short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_h6_mmx(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d_v6_mmx(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_h6_sse2(const unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
+extern void filter_block1d8_v6_sse2(const short *src_ptr, unsigned char *output_ptr, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, short *vp7_filter);
 
 extern void vp8_filter_block2d_bil4x4_var_mmx
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     const short *HFilter,
     const short *VFilter,
@@ -32,9 +32,9 @@ extern void vp8_filter_block2d_bil4x4_var_mmx
 
 extern unsigned int vp8_get4x4var_mmx
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *SSE,
     int *Sum
@@ -42,38 +42,38 @@ extern unsigned int vp8_get4x4var_mmx
 
 unsigned int vp8_get_mb_ss_sse2
 (
-    short *src_ptr
+    const short *src_ptr
 );
 unsigned int vp8_get16x16var_sse2
 (
-    unsigned char     *src_ptr,
-    int             source_stride,
-    unsigned char     *ref_ptr,
-    int             recon_stride,
-    unsigned int      *SSE,
-    int               *Sum
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *SSE,
+    int *Sum
 );
 unsigned int vp8_get16x16pred_error_sse2
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_stride
 );
 unsigned int vp8_get8x8var_sse2
 (
-    unsigned char     *src_ptr,
-    int             source_stride,
-    unsigned char     *ref_ptr,
-    int             recon_stride,
-    unsigned int      *SSE,
-    int               *Sum
+    const unsigned char *src_ptr,
+    int source_stride,
+    const unsigned char *ref_ptr,
+    int recon_stride,
+    unsigned int *SSE,
+    int *Sum
 );
 void vp8_filter_block2d_bil_var_sse2
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
     const short *HFilter,
@@ -83,9 +83,9 @@ void vp8_filter_block2d_bil_var_sse2
 );
 void vp8_half_horiz_vert_variance16x_h_sse2
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
     int *sum,
@@ -93,9 +93,9 @@ void vp8_half_horiz_vert_variance16x_h_sse2
 );
 void vp8_half_horiz_variance16x_h_sse2
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
     int *sum,
@@ -103,9 +103,9 @@ void vp8_half_horiz_variance16x_h_sse2
 );
 void vp8_half_vert_variance16x_h_sse2
 (
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int ref_pixels_per_line,
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int src_pixels_per_line,
     unsigned int Height,
     int *sum,
@@ -115,9 +115,9 @@ void vp8_half_vert_variance16x_h_sse2
 DECLARE_ALIGNED(16, extern short, vp8_vp7_bilinear_filters_mmx[8][8]);
 
 unsigned int vp8_variance4x4_wmt(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride)
 {
     unsigned int var;
@@ -132,9 +132,9 @@ unsigned int vp8_variance4x4_wmt(
 
 unsigned int vp8_variance8x8_wmt
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride)
 {
     unsigned int var;
@@ -149,9 +149,9 @@ unsigned int vp8_variance8x8_wmt
 
 unsigned int vp8_variance16x16_wmt
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -164,9 +164,9 @@ unsigned int vp8_variance16x16_wmt
     return (sse0 - ((sum0 * sum0) >> 8));
 }
 unsigned int vp8_mse16x16_wmt(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -182,9 +182,9 @@ unsigned int vp8_mse16x16_wmt(
 
 unsigned int vp8_variance16x8_wmt
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -203,9 +203,9 @@ unsigned int vp8_variance16x8_wmt
 
 unsigned int vp8_variance8x16_wmt
 (
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -239,11 +239,11 @@ DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_xmm[8][16]) =
 };
 unsigned int vp8_sub_pixel_variance4x4_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -263,11 +263,11 @@ unsigned int vp8_sub_pixel_variance4x4_wmt
 
 unsigned int vp8_sub_pixel_variance8x8_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -288,11 +288,11 @@ unsigned int vp8_sub_pixel_variance8x8_wmt
 
 unsigned int vp8_sub_pixel_variance16x16_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -364,11 +364,11 @@ unsigned int vp8_sub_pixel_variance16x16_wmt
 }
 
 unsigned int vp8_sub_pixel_mse16x16_wmt(
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -379,11 +379,11 @@ unsigned int vp8_sub_pixel_mse16x16_wmt(
 
 unsigned int vp8_sub_pixel_variance16x8_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 
@@ -417,11 +417,11 @@ unsigned int vp8_sub_pixel_variance16x8_wmt
 
 unsigned int vp8_sub_pixel_variance8x16_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -440,9 +440,9 @@ unsigned int vp8_sub_pixel_variance8x16_wmt
 }
 
 unsigned int vp8_i_variance16x16_wmt(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -464,9 +464,9 @@ unsigned int vp8_i_variance16x16_wmt(
 }
 
 unsigned int vp8_i_variance8x16_wmt(
-    unsigned char *src_ptr,
+    const unsigned char *src_ptr,
     int  source_stride,
-    unsigned char *ref_ptr,
+    const unsigned char *ref_ptr,
     int  recon_stride,
     unsigned int *sse)
 {
@@ -486,11 +486,11 @@ unsigned int vp8_i_variance8x16_wmt(
 
 unsigned int vp8_i_sub_pixel_variance16x16_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )
@@ -501,11 +501,11 @@ unsigned int vp8_i_sub_pixel_variance16x16_wmt
 
 unsigned int vp8_i_sub_pixel_variance8x16_wmt
 (
-    unsigned char  *src_ptr,
+    const unsigned char  *src_ptr,
     int  src_pixels_per_line,
     int  xoffset,
     int  yoffset,
-    unsigned char *dst_ptr,
+    const unsigned char *dst_ptr,
     int dst_pixels_per_line,
     unsigned int *sse
 )

From 136857475ecbbcc4ae6fd24f8a9a82b5a2610e4a Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 6 Oct 2010 22:39:16 -0700
Subject: [PATCH 215/307] Centralize mb skip state calculation

This patch moves the scattered updates to the mb skip state
(mode_info_context->mbmi.mb_skip_coeff) to vp8_tokenize_mb. Recent
changes to the quantizer exposed a bug where if a macroblock
could be coded as a skip but isn't, the encoder would run the
loopfilter but the decoder wouldn't, causing a reference buffer
mismatch.

The loopfilter is controlled by a flag called dc_diff. The decoder
looks at the number of decoded coefficients when setting this flag.
The encoder sets this flag based on the skip state, since any
skippable macroblock should be transmitted as a skip. The coefficient
optimization pass (vp8_optimize_b()) could change the coefficients
such that a block that was not a skip becomes one. The encoder was
not updating the skip state in this situation for intra coded blocks.

The underlying issue predates it, but this bug was recently triggered
by enabling trellis quantization on the Y2 block in commit dcd29e3,
and by changing the quantizer range control in commit 305be4e.

Change-Id: I5cce5da0dbc2d22f7d79ee48149f01e868a64802
---
 vp8/encoder/encodeframe.c |  6 -----
 vp8/encoder/encodeintra.c |  6 -----
 vp8/encoder/encodemb.c    | 32 ------------------------
 vp8/encoder/quantize.c    | 15 -----------
 vp8/encoder/tokenize.c    | 52 +++++++++++++++++----------------------
 5 files changed, 23 insertions(+), 88 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index efbe2365b..91384c73f 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -1071,8 +1071,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
 
         error_uv = vp8_rd_pick_intra_mbuv_mode(cpi, x, &rateuv, &rateuv_tokenonly, &distuv);
 
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
         rate += rateuv;
 
@@ -1139,8 +1137,6 @@ int vp8cx_encode_intra_macro_block(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t)
         else
             Error4x4 = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
         if (Error4x4 < Error16x16)
         {
             x->e_mbd.mode_info_context->mbmi.mode = B_PRED;
@@ -1237,8 +1233,6 @@ int vp8cx_encode_inter_macroblock
 
     if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip) ? 1 : 0;
-
         vp8_encode_intra16x16mbuv(IF_RTCD(&cpi->rtcd), x);
 
         if (xd->mode_info_context->mbmi.mode == B_PRED)
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index af80857d2..6611e0077 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -53,8 +53,6 @@ void vp8_encode_intra4x4block(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BLOCK
 
     x->quantize_b(be, b);
 
-    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
-
     vp8_inverse_transform_b(IF_RTCD(&rtcd->common->idct), b, 32);
 
     RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -70,8 +68,6 @@ void vp8_encode_intra4x4block_rd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x, BL
 
     x->quantize_b(be, b);
 
-    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!b->eob);
-
     IDCT_INVOKE(&rtcd->common->idct, idct16)(b->dqcoeff, b->diff, 32);
 
     RECON_INVOKE(&rtcd->common->recon, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
@@ -157,8 +153,6 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_transform_intra_mby(x);
 
-    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
-
     vp8_quantize_mby(x);
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 63cdf3c1a..1f9568902 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -547,35 +547,6 @@ void vp8_optimize_mb(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 }
 
 
-
-static void vp8_find_mb_skip_coef(MACROBLOCK *x)
-{
-    int i;
-
-    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
-
-    if (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV)
-    {
-        for (i = 0; i < 16; i++)
-        {
-            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (x->e_mbd.block[i].eob < 2);
-        }
-
-        for (i = 16; i < 25; i++)
-        {
-            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-    else
-    {
-        for (i = 0; i < 24; i++)
-        {
-            x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-        }
-    }
-}
-
-
 void vp8_optimize_mby(MACROBLOCK *x, const VP8_ENCODER_RTCD *rtcd)
 {
     int b;
@@ -663,10 +634,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
 #if !(CONFIG_REALTIME_ONLY)
     if (x->optimize && x->rddiv > 1)
-    {
         vp8_optimize_mb(x, rtcd);
-        vp8_find_mb_skip_coef(x);
-    }
 #endif
 
     vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 1a8c8ebb7..6cc224494 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -273,17 +273,10 @@ void vp8_quantize_mby(MACROBLOCK *x)
         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
     for (i = 0; i < 16; i++)
-    {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
-            (x->e_mbd.block[i].eob <= has_2nd_order);
-    }
 
     if(has_2nd_order)
-    {
         x->quantize_b(&x->block[24], &x->e_mbd.block[24]);
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[24].eob);
-    }
 }
 
 void vp8_quantize_mb(MACROBLOCK *x)
@@ -292,13 +285,8 @@ void vp8_quantize_mb(MACROBLOCK *x)
     int has_2nd_order=(x->e_mbd.mode_info_context->mbmi.mode != B_PRED
         && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
 
-    x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = 1;
     for (i = 0; i < 24+has_2nd_order; i++)
-    {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &=
-            (x->e_mbd.block[i].eob <= (has_2nd_order && i<16));
-    }
 }
 
 
@@ -307,8 +295,5 @@ void vp8_quantize_mbuv(MACROBLOCK *x)
     int i;
 
     for (i = 16; i < 24; i++)
-    {
         x->quantize_b(&x->block[i], &x->e_mbd.block[i]);
-        x->e_mbd.mode_info_context->mbmi.mb_skip_coeff &= (!x->e_mbd.block[i].eob);
-    }
 }
diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c
index 0e86f28df..e4da83379 100644
--- a/vp8/encoder/tokenize.c
+++ b/vp8/encoder/tokenize.c
@@ -198,6 +198,28 @@ static void tokenize1st_order_b
 
 }
 
+
+static int mb_is_skippable(MACROBLOCKD *x)
+{
+    int has_y2_block;
+    int skip = 1;
+    int i = 0;
+
+    has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED
+                    && x->mode_info_context->mbmi.mode != SPLITMV);
+    if (has_y2_block)
+    {
+        for (i = 0; i < 16; i++)
+            skip &= (x->block[i].eob < 2);
+    }
+
+    for (; i < 24 + has_y2_block; i++)
+        skip &= (!x->block[i].eob);
+
+    return skip;
+}
+
+
 void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 {
     ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context;
@@ -223,6 +245,7 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
 
 #if 1
 
+    x->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(x);
     if (x->mode_info_context->mbmi.mb_skip_coeff)
     {
 
@@ -247,35 +270,6 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t)
     cpi->skip_false_count++;
 #endif
 #if 0
-
-    if (x->mbmi.mode == B_PRED || x->mbmi.mode == SPLITMV)
-    {
-        int i, skip = 1;
-
-        for (i = 0; i < 24; i++)
-            skip &= (!x->block[i].eob);
-
-        if (skip != x->mbmi.mb_skip_coeff)
-            skip += 0;
-
-        x->mbmi.mb_skip_coeff = skip;
-    }
-    else
-    {
-        int i, skip = 1;
-
-        for (i = 0; i < 16; i++)
-            skip &= (x->block[i].eob < 2);
-
-        for (i = 16; i < 25; i++)
-            skip &= (!x->block[i].eob);
-
-        if (skip != x->mbmi.mb_skip_coeff)
-            skip += 0;
-
-        x->mbmi.mb_skip_coeff = skip;
-    }
-
     vpx_memcpy(cpi->coef_counts_backup, cpi->coef_counts, sizeof(cpi->coef_counts));
 #endif
 

From e50f5d40376b3817fa2bf2e2b2fdf8a0084c3460 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 12 Oct 2010 09:42:03 -0700
Subject: [PATCH 216/307] GCC inline restrictions were not adequate.

=r was not restrictive enough and the compiler was not returning
ebx correctly.

Change-Id: I7606e384067bd5fb69189802f1ff64ccc5aa02d6
---
 vpx_ports/x86.h | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index a8e4607cd..05a45917a 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -24,12 +24,11 @@
 #else
 #define cpuid(func,ax,bx,cx,dx)\
     __asm__ __volatile__ (\
-                          "pushl %%ebx     \n\t" \
-                          "cpuid           \n\t" \
-                          "movl  %%ebx, %1 \n\t" \
-                          "popl  %%ebx     \n\t" \
-                          : "=a" (ax), "=r" (bx), "=c" (cx), "=d" (dx) \
-                          : "a"  (func));
+                          "mov %%ebx, %%edi   \n\t" \
+                          "cpuid              \n\t" \
+                          "xchg %%edi, %%ebx  \n\t" \
+                          : "=a" (ax), "=D" (bx), "=c" (cx), "=d" (dx) \
+                          : "a" (func));
 #endif
 #else
 #if ARCH_X86_64

From 0f5c63e4f6fce8e846d799ad60b0c0d1895069f8 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 12 Oct 2010 14:55:31 -0700
Subject: [PATCH 217/307] Add processor dectection for x86.

Use cpuid to check the vendor string against known
architectures.

Change-Id: I3fbd7f73638d71857a0c4a44a6275eb295fb4cef
---
 libs.mk               |  1 +
 vpx_ports/x86.h       | 21 +++++++++++++++++
 vpx_ports/x86_cpuid.c | 53 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+)
 create mode 100644 vpx_ports/x86_cpuid.c

diff --git a/libs.mk b/libs.mk
index 45cf9bfdc..4beaa50cb 100644
--- a/libs.mk
+++ b/libs.mk
@@ -91,6 +91,7 @@ ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
 endif
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index 05a45917a..feffc3e69 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -14,6 +14,26 @@
 #include <stdlib.h>
 #include "config.h"
 
+typedef enum
+{
+    VPX_CPU_UNKNOWN = -1,
+    VPX_CPU_AMD,
+    VPX_CPU_AMD_OLD,
+    VPX_CPU_CENTAUR,
+    VPX_CPU_CYRIX,
+    VPX_CPU_INTEL,
+    VPX_CPU_NEXGEN,
+    VPX_CPU_NSC,
+    VPX_CPU_RISE,
+    VPX_CPU_SIS,
+    VPX_CPU_TRANSMETA,
+    VPX_CPU_TRANSMETA_OLD,
+    VPX_CPU_UMC,
+    VPX_CPU_VIA,
+
+    VPX_CPU_LAST
+}  vpx_cpu_t;
+
 #if defined(__GNUC__) && __GNUC__
 #if ARCH_X86_64
 #define cpuid(func,ax,bx,cx,dx)\
@@ -100,6 +120,7 @@ x86_simd_caps(void)
     return flags & mask;
 }
 
+vpx_cpu_t vpx_x86_vendor(void);
 
 #if ARCH_X86_64 && defined(_MSC_VER)
 unsigned __int64 __rdtsc(void);
diff --git a/vpx_ports/x86_cpuid.c b/vpx_ports/x86_cpuid.c
new file mode 100644
index 000000000..ce6403374
--- /dev/null
+++ b/vpx_ports/x86_cpuid.c
@@ -0,0 +1,53 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+#include "x86.h"
+
+struct cpuid_vendors
+{
+    char vendor_string[12];
+    vpx_cpu_t vendor_id;
+};
+
+static struct cpuid_vendors cpuid_vendor_list[VPX_CPU_LAST] =
+{
+    { "AuthenticAMD", VPX_CPU_AMD           },
+    { "AMDisbetter!", VPX_CPU_AMD_OLD       },
+    { "CentaurHauls", VPX_CPU_CENTAUR       },
+    { "CyrixInstead", VPX_CPU_CYRIX         },
+    { "GenuineIntel", VPX_CPU_INTEL         },
+    { "NexGenDriven", VPX_CPU_NEXGEN        },
+    { "Geode by NSC", VPX_CPU_NSC           },
+    { "RiseRiseRise", VPX_CPU_RISE          },
+    { "SiS SiS SiS ", VPX_CPU_SIS           },
+    { "GenuineTMx86", VPX_CPU_TRANSMETA     },
+    { "TransmetaCPU", VPX_CPU_TRANSMETA_OLD },
+    { "UMC UMC UMC ", VPX_CPU_UMC           },
+    { "VIA VIA VIA ", VPX_CPU_VIA           },
+};
+
+vpx_cpu_t vpx_x86_vendor(void)
+{
+    unsigned int reg_eax;
+    unsigned int vs[3];
+    int i;
+
+    /* Get the Vendor String from the CPU */
+    cpuid(0, reg_eax, vs[0], vs[2], vs[1]);
+
+    for (i = 0; i < VPX_CPU_LAST; i++)
+    {
+        if (strncmp ((const char *)vs, cpuid_vendor_list[i].vendor_string, 12) == 0)
+           return (cpuid_vendor_list[i].vendor_id);
+    }
+
+    return VPX_CPU_UNKNOWN;
+}

From 92df4a06d2c199e7cae333c23748a35a8cb10301 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 13 Oct 2010 16:57:57 -0700
Subject: [PATCH 218/307] Correct QWORD usage in assembly files

QWORD was being undefined because it was being used
incorrectly.

Change-Id: I3610cefa3d6f0da4054316760f78b9694cde3876
---
 vp8/encoder/x86/sad_mmx.asm   |  18 ++--
 vp8/encoder/x86/sad_sse2.asm  |  18 ++--
 vp8/encoder/x86/sad_sse3.asm  | 164 +++++++++++++++++-----------------
 vp8/encoder/x86/sad_ssse3.asm |  44 +++++----
 4 files changed, 118 insertions(+), 126 deletions(-)

diff --git a/vp8/encoder/x86/sad_mmx.asm b/vp8/encoder/x86/sad_mmx.asm
index 19041d49f..85cb023a4 100644
--- a/vp8/encoder/x86/sad_mmx.asm
+++ b/vp8/encoder/x86/sad_mmx.asm
@@ -17,8 +17,6 @@ global sym(vp8_sad8x8_mmx)
 global sym(vp8_sad4x4_mmx)
 global sym(vp8_sad16x8_mmx)
 
-%idefine QWORD
-
 ;unsigned int vp8_sad16x16_mmx(
 ;    unsigned char *src_ptr,
 ;    int  src_stride,
@@ -272,11 +270,11 @@ sym(vp8_sad4x4_mmx):
         movsxd          rax,        dword ptr arg(1) ;src_stride
         movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        movd            mm0,       QWORD PTR [rsi]
-        movd            mm1,       QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
 
-        movd            mm2,       QWORD PTR [rsi+rax]
-        movd            mm3,       QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
@@ -298,11 +296,11 @@ sym(vp8_sad4x4_mmx):
         lea             rsi,        [rsi+rax*2]
         lea             rdi,        [rdi+rdx*2]
 
-        movd            mm4,       QWORD PTR [rsi]
-        movd            mm5,       QWORD PTR [rdi]
+        movd            mm4,        DWORD PTR [rsi]
+        movd            mm5,        DWORD PTR [rdi]
 
-        movd            mm6,       QWORD PTR [rsi+rax]
-        movd            mm7,       QWORD PTR [rdi+rdx]
+        movd            mm6,        DWORD PTR [rsi+rax]
+        movd            mm7,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm4,        mm6
         punpcklbw       mm5,        mm7
diff --git a/vp8/encoder/x86/sad_sse2.asm b/vp8/encoder/x86/sad_sse2.asm
index 0f6c5d9c4..39ed79604 100644
--- a/vp8/encoder/x86/sad_sse2.asm
+++ b/vp8/encoder/x86/sad_sse2.asm
@@ -11,8 +11,6 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-%idefine QWORD
-
 ;unsigned int vp8_sad16x16_wmt(
 ;    unsigned char *src_ptr,
 ;    int  src_stride,
@@ -221,11 +219,11 @@ sym(vp8_sad4x4_wmt):
         movsxd          rax,        dword ptr arg(1) ;src_stride
         movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        movd            mm0,       QWORD PTR [rsi]
-        movd            mm1,       QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
 
-        movd            mm2,       QWORD PTR [rsi+rax]
-        movd            mm3,       QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
@@ -234,12 +232,12 @@ sym(vp8_sad4x4_wmt):
         lea             rsi,        [rsi+rax*2]
 
         lea             rdi,        [rdi+rdx*2]
-        movd            mm4,       QWORD PTR [rsi]
+        movd            mm4,        DWORD PTR [rsi]
 
-        movd            mm5,       QWORD PTR [rdi]
-        movd            mm6,       QWORD PTR [rsi+rax]
+        movd            mm5,        DWORD PTR [rdi]
+        movd            mm6,        DWORD PTR [rsi+rax]
 
-        movd            mm7,       QWORD PTR [rdi+rdx]
+        movd            mm7,        DWORD PTR [rdi+rdx]
         punpcklbw       mm4,        mm6
 
         punpcklbw       mm5,        mm7
diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index b12c81562..1b7293c20 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -11,23 +11,21 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-%idefine QWORD
-
 %macro PROCESS_16X2X3 1
 %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm5,       [rdi]
-        lddqu           xmm6,       [rdi+1]
-        lddqu           xmm7,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm5,       XMMWORD PTR [rdi]
+        lddqu           xmm6,       XMMWORD PTR [rdi+1]
+        lddqu           xmm7,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rdi]
-        lddqu           xmm2,       [rdi+1]
-        lddqu           xmm3,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm2,       XMMWORD PTR [rdi+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -37,10 +35,10 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rdi+rdx]
-        lddqu           xmm2,       QWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       QWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
+        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
 
         lea             rsi,        [rsi+rax*2]
         lea             rdi,        [rdi+rdx*2]
@@ -56,19 +54,19 @@
 
 %macro PROCESS_8X2X3 1
 %if %1
-        movq            mm0,       [rsi]
-        movq            mm5,       [rdi]
-        movq            mm6,       [rdi+1]
-        movq            mm7,       [rdi+2]
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm5,       QWORD PTR [rdi]
+        movq            mm6,       QWORD PTR [rdi+1]
+        movq            mm7,       QWORD PTR [rdi+2]
 
         psadbw          mm5,       mm0
         psadbw          mm6,       mm0
         psadbw          mm7,       mm0
 %else
-        movq            mm0,       [rsi]
-        movq            mm1,       [rdi]
-        movq            mm2,       [rdi+1]
-        movq            mm3,       [rdi+2]
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm1,       QWORD PTR [rdi]
+        movq            mm2,       QWORD PTR [rdi+1]
+        movq            mm3,       QWORD PTR [rdi+2]
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -105,45 +103,45 @@
 
 %macro PROCESS_16X2X4 1
 %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm4,       [rcx]
-        lddqu           xmm5,       [rdx]
-        lddqu           xmm6,       [rbx]
-        lddqu           xmm7,       [rdi]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm4,       XMMWORD PTR [rcx]
+        lddqu           xmm5,       XMMWORD PTR [rdx]
+        lddqu           xmm6,       XMMWORD PTR [rbx]
+        lddqu           xmm7,       XMMWORD PTR [rdi]
 
         psadbw          xmm4,       xmm0
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rcx]
-        lddqu           xmm2,       [rdx]
-        lddqu           xmm3,       [rbx]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rcx]
+        lddqu           xmm2,       XMMWORD PTR [rdx]
+        lddqu           xmm3,       XMMWORD PTR [rbx]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       [rdi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rcx+rbp]
-        lddqu           xmm2,       QWORD PTR [rdx+rbp]
-        lddqu           xmm3,       QWORD PTR [rbx+rbp]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rcx+rbp]
+        lddqu           xmm2,       XMMWORD PTR [rdx+rbp]
+        lddqu           xmm3,       XMMWORD PTR [rbx+rbp]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       QWORD PTR [rdi+rbp]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rbp]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
@@ -162,28 +160,28 @@
 
 %macro PROCESS_8X2X4 1
 %if %1
-        movq            mm0,        [rsi]
-        movq            mm4,        [rcx]
-        movq            mm5,        [rdx]
-        movq            mm6,        [rbx]
-        movq            mm7,        [rdi]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm4,        QWORD PTR [rcx]
+        movq            mm5,        QWORD PTR [rdx]
+        movq            mm6,        QWORD PTR [rbx]
+        movq            mm7,        QWORD PTR [rdi]
 
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
         psadbw          mm6,        mm0
         psadbw          mm7,        mm0
 %else
-        movq            mm0,        [rsi]
-        movq            mm1,        [rcx]
-        movq            mm2,        [rdx]
-        movq            mm3,        [rbx]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm1,        QWORD PTR [rcx]
+        movq            mm2,        QWORD PTR [rdx]
+        movq            mm3,        QWORD PTR [rbx]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        [rdi]
+        movq            mm1,        QWORD PTR [rdi]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
@@ -430,20 +428,20 @@ sym(vp8_sad4x4x3_sse3):
         movsxd          rax,        dword ptr arg(1) ;src_stride
         movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm1,        QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
 
-        movd            mm2,        QWORD PTR [rsi+rax]
-        movd            mm3,        QWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        QWORD PTR [rdi+1]
-        movd            mm5,        QWORD PTR [rdi+2]
+        movd            mm4,        DWORD PTR [rdi+1]
+        movd            mm5,        DWORD PTR [rdi+2]
 
-        movd            mm2,        QWORD PTR [rdi+rdx+1]
-        movd            mm3,        QWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm3,        DWORD PTR [rdi+rdx+2]
 
         psadbw          mm1,        mm0
 
@@ -458,24 +456,24 @@ sym(vp8_sad4x4x3_sse3):
         lea             rsi,        [rsi+rax*2]
         lea             rdi,        [rdi+rdx*2]
 
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm2,        QWORD PTR [rdi]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rdi]
 
-        movd            mm3,        QWORD PTR [rsi+rax]
-        movd            mm6,        QWORD PTR [rdi+rdx]
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm6,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm6
 
-        movd            mm3,        QWORD PTR [rdi+1]
-        movd            mm7,        QWORD PTR [rdi+2]
+        movd            mm3,        DWORD PTR [rdi+1]
+        movd            mm7,        DWORD PTR [rdi+2]
 
         psadbw          mm2,        mm0
 
         paddw           mm1,        mm2
 
-        movd            mm2,        QWORD PTR [rdi+rdx+1]
-        movd            mm6,        QWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm6,        DWORD PTR [rdi+rdx+2]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm6
@@ -846,23 +844,23 @@ sym(vp8_sad4x4x4d_sse3):
 
         xchg            rbx,        rax
 
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm1,        QWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rcx]
 
-        movd            mm2,        QWORD PTR [rsi+rax]
-        movd            mm3,        QWORD PTR [rcx+rbp]
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rcx+rbp]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        QWORD PTR [rdx]
-        movd            mm5,        QWORD PTR [rbx]
+        movd            mm4,        DWORD PTR [rdx]
+        movd            mm5,        DWORD PTR [rbx]
 
-        movd            mm6,        QWORD PTR [rdi]
-        movd            mm2,        QWORD PTR [rdx+rbp]
+        movd            mm6,        DWORD PTR [rdi]
+        movd            mm2,        DWORD PTR [rdx+rbp]
 
-        movd            mm3,        QWORD PTR [rbx+rbp]
-        movd            mm7,        QWORD PTR [rdi+rbp]
+        movd            mm3,        DWORD PTR [rbx+rbp]
+        movd            mm7,        DWORD PTR [rdi+rbp]
 
         psadbw          mm1,        mm0
 
@@ -885,17 +883,17 @@ sym(vp8_sad4x4x4d_sse3):
 
         lea             rdi,        [rdi+rbp*2]
 
-        movd            mm0,        QWORD PTR [rsi]
-        movd            mm2,        QWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rcx]
 
-        movd            mm3,        QWORD PTR [rsi+rax]
-        movd            mm7,        QWORD PTR [rcx+rbp]
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm7,        DWORD PTR [rcx+rbp]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm7
 
-        movd            mm3,        QWORD PTR [rdx]
-        movd            mm7,        QWORD PTR [rbx]
+        movd            mm3,        DWORD PTR [rdx]
+        movd            mm7,        DWORD PTR [rbx]
 
         psadbw          mm2,        mm0
         mov             rax,        rbp
@@ -906,8 +904,8 @@ sym(vp8_sad4x4x4d_sse3):
         paddw           mm1,        mm2
         movd            [rsi],      mm1
 
-        movd            mm2,        QWORD PTR [rdx+rax]
-        movd            mm1,        QWORD PTR [rbx+rax]
+        movd            mm2,        DWORD PTR [rdx+rax]
+        movd            mm1,        DWORD PTR [rbx+rax]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm1
@@ -915,8 +913,8 @@ sym(vp8_sad4x4x4d_sse3):
         psadbw          mm3,        mm0
         psadbw          mm7,        mm0
 
-        movd            mm2,        QWORD PTR [rdi]
-        movd            mm1,        QWORD PTR [rdi+rax]
+        movd            mm2,        DWORD PTR [rdi]
+        movd            mm1,        DWORD PTR [rdi+rax]
 
         paddw           mm3,        mm4
         paddw           mm7,        mm5
diff --git a/vp8/encoder/x86/sad_ssse3.asm b/vp8/encoder/x86/sad_ssse3.asm
index 94bbfffbc..69c5eaedc 100644
--- a/vp8/encoder/x86/sad_ssse3.asm
+++ b/vp8/encoder/x86/sad_ssse3.asm
@@ -11,23 +11,21 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-%idefine QWORD
-
 %macro PROCESS_16X2X3 1
 %if %1
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm5,       [rdi]
-        lddqu           xmm6,       [rdi+1]
-        lddqu           xmm7,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm5,       XMMWORD PTR [rdi]
+        lddqu           xmm6,       XMMWORD PTR [rdi+1]
+        lddqu           xmm7,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       [rsi]
-        lddqu           xmm1,       [rdi]
-        lddqu           xmm2,       [rdi+1]
-        lddqu           xmm3,       [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm2,       XMMWORD PTR [rdi+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -37,10 +35,10 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        lddqu           xmm1,       QWORD PTR [rdi+rdx]
-        lddqu           xmm2,       QWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       QWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
+        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
 
         lea             rsi,        [rsi+rax*2]
         lea             rdi,        [rdi+rdx*2]
@@ -56,9 +54,9 @@
 
 %macro PROCESS_16X2X3_OFFSET 2
 %if %1
-        movdqa          xmm0,       [rsi]
-        movdqa          xmm4,       [rdi]
-        movdqa          xmm7,       [rdi+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movdqa          xmm4,       XMMWORD PTR [rdi]
+        movdqa          xmm7,       XMMWORD PTR [rdi+16]
 
         movdqa          xmm5,       xmm7
         palignr         xmm5,       xmm4,       %2
@@ -72,9 +70,9 @@
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       [rsi]
-        movdqa          xmm4,       [rdi]
-        movdqa          xmm3,       [rdi+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movdqa          xmm4,       XMMWORD PTR [rdi]
+        movdqa          xmm3,       XMMWORD PTR [rdi+16]
 
         movdqa          xmm1,       xmm3
         palignr         xmm1,       xmm4,       %2
@@ -92,9 +90,9 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       QWORD PTR [rsi+rax]
-        movdqa          xmm4,       QWORD PTR [rdi+rdx]
-        movdqa          xmm3,       QWORD PTR [rdi+rdx+16]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        movdqa          xmm4,       XMMWORD PTR [rdi+rdx]
+        movdqa          xmm3,       XMMWORD PTR [rdi+rdx+16]
 
         movdqa          xmm1,       xmm3
         palignr         xmm1,       xmm4,       %2

From 1dc0ca13404b072f14df29b40c79b71bd49a4763 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 13 Oct 2010 17:08:13 -0700
Subject: [PATCH 219/307] Fix compiler warning about
 vp8_fast_quantize_b_impl_ssse2.

Typo had function defined as _ssse2 and prototyped as _sse2.

Change-Id: If9f19da1a83cff40774a90cf936d601c0bf1b7fe
---
 vp8/encoder/x86/quantize_sse2.asm      | 4 ++--
 vp8/encoder/x86/x86_csystemdependent.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/vp8/encoder/x86/quantize_sse2.asm b/vp8/encoder/x86/quantize_sse2.asm
index 324881337..1e0bd5c48 100644
--- a/vp8/encoder/x86/quantize_sse2.asm
+++ b/vp8/encoder/x86/quantize_sse2.asm
@@ -258,8 +258,8 @@ rq_zigzag_1c:
 ;                           short *qcoeff_ptr,short *dequant_ptr,
 ;                           short *scan_mask, short *round_ptr,
 ;                           short *quant_ptr, short *dqcoeff_ptr);
-global sym(vp8_fast_quantize_b_impl_ssse2)
-sym(vp8_fast_quantize_b_impl_ssse2):
+global sym(vp8_fast_quantize_b_impl_sse2)
+sym(vp8_fast_quantize_b_impl_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 7
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 7490a8add..6d36aff49 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -102,7 +102,7 @@ void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
     short *dqcoeff_ptr = d->dqcoeff;
     short *dequant_ptr = &d->dequant[0][0];
 
-    d->eob = vp8_fast_quantize_b_impl_ssse2(
+    d->eob = vp8_fast_quantize_b_impl_sse2(
                  coeff_ptr,
                  qcoeff_ptr,
                  dequant_ptr,

From d6da7b8ea1092d3c99591b2087811ad22d667d1b Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 14 Oct 2010 11:06:37 -0400
Subject: [PATCH 220/307] Improve bounds checking in vp8_diamond_search_sadx4()

In order to know if all 4/8 neighbor points are within the bounds,
4 bounds checking are enough instead of checking 4 bounds for
each points (16/32 checkings). This improvement reduces cost of
vp8_diamond_search_sadx4() by 30%, and gives encoder a 1.5%
performance gain (test options: 1 pass, good, speed=4).

Change-Id: Ie8da29d18a6ecfc9829e74ac02f6fa70e042331a
---
 vp8/encoder/mcomp.c | 99 ++++++++++++++++++++-------------------------
 1 file changed, 44 insertions(+), 55 deletions(-)

diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index b89354eaa..4d60b92d6 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1035,44 +1035,63 @@ int vp8_diamond_search_sadx4
 
     for (step = 0; step < tot_steps ; step++)
     {
-        int check_row_min, check_col_min, check_row_max, check_col_max;
+        int all_in = 1, t;
 
-        check_row_min = x->mv_row_min - best_mv->row;
-        check_row_max = x->mv_row_max - best_mv->row;
-        check_col_min = x->mv_col_min - best_mv->col;
-        check_col_max = x->mv_col_max - best_mv->col;
+        // To know if all neighbor points are within the bounds, 4 bounds checking are enough instead of
+        // checking 4 bounds for each points.
+        all_in &= ((best_mv->row + ss[i].mv.row)> x->mv_row_min);
+        all_in &= ((best_mv->row + ss[i+1].mv.row) < x->mv_row_max);
+        all_in &= ((best_mv->col + ss[i+2].mv.col) > x->mv_col_min);
+        all_in &= ((best_mv->col + ss[i+3].mv.col) < x->mv_col_max);
 
-        for (j = 0 ; j < x->searches_per_step ; j += 4)
+        if (all_in)
         {
-            unsigned char *block_offset[4];
-            unsigned int valid_block[4];
-            int all_in = 1, t;
+            unsigned int sad_array[4];
 
-            for (t = 0; t < 4; t++)
+            for (j = 0 ; j < x->searches_per_step ; j += 4)
             {
-                valid_block [t]  = (ss[t+i].mv.col > check_col_min);
-                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
-                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
-                valid_block [t] &= (ss[t+i].mv.row < check_row_max);
+                unsigned char *block_offset[4];
 
-                all_in &= valid_block[t];
-                block_offset[t] = ss[i+t].offset + best_address;
-            }
-
-            if (all_in)
-            {
-                unsigned int sad_array[4];
+                for (t = 0; t < 4; t++)
+                    block_offset[t] = ss[i+t].offset + best_address;
 
                 fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
 
                 for (t = 0; t < 4; t++, i++)
                 {
-                    thissad = sad_array[t];
-
-                    if (thissad < bestsad)
+                    if (sad_array[t] < bestsad)
                     {
                         this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
                         this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
+                        sad_array[t] += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+                        if (sad_array[t] < bestsad)
+                        {
+                            bestsad = sad_array[t];
+                            best_site = i;
+                        }
+                    }
+                }
+            }
+        }
+        else
+        {
+            for (j = 0 ; j < x->searches_per_step ; j++)
+            {
+                // Trap illegal vectors
+                this_row_offset = best_mv->row + ss[i].mv.row;
+                this_col_offset = best_mv->col + ss[i].mv.col;
+
+                if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
+                (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
+                {
+                    check_here = ss[i].offset + best_address;
+                    thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+
+                    if (thissad < bestsad)
+                    {
+                        this_mv.row = this_row_offset << 3;
+                        this_mv.col = this_col_offset << 3;
                         thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
 
                         if (thissad < bestsad)
@@ -1082,37 +1101,7 @@ int vp8_diamond_search_sadx4
                         }
                     }
                 }
-            }
-            else
-            {
-                int t;
-
-                for (t = 0; t < 4; i++, t++)
-                {
-                    // Trap illegal vectors
-                    if (valid_block[t])
-
-                    {
-                        check_here = block_offset[t];
-                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-                        if (thissad < bestsad)
-                        {
-                            this_row_offset = best_mv->row + ss[i].mv.row;
-                            this_col_offset = best_mv->col + ss[i].mv.col;
-
-                            this_mv.row = this_row_offset << 3;
-                            this_mv.col = this_col_offset << 3;
-                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                            if (thissad < bestsad)
-                            {
-                                bestsad = thissad;
-                                best_site = i;
-                            }
-                        }
-                    }
-                }
+                i++;
             }
         }
 

From 7804befb55fe66d0cbe780f298fb57155d826044 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 14 Oct 2010 14:25:03 -0400
Subject: [PATCH 221/307] Fix one gcc compiler warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

../libvpx/vp8/encoder/bitstream.c: In function ‘pack_inter_mode_mvs’:
../libvpx/vp8/encoder/bitstream.c:1026: warning: array subscript has type ‘char’

Change-Id: Ic77491e0a172fa1821e5b3e914d0dc41fe87c00f
---
 vp8/common/blockd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index 75dd4f79d..db8400ae4 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -169,7 +169,7 @@ typedef struct
         MV  as_mv;
     } mv;
 
-    char partitioning;
+    unsigned char partitioning;
     unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
     unsigned char dc_diff;
     unsigned char need_to_clamp_mvs;

From 39f41a4f36fae46168312f08f144b5cc22b0c69a Mon Sep 17 00:00:00 2001
From: Jim Bankoski <jimbankoski@google.com>
Date: Thu, 14 Oct 2010 16:19:06 -0400
Subject: [PATCH 222/307] safety check to avoid divide by 0s

---
 vp8/encoder/onyx_if.c  | 3 +++
 vp8/encoder/ratectrl.c | 2 ++
 2 files changed, 5 insertions(+)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 34af6ea8e..54eb31be8 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1420,6 +1420,9 @@ int vp8_reverse_trans(int x)
 };
 void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
 {
+    if(framerate < .1)
+        framerate = 30;
+
     cpi->oxcf.frame_rate             = framerate;
     cpi->output_frame_rate            = cpi->oxcf.frame_rate;
     cpi->per_frame_bandwidth          = (int)(cpi->oxcf.target_bandwidth / cpi->output_frame_rate);
diff --git a/vp8/encoder/ratectrl.c b/vp8/encoder/ratectrl.c
index 50f4db0b8..dd324f435 100644
--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -1481,6 +1481,8 @@ void vp8_adjust_key_frame_context(VP8_COMP *cpi)
         // allocated than those following other gfs.
         cpi->kf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 7 / 8;
         cpi->gf_overspend_bits += (cpi->projected_frame_size - cpi->per_frame_bandwidth) * 1 / 8;
+        if(!av_key_frame_frequency)
+            av_key_frame_frequency = 60;
 
         // Work out how much to try and recover per frame.
         // For one pass we estimate the number of frames to spread it over based upon past history.

From 2e53e9e53ff00113dc8a9952d596020c648f13db Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Thu, 14 Oct 2010 18:58:34 -0700
Subject: [PATCH 223/307] change to make use of more trellis quantization

when a subsequent frame is encoded as an alt reference frame, it is
unlikely that any mb in current frame will be used as reference for
future frames, so we can enable quantization optimization even when
the RD constant is slightly rate-biased. The change has an overall
benefit between 0.1% to 0.2% bit savings on the test sets based on
vpxssim scores.

Change-Id: I9aa7bc5cd573ea84e3ee655d2834c18c4460ceea
---
 vp8/encoder/encodeintra.c |  4 ++--
 vp8/encoder/encodemb.c    |  2 +-
 vp8/encoder/onyx_if.c     | 10 +++++++++-
 vp8/encoder/onyx_int.h    |  1 +
 4 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index 6611e0077..a790456f8 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -105,7 +105,7 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
         vp8_optimize_mby(x, rtcd);
 
 #endif
@@ -200,7 +200,7 @@ void vp8_encode_intra16x16mbuv(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 #if !(CONFIG_REALTIME_ONLY)
 #if 1
 
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
         vp8_optimize_mbuv(x, rtcd);
 
 #endif
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 1f9568902..782b6fd2d 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -633,7 +633,7 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
     vp8_quantize_mb(x);
 
 #if !(CONFIG_REALTIME_ONLY)
-    if (x->optimize && x->rddiv > 1)
+    if (x->optimize==2 ||(x->optimize && x->rddiv > 1))
         vp8_optimize_mb(x, rtcd);
 #endif
 
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 34af6ea8e..382a9de58 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -1282,7 +1282,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     }
 
     if (cpi->sf.optimize_coefficients == 1)
-        cpi->mb.optimize = 1;
+        cpi->mb.optimize = 1 + cpi->is_next_src_alt_ref;
     else
         cpi->mb.optimize = 0;
 
@@ -1749,6 +1749,7 @@ void vp8_init_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;
+    cpi->is_next_src_alt_ref = 0;
 
 #if 0
     // Experimental RD Code
@@ -2034,6 +2035,7 @@ void vp8_change_config(VP8_PTR ptr, VP8_CONFIG *oxcf)
     // YX Temp
     cpi->last_alt_ref_sei    = -1;
     cpi->is_src_frame_alt_ref = 0;
+    cpi->is_next_src_alt_ref = 0;
 
 #if 0
     // Experimental RD Code
@@ -5419,6 +5421,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
             cm->show_frame = 0;
             cpi->source_alt_ref_pending = FALSE;   // Clear Pending altf Ref flag.
             cpi->is_src_frame_alt_ref = 0;
+            cpi->is_next_src_alt_ref = 0;
         }
         else
 #endif
@@ -5437,6 +5440,11 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
                     cpi->is_src_frame_alt_ref = 0;
 
                 cpi->source_encode_index = (cpi->source_encode_index + 1) % cpi->oxcf.lag_in_frames;
+
+                if(cpi->source_encode_index == cpi->last_alt_ref_sei)
+                    cpi->is_next_src_alt_ref = 1;
+                else
+                    cpi->is_next_src_alt_ref = 0;
             }
 
 #endif
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index cb768c018..8a34a1f3a 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -273,6 +273,7 @@ typedef struct
 
     int last_alt_ref_sei;
     int is_src_frame_alt_ref;
+    int is_next_src_alt_ref;
 
     int gold_is_last; // golden frame same as last frame ( short circuit gold searches)
     int alt_is_last;  // Alt reference frame same as last ( short circuit altref search)

From 963bcd6c87078f3991d698e44450a8a9e327198c Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Fri, 15 Oct 2010 15:25:19 -0400
Subject: [PATCH 224/307] remove dead code

vp8_diamond_search_sadx4 isn't used in arm because there is no
corrosponding sdx4df as in x86. rather than keep it in sync with
../mcomp.c, delete it

vp8_hex_search had the original, more readable/understandable code if`d
out. it's also available in ../mcomp.c, so remove the dead copy

Change-Id: Ia42aa6e23b3a2e88040f467280befec091ec080e
---
 vp8/encoder/arm/mcomp_arm.c | 283 ------------------------------------
 1 file changed, 283 deletions(-)

diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 4e95c47ac..9a138e6fc 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -786,8 +786,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     return bestmse;
 }
 
-#if 1
-
 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
 #define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
@@ -937,120 +935,6 @@ cal_neighbors:
 #undef ERR
 #undef CHECK_BETTER
 
-#else
-
-#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-
-int vp8_hex_search
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t      sf,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
-    MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
-    int i, j;
-    unsigned char *src = (*(b->base_src) + b->src);
-    int src_stride = b->src_stride;
-    //int rr= ref_mv->row,rc= ref_mv->col,br=rr,bc=rc,tr,tc;
-    int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
-    unsigned int besterr, thiserr = 0x7fffffff;
-
-    /*
-        if ( rc < x->mv_col_min) bc = x->mv_col_min;
-        if ( rc > x->mv_col_max) bc = x->mv_col_max;
-        if ( rr < x->mv_row_min) br = x->mv_row_min;
-        if ( rr > x->mv_row_max) br = x->mv_row_max;
-        rr>>=1;
-        rc>>=1;
-        br>>=3;
-        bc>>=3;
-    */
-    if (bc < x->mv_col_min) bc = x->mv_col_min;
-
-    if (bc > x->mv_col_max) bc = x->mv_col_max;
-
-    if (br < x->mv_row_min) br = x->mv_row_min;
-
-    if (br > x->mv_row_max) br = x->mv_row_max;
-
-    rr >>= 1;
-    rc >>= 1;
-
-    besterr = ERR(br, bc, thiserr);
-
-    // hex search  jbb changed to 127 to avoid max 256 problem steping by 2.
-    for (j = 0; j < 127; j++)
-    {
-        tr = br;
-        tc = bc;
-
-        for (i = 0; i < 6; i++)
-        {
-            int nr = tr + hex[i].row, nc = tc + hex[i].col;
-
-            if (nc < x->mv_col_min) continue;
-
-            if (nc > x->mv_col_max) continue;
-
-            if (nr < x->mv_row_min) continue;
-
-            if (nr > x->mv_row_max) continue;
-
-            CHECK_BETTER(thiserr, nr, nc);
-        }
-
-        if (tr == br && tc == bc)
-            break;
-    }
-
-    // check 8 1 away neighbors
-    tr = br;
-    tc = bc;
-
-    for (i = 0; i < 8; i++)
-    {
-        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
-
-        if (nc < x->mv_col_min) continue;
-
-        if (nc > x->mv_col_max) continue;
-
-        if (nr < x->mv_row_min) continue;
-
-        if (nr > x->mv_row_max) continue;
-
-        CHECK_BETTER(thiserr, nr, nc);
-    }
-
-    best_mv->row = br;
-    best_mv->col = bc;
-
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-
-#endif
-
 int vp8_diamond_search_sad
 (
     MACROBLOCK *x,
@@ -1166,173 +1050,6 @@ int vp8_diamond_search_sad
     + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 }
 
-int vp8_diamond_search_sadx4
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_ptr_t *fn_ptr,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    int i, j, step;
-
-    unsigned char *what = (*(b->base_src) + b->src);
-    int what_stride = b->src_stride;
-    unsigned char *in_what;
-    int in_what_stride = d->pre_stride;
-    unsigned char *best_address;
-
-    int tot_steps;
-    MV this_mv;
-
-    int bestsad = INT_MAX;
-    int best_site = 0;
-    int last_site = 0;
-
-    int ref_row = ref_mv->row >> 3;
-    int ref_col = ref_mv->col >> 3;
-    int this_row_offset;
-    int this_col_offset;
-    search_site *ss;
-
-    unsigned char *check_here;
-    int thissad;
-
-    // Work out the start point for the search
-    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
-    best_address = in_what;
-
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
-    }
-
-    // search_param determines the length of the initial step and hence the number of iterations
-    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
-    ss = &x->ss[search_param * x->searches_per_step];
-    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
-    i = 1;
-    best_mv->row = ref_row;
-    best_mv->col = ref_col;
-
-    *num00 = 0;
-
-    for (step = 0; step < tot_steps ; step++)
-    {
-        int check_row_min, check_col_min, check_row_max, check_col_max;
-
-        check_row_min = x->mv_row_min - best_mv->row;
-        check_row_max = x->mv_row_max - best_mv->row;
-        check_col_min = x->mv_col_min - best_mv->col;
-        check_col_max = x->mv_col_max - best_mv->col;
-
-        for (j = 0 ; j < x->searches_per_step ; j += 4)
-        {
-            char *block_offset[4];
-            unsigned int valid_block[4];
-            int all_in = 1, t;
-
-            for (t = 0; t < 4; t++)
-            {
-                valid_block [t]  = (ss[t+i].mv.col > check_col_min);
-                valid_block [t] &= (ss[t+i].mv.col < check_col_max);
-                valid_block [t] &= (ss[t+i].mv.row > check_row_min);
-                valid_block [t] &= (ss[t+i].mv.row < check_row_max);
-
-                all_in &= valid_block[t];
-                block_offset[t] = ss[i+t].offset + best_address;
-            }
-
-            if (all_in)
-            {
-                int sad_array[4];
-
-                fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array);
-
-                for (t = 0; t < 4; t++, i++)
-                {
-                    thissad = sad_array[t];
-
-                    if (thissad < bestsad)
-                    {
-                        this_mv.row = (best_mv->row + ss[i].mv.row) << 3;
-                        this_mv.col = (best_mv->col + ss[i].mv.col) << 3;
-                        thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                        if (thissad < bestsad)
-                        {
-                            bestsad = thissad;
-                            best_site = i;
-                        }
-                    }
-                }
-            }
-            else
-            {
-                int t;
-
-                for (t = 0; t < 4; i++, t++)
-                {
-                    // Trap illegal vectors
-                    if (valid_block[t])
-
-                    {
-                        check_here = block_offset[t];
-                        thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-                        if (thissad < bestsad)
-                        {
-                            this_row_offset = best_mv->row + ss[i].mv.row;
-                            this_col_offset = best_mv->col + ss[i].mv.col;
-
-                            this_mv.row = this_row_offset << 3;
-                            this_mv.col = this_col_offset << 3;
-                            thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                            if (thissad < bestsad)
-                            {
-                                bestsad = thissad;
-                                best_site = i;
-                            }
-                        }
-                    }
-                }
-            }
-        }
-
-        if (best_site != last_site)
-        {
-            best_mv->row += ss[best_site].mv.row;
-            best_mv->col += ss[best_site].mv.col;
-            best_address += ss[best_site].offset;
-            last_site = best_site;
-        }
-        else if (best_address == in_what)
-            (*num00)++;
-    }
-
-    this_mv.row = best_mv->row << 3;
-    this_mv.col = best_mv->col << 3;
-
-    if (bestsad == INT_MAX)
-        return INT_MAX;
-
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-}
-
-
 #if !(CONFIG_REALTIME_ONLY)
 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
 {

From ce1ce992ce30f01c723b4fcf67ea17266ba91712 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Mon, 18 Oct 2010 13:23:39 -0400
Subject: [PATCH 225/307] copy compiler warning fixes

generic version got fixed, but not the arm version. fixes:
vp8/encoder/arm/mcomp_arm.c: In function 'vp8_full_search_sadx3':
vp8/encoder/arm/mcomp_arm.c:1208: warning: pointer targets in passing
argument 5 of 'fn_ptr->sdx3f' differ in signedness
vp8/encoder/arm/mcomp_arm.c:1208: note: expected 'unsigned int *' but
argument is of type 'int *'

and another unsigned change to keep the files similar

Change-Id: I1b6255dc3a03b90394a791ee0d15d8167d9454db
---
 vp8/encoder/arm/mcomp_arm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 9a138e6fc..56358328e 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -1155,7 +1155,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     int r, c;
 
     unsigned char *check_here;
-    int thissad;
+    unsigned int thissad;
 
     int ref_row = ref_mv->row >> 3;
     int ref_col = ref_mv->col >> 3;
@@ -1165,7 +1165,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     int col_min = ref_col - distance;
     int col_max = ref_col + distance;
 
-    int sad_array[3];
+    unsigned int sad_array[3];
 
     // Work out the mid point for the search
     in_what = *(d->base_pre) + d->pre;

From 4db2076594be3a48c6c1b3755c1d9621f5ad1c5b Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Mon, 18 Oct 2010 14:15:15 -0400
Subject: [PATCH 226/307] Add SSE2 subtract functions

Instead of doing 8-bit data unpack and 16-bit subtraction, use
psubb to do 16 8-bit subtractions and pcmpgtb to preserve the
sign information. This does not bring noticable gain since
these functions are not called frequently.

Change-Id: I90a0dfaa3db9d422e4ada324076596ffb178548e
---
 vp8/encoder/x86/encodemb_x86.h         |  13 +-
 vp8/encoder/x86/subtract_mmx.asm       |   2 +-
 vp8/encoder/x86/subtract_sse2.asm      | 348 +++++++++++++++++++++++++
 vp8/encoder/x86/x86_csystemdependent.c |  18 +-
 vp8/vp8cx.mk                           |   1 +
 5 files changed, 377 insertions(+), 5 deletions(-)
 create mode 100644 vp8/encoder/x86/subtract_sse2.asm

diff --git a/vp8/encoder/x86/encodemb_x86.h b/vp8/encoder/x86/encodemb_x86.h
index d090b2d89..69b3edd66 100644
--- a/vp8/encoder/x86/encodemb_x86.h
+++ b/vp8/encoder/x86/encodemb_x86.h
@@ -55,7 +55,9 @@ extern prototype_submbuv(vp8_subtract_mbuv_mmx);
 extern prototype_berr(vp8_block_error_xmm);
 extern prototype_mberr(vp8_mbblock_error_xmm);
 extern prototype_mbuverr(vp8_mbuverror_xmm);
-
+extern prototype_subb(vp8_subtract_b_sse2);
+extern prototype_submby(vp8_subtract_mby_sse2);
+extern prototype_submbuv(vp8_subtract_mbuv_sse2);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_encodemb_berr
@@ -67,6 +69,15 @@ extern prototype_mbuverr(vp8_mbuverror_xmm);
 #undef  vp8_encodemb_mbuverr
 #define vp8_encodemb_mbuverr vp8_mbuverror_xmm
 
+#undef  vp8_encodemb_subb
+#define vp8_encodemb_subb vp8_subtract_b_sse2
+
+#undef  vp8_encodemb_submby
+#define vp8_encodemb_submby vp8_subtract_mby_sse2
+
+#undef  vp8_encodemb_submbuv
+#define vp8_encodemb_submbuv vp8_subtract_mbuv_sse2
+
 #endif
 #endif
 
diff --git a/vp8/encoder/x86/subtract_mmx.asm b/vp8/encoder/x86/subtract_mmx.asm
index 8fe3ee174..a47e1f0d6 100644
--- a/vp8/encoder/x86/subtract_mmx.asm
+++ b/vp8/encoder/x86/subtract_mmx.asm
@@ -12,7 +12,7 @@
 %include "vpx_ports/x86_abi_support.asm"
 
 ;void vp8_subtract_b_mmx_impl(unsigned char *z,  int src_stride,
-;                            unsigned short *diff, unsigned char *Predictor,
+;                            short *diff, unsigned char *Predictor,
 ;                            int pitch);
 global sym(vp8_subtract_b_mmx_impl)
 sym(vp8_subtract_b_mmx_impl):
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
new file mode 100644
index 000000000..ef329de71
--- /dev/null
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -0,0 +1,348 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+;void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
+;                            short *diff, unsigned char *Predictor,
+;                            int pitch);
+global sym(vp8_subtract_b_sse2_impl)
+sym(vp8_subtract_b_sse2_impl):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push rsi
+    push rdi
+    ; end prolog
+
+        mov     rdi,        arg(2) ;diff
+        mov     rax,        arg(3) ;Predictor
+        mov     rsi,        arg(0) ;z
+        movsxd  rdx,        dword ptr arg(1);src_stride;
+        movsxd  rcx,        dword ptr arg(4);pitch
+        pxor    mm7,        mm7
+
+        movd    mm0,        [rsi]
+        movd    mm1,        [rax]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    [rdi],      mm0
+
+        movd    mm0,        [rsi+rdx]
+        movd    mm1,        [rax+rcx]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    [rdi+rcx*2],mm0
+
+        movd    mm0,        [rsi+rdx*2]
+        movd    mm1,        [rax+rcx*2]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    [rdi+rcx*4],        mm0
+
+        lea     rsi,        [rsi+rdx*2]
+        lea     rcx,        [rcx+rcx*2]
+
+        movd    mm0,        [rsi+rdx]
+        movd    mm1,        [rax+rcx]
+        punpcklbw   mm0,    mm7
+        punpcklbw   mm1,    mm7
+        psubw   mm0,        mm1
+        movq    [rdi+rcx*2],        mm0
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_subtract_mby_sse2(short *diff, unsigned char *src, unsigned char *pred, int stride)
+global sym(vp8_subtract_mby_sse2)
+sym(vp8_subtract_mby_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 4
+    push rsi
+    push rdi
+    ; end prolog
+
+            mov         rsi,            arg(1) ;src
+            mov         rdi,            arg(0) ;diff
+
+            mov         rax,            arg(2) ;pred
+            movsxd      rdx,            dword ptr arg(3) ;stride
+
+            mov         rcx,            8      ; do two lines at one time
+
+submby_loop:
+            movdqa      xmm0,           [rsi]   ; src
+            movdqa      xmm1,           [rax]   ; pred
+
+            movdqa      xmm2,           xmm0
+            psubb       xmm0,           xmm1
+
+            pxor        xmm1,           [GLOBAL(t80)]   ;convert to signed values
+            pxor        xmm2,           [GLOBAL(t80)]
+            pcmpgtb     xmm1,           xmm2            ; obtain sign information
+
+            movdqa      xmm2,    xmm0
+            movdqa      xmm3,    xmm1
+            punpcklbw   xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw   xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa      [rdi],   xmm0
+            movdqa      [rdi +16], xmm2
+
+            movdqa      xmm4,           [rsi + rdx]
+            movdqa      xmm5,           [rax + 16]
+
+            movdqa      xmm6,           xmm4
+            psubb       xmm4,           xmm5
+
+            pxor        xmm5,           [GLOBAL(t80)]   ;convert to signed values
+            pxor        xmm6,           [GLOBAL(t80)]
+            pcmpgtb     xmm5,           xmm6            ; obtain sign information
+
+            movdqa      xmm6,    xmm4
+            movdqa      xmm7,    xmm5
+            punpcklbw   xmm4,    xmm5            ; put sign back to subtraction
+            punpckhbw   xmm6,    xmm7            ; put sign back to subtraction
+
+            movdqa      [rdi +32], xmm4
+            movdqa      [rdi +48], xmm6
+
+            add         rdi,            64
+            add         rax,            32
+            lea         rsi,            [rsi+rdx*2]
+
+            sub         rcx,            1
+            jnz         submby_loop
+
+    pop rdi
+    pop rsi
+    ; begin epilog
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_subtract_mbuv_sse2(short *diff, unsigned char *usrc, unsigned char *vsrc, unsigned char *pred, int stride)
+global sym(vp8_subtract_mbuv_sse2)
+sym(vp8_subtract_mbuv_sse2):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push rsi
+    push rdi
+    ; end prolog
+
+            mov     rdi,        arg(0) ;diff
+            mov     rax,        arg(3) ;pred
+            mov     rsi,        arg(1) ;z = usrc
+            add     rdi,        256*2  ;diff = diff + 256 (shorts)
+            add     rax,        256    ;Predictor = pred + 256
+            movsxd  rdx,        dword ptr arg(4) ;stride;
+            lea     rcx,        [rdx + rdx*2]
+
+            ;u
+            ;line 0 1
+            movq       xmm0,    [rsi]  ; src
+            movq       xmm2,    [rsi+rdx]
+            movdqa     xmm1,    [rax]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi],   xmm0
+            movdqa     [rdi +16],   xmm2
+
+            ;line 2 3
+            movq       xmm0,    [rsi+rdx*2]  ; src
+            movq       xmm2,    [rsi+rcx]
+            movdqa     xmm1,    [rax+16]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 32],   xmm0
+            movdqa     [rdi + 48],   xmm2
+
+            ;line 4 5
+            lea        rsi,     [rsi + rdx*4]
+
+            movq       xmm0,    [rsi]  ; src
+            movq       xmm2,    [rsi+rdx]
+            movdqa     xmm1,    [rax + 32]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 64],   xmm0
+            movdqa     [rdi + 80],   xmm2
+
+            ;line 6 7
+            movq       xmm0,    [rsi+rdx*2]  ; src
+            movq       xmm2,    [rsi+rcx]
+            movdqa     xmm1,    [rax+ 48]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 96],   xmm0
+            movdqa     [rdi + 112],  xmm2
+
+            ;v
+            mov     rsi,        arg(2) ;z = vsrc
+            add     rdi,        64*2  ;diff = diff + 320 (shorts)
+            add     rax,        64    ;Predictor = pred + 320
+
+            ;line 0 1
+            movq       xmm0,    [rsi]  ; src
+            movq       xmm2,    [rsi+rdx]
+            movdqa     xmm1,    [rax]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi],   xmm0
+            movdqa     [rdi +16],   xmm2
+
+            ;line 2 3
+            movq       xmm0,    [rsi+rdx*2]  ; src
+            movq       xmm2,    [rsi+rcx]
+            movdqa     xmm1,    [rax+16]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 32],   xmm0
+            movdqa     [rdi + 48],   xmm2
+
+            ;line 4 5
+            lea        rsi,     [rsi + rdx*4]
+
+            movq       xmm0,    [rsi]  ; src
+            movq       xmm2,    [rsi+rdx]
+            movdqa     xmm1,    [rax + 32]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 64],   xmm0
+            movdqa     [rdi + 80],   xmm2
+
+            ;line 6 7
+            movq       xmm0,    [rsi+rdx*2]  ; src
+            movq       xmm2,    [rsi+rcx]
+            movdqa     xmm1,    [rax+ 48]  ; pred
+            punpcklqdq xmm0,    xmm2
+
+            movdqa     xmm2,    xmm0
+            psubb      xmm0,    xmm1            ; subtraction with sign missed
+
+            pxor       xmm1,    [GLOBAL(t80)]   ;convert to signed values
+            pxor       xmm2,    [GLOBAL(t80)]
+            pcmpgtb    xmm1,    xmm2            ; obtain sign information
+
+            movdqa     xmm2,    xmm0
+            movdqa     xmm3,    xmm1
+            punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
+            punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
+
+            movdqa     [rdi + 96],   xmm0
+            movdqa     [rdi + 112],  xmm2
+
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+t80:
+    times 16 db 0x80
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 6d36aff49..9b753bfa7 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -165,6 +165,18 @@ int vp8_mbuverror_xmm(MACROBLOCK *mb)
     return vp8_mbuverror_xmm_impl(s_ptr, d_ptr);
 }
 
+void vp8_subtract_b_sse2_impl(unsigned char *z,  int src_stride,
+                             short *diff, unsigned char *predictor,
+                             int pitch);
+void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
+{
+    unsigned char *z = *(be->base_src) + be->src;
+    unsigned int  src_stride = be->src_stride;
+    short *diff = &be->src_diff[0];
+    unsigned char *predictor = &bd->predictor[0];
+    vp8_subtract_b_sse2_impl(z, src_stride, diff, predictor, pitch);
+}
+
 #endif
 
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
@@ -282,12 +294,12 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;
         cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_xmm;
-        /* cpi->rtcd.encodemb.sub* not implemented for wmt */
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_sse2;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_sse2;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_sse2;
 
         /*cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
-
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
-
     }
 
 #endif
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 4ce18b6e7..2a8440265 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -104,6 +104,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/variance_impl_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/sad_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
+VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm

From 15542721eed9f434b4d9c670d4184c566705bfb7 Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Mon, 4 Oct 2010 21:12:22 -0400
Subject: [PATCH 227/307] Update arnr strength range form 1-6 to 0-6.

Change-Id: I8eb49c56f7509f0a8074d440e8345b9e3344b85b
---
 vp8/vp8_cx_iface.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index a6cb27b93..499bc503a 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -184,7 +184,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
     RANGE_CHECK(vp8_cfg, token_partitions,   VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION);
     RANGE_CHECK_HI(vp8_cfg, Sharpness,       7);
     RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15);
-    RANGE_CHECK(vp8_cfg, arnr_strength,   1, 6);
+    RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
     RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
 
     if (cfg->g_pass == VPX_RC_LAST_PASS)

From 28d191ada449130276fc10030e8ff68460a8c812 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 19 Oct 2010 14:40:07 -0400
Subject: [PATCH 228/307] Import nestegg webm/mkv parser

Initial import of nestegg[1] parser lib, at commit 0d51131.

[1]: http://github.com/kinetiknz/nestegg
     commit 0d51131519a1014660b5e111e28a78785d76600f

Change-Id: I191d388b7e5140ef96624511ccdd65d0e183076d
---
 nestegg/.gitignore                |   40 +
 nestegg/AUTHORS                   |    1 +
 nestegg/INSTALL                   |    8 +
 nestegg/LICENSE                   |   13 +
 nestegg/Makefile.am               |   51 +
 nestegg/README                    |    6 +
 nestegg/TODO                      |   21 +
 nestegg/configure.ac              |  124 ++
 nestegg/docs/Doxyfile.in          | 1551 +++++++++++++++++++++++
 nestegg/docs/Makefile.am          |   38 +
 nestegg/halloc/README             |   45 +
 nestegg/halloc/halloc.h           |   43 +
 nestegg/halloc/src/align.h        |   36 +
 nestegg/halloc/src/halloc.c       |  254 ++++
 nestegg/halloc/src/hlist.h        |  136 ++
 nestegg/halloc/src/macros.h       |   36 +
 nestegg/include/nestegg/nestegg.h |  292 +++++
 nestegg/m4/as-ac-expand.m4        |   43 +
 nestegg/m4/ax_create_stdint_h.m4  |  695 +++++++++++
 nestegg/m4/pkg.m4                 |  157 +++
 nestegg/nestegg-uninstalled.pc.in |   13 +
 nestegg/nestegg.pc.in             |   13 +
 nestegg/src/nestegg.c             | 1938 +++++++++++++++++++++++++++++
 nestegg/test/test.c               |  248 ++++
 24 files changed, 5802 insertions(+)
 create mode 100644 nestegg/.gitignore
 create mode 100644 nestegg/AUTHORS
 create mode 100644 nestegg/INSTALL
 create mode 100644 nestegg/LICENSE
 create mode 100644 nestegg/Makefile.am
 create mode 100644 nestegg/README
 create mode 100644 nestegg/TODO
 create mode 100644 nestegg/configure.ac
 create mode 100644 nestegg/docs/Doxyfile.in
 create mode 100644 nestegg/docs/Makefile.am
 create mode 100644 nestegg/halloc/README
 create mode 100644 nestegg/halloc/halloc.h
 create mode 100644 nestegg/halloc/src/align.h
 create mode 100644 nestegg/halloc/src/halloc.c
 create mode 100644 nestegg/halloc/src/hlist.h
 create mode 100644 nestegg/halloc/src/macros.h
 create mode 100644 nestegg/include/nestegg/nestegg.h
 create mode 100644 nestegg/m4/as-ac-expand.m4
 create mode 100644 nestegg/m4/ax_create_stdint_h.m4
 create mode 100644 nestegg/m4/pkg.m4
 create mode 100644 nestegg/nestegg-uninstalled.pc.in
 create mode 100644 nestegg/nestegg.pc.in
 create mode 100644 nestegg/src/nestegg.c
 create mode 100644 nestegg/test/test.c

diff --git a/nestegg/.gitignore b/nestegg/.gitignore
new file mode 100644
index 000000000..b2ba99c3f
--- /dev/null
+++ b/nestegg/.gitignore
@@ -0,0 +1,40 @@
+*.lo
+*.o
+*.swp
+*~
+.deps
+.dirstamp
+.libs
+Makefile
+Makefile.in
+_stdint.h
+aclocal.m4
+autom4te.cache
+compile
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+configure
+depcomp
+docs/Doxyfile
+docs/doxygen-build.stamp
+docs/html
+install-sh
+libtool
+ltmain.sh
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+missing
+nestegg-uninstalled.pc
+nestegg.pc
+src/.dirstamp
+src/libnestegg.la
+stamp-h1
+test/test
+include/nestegg/nestegg-stdint.h
diff --git a/nestegg/AUTHORS b/nestegg/AUTHORS
new file mode 100644
index 000000000..8204f40f4
--- /dev/null
+++ b/nestegg/AUTHORS
@@ -0,0 +1 @@
+Matthew Gregan <kinetik@flim.org>
diff --git a/nestegg/INSTALL b/nestegg/INSTALL
new file mode 100644
index 000000000..401df4184
--- /dev/null
+++ b/nestegg/INSTALL
@@ -0,0 +1,8 @@
+Build instructions for libnestegg
+=================================
+
+0. Change directory into the source directory.
+1. Run |autoreconf --install| to generate configure.
+2. Run |./configure| to configure the build.
+3. Run |make| to build.
+4. Run |make check| to run the test suite.
diff --git a/nestegg/LICENSE b/nestegg/LICENSE
new file mode 100644
index 000000000..a67984a61
--- /dev/null
+++ b/nestegg/LICENSE
@@ -0,0 +1,13 @@
+Copyright © 2010 Mozilla Foundation
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
diff --git a/nestegg/Makefile.am b/nestegg/Makefile.am
new file mode 100644
index 000000000..500699160
--- /dev/null
+++ b/nestegg/Makefile.am
@@ -0,0 +1,51 @@
+AUTOMAKE_OPTIONS = foreign 1.11 no-dist-gzip dist-bzip2 subdir-objects
+ACLOCAL_AMFLAGS = -I m4
+
+INCLUDES = -I$(top_srcdir)/include -I. -I$(top_srcdir)/halloc
+AM_CFLAGS = -ansi -pedantic -Wall -Wextra -Wno-long-long -O0 -g
+
+SUBDIRS = docs
+
+EXTRA_DIST = \
+	AUTHORS README LICENSE \
+	nestegg-uninstalled.pc.in \
+	m4/as-ac-expand.m4 \
+	m4/pkg.m4 \
+	m4/ax_create_stdint_h.m4 \
+	halloc/src/halloc.c \
+	halloc/halloc.h \
+	halloc/src/align.h \
+	halloc/src/hlist.h \
+	halloc/src/macros.h
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = nestegg.pc
+
+nesteggincludedir = $(includedir)/nestegg
+nestegginclude_HEADERS = include/nestegg/nestegg.h include/nestegg/nestegg-stdint.h
+
+lib_LTLIBRARIES = src/libnestegg.la
+
+src_libnestegg_la_SOURCES = \
+	src/nestegg.c \
+	halloc/src/halloc.c \
+	halloc/halloc.h \
+	halloc/src/align.h \
+	halloc/src/hlist.h \
+	halloc/src/macros.h
+
+check_PROGRAMS = test/test
+
+test_test_SOURCES = test/test.c
+test_test_LDADD = src/libnestegg.la
+
+DISTCLEANFILES = include/nestegg/nestegg-stdint.h
+
+dist-hook:
+	find $(distdir) -type d -name '.git' | xargs rm -rf
+
+debug:
+	$(MAKE) all CFLAGS="@DEBUG@"
+
+profile:
+	$(MAKE) all CFLAGS="@PROFILE@"
diff --git a/nestegg/README b/nestegg/README
new file mode 100644
index 000000000..47c8237d2
--- /dev/null
+++ b/nestegg/README
@@ -0,0 +1,6 @@
+See INSTALL for build instructions.
+
+Licensed under an ISC-style license.  See LICENSE for details.
+
+The source under the halloc/ directory is licensed under a BSD license.  See
+halloc/halloc.h for details.
diff --git a/nestegg/TODO b/nestegg/TODO
new file mode 100644
index 000000000..bf0cb04c4
--- /dev/null
+++ b/nestegg/TODO
@@ -0,0 +1,21 @@
+- Document when read, seek, tell callbacks are used.
+- Add an automated testsuite.
+- Test (and fix, if necessary) support for unknown sizes.
+- Test (and fix, if necessary) support for large files.
+- Read past unknown elements rather than seeking.
+- Try to handle unknown elements with unknown sizes.
+- Formalize handling of default element values.
+- Try to resynchronize stream when read_block fails so that failure to parse
+  a single block can be treated as non-fatal.
+- Make logging more useful to API users.
+- Avoid reparsing Cues and ignore any SeekHead at end of file.
+- Optionally build a Cue index as Clusters are parsed.
+- Support seeking without Cues.
+- Avoid building a list of Clusters as they are parsed and retain only the
+  last one parsed.
+- Add an asynchronous error code to struct nestegg and ensure that API calls
+  continue to fail safely one a fatal error has been returned.
+- Modify parser/data structures to provide a clean separation.  Perhaps the
+  parser should return a generic tree of nodes that a second pass uses to
+  initialize the main data structures.
+- Use pool allocator for all allocations.
diff --git a/nestegg/configure.ac b/nestegg/configure.ac
new file mode 100644
index 000000000..70f6e0d59
--- /dev/null
+++ b/nestegg/configure.ac
@@ -0,0 +1,124 @@
+dnl ------------------------------------------------
+dnl Initialization and Versioning
+dnl ------------------------------------------------
+
+AC_INIT(libnestegg,[0.1git])
+
+AC_CANONICAL_HOST
+AC_CANONICAL_TARGET
+
+AC_CONFIG_MACRO_DIR([m4])
+
+AM_CONFIG_HEADER([config.h])
+AC_CONFIG_SRCDIR([src/nestegg.c])
+AM_INIT_AUTOMAKE
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+dnl Library versioning
+dnl CURRENT, REVISION, AGE
+dnl - library source changed -> increment REVISION
+dnl - interfaces added/removed/changed -> increment CURRENT, REVISION = 0
+dnl - interfaces added -> increment AGE
+dnl - interfaces removed -> AGE = 0
+
+NESTEGG_CURRENT=0
+NESTEGG_REVISION=0
+NESTEGG_AGE=1
+AC_SUBST(NESTEGG_CURRENT)
+AC_SUBST(NESTEGG_REVISION)
+AC_SUBST(NESTEGG_AGE)
+
+
+dnl --------------------------------------------------  
+dnl Check for programs
+dnl --------------------------------------------------  
+
+dnl save $CFLAGS since AC_PROG_CC likes to insert "-g -O2"
+dnl if $CFLAGS is blank
+cflags_save="$CFLAGS"
+AC_PROG_CC
+AC_PROG_CPP
+CFLAGS="$cflags_save"
+
+AM_PROG_CC_C_O
+AC_LIBTOOL_WIN32_DLL
+AM_PROG_LIBTOOL
+
+dnl Check for doxygen
+AC_ARG_ENABLE([doc],
+	AS_HELP_STRING([--enable-doc], [Build API documentation]),
+	[ac_enable_doc=$enableval], [ac_enable_doc=auto])
+
+if test "x$ac_enable_doc" != "xno"; then
+	AC_CHECK_PROG(HAVE_DOXYGEN, doxygen, true, false)
+
+	if test "x$HAVE_DOXYGEN" = "xfalse" -a "x$ac_enable_doc" = "xyes"; then
+		AC_MSG_ERROR([*** API documentation explicitly requested but Doxygen not found])
+	fi
+else
+	HAVE_DOXYGEN=false
+fi
+AM_CONDITIONAL(HAVE_DOXYGEN,$HAVE_DOXYGEN)
+if test $HAVE_DOXYGEN = "false"; then
+        AC_MSG_WARN([*** doxygen not found, API documentation will not be built])
+fi
+
+# Generate portable stdint.h replacement
+AX_CREATE_STDINT_H(include/nestegg/nestegg-stdint.h)
+
+# Test whenever ld supports -version-script
+AC_PROG_LD
+AC_PROG_LD_GNU
+AC_MSG_CHECKING([how to control symbol export])
+
+dnl --------------------------------------------------
+dnl Do substitutions
+dnl --------------------------------------------------
+
+AC_SUBST(DEBUG)
+AC_SUBST(PROFILE)
+
+AC_OUTPUT([
+  Makefile 
+  docs/Makefile
+  docs/Doxyfile
+  nestegg.pc
+  nestegg-uninstalled.pc
+])
+
+AS_AC_EXPAND(LIBDIR, ${libdir})
+AS_AC_EXPAND(INCLUDEDIR, ${includedir})
+AS_AC_EXPAND(BINDIR, ${bindir})
+AS_AC_EXPAND(DOCDIR, ${docdir})
+
+if test $HAVE_DOXYGEN = "false"; then
+  doc_build="no"
+else
+  doc_build="yes"
+fi
+
+AC_MSG_RESULT([
+------------------------------------------------------------------------
+  $PACKAGE $VERSION:  Automatic configuration OK.
+
+  General configuration:
+
+    API Documentation: .......... ${doc_build}
+
+  Installation paths:
+
+    libnestegg: .................. ${LIBDIR}
+    C header files: .............. ${INCLUDEDIR}/nestegg
+    Documentation: ............... ${DOCDIR}
+
+  Building:
+
+    Type 'make' to compile $PACKAGE.
+
+    Type 'make install' to install $PACKAGE.
+
+  Example programs will be built but not installed.
+------------------------------------------------------------------------
+])
+
diff --git a/nestegg/docs/Doxyfile.in b/nestegg/docs/Doxyfile.in
new file mode 100644
index 000000000..e0e9249aa
--- /dev/null
+++ b/nestegg/docs/Doxyfile.in
@@ -0,0 +1,1551 @@
+# Doxyfile 1.6.2
+
+# This file describes the settings to be used by the documentation system
+# doxygen (www.doxygen.org) for a project
+#
+# All text after a hash (#) is considered a comment and will be ignored
+# The format is:
+#       TAG = value [value, ...]
+# For lists items can also be appended using:
+#       TAG += value [value, ...]
+# Values that contain spaces should be placed between quotes (" ")
+
+#---------------------------------------------------------------------------
+# Project related configuration options
+#---------------------------------------------------------------------------
+
+# This tag specifies the encoding used for all characters in the config file
+# that follow. The default is UTF-8 which is also the encoding used for all
+# text before the first occurrence of this tag. Doxygen uses libiconv (or the
+# iconv built into libc) for the transcoding. See
+# http://www.gnu.org/software/libiconv for the list of possible encodings.
+
+DOXYFILE_ENCODING      = UTF-8
+
+# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
+# by quotes) that should identify the project.
+
+PROJECT_NAME           = @PACKAGE@
+
+# The PROJECT_NUMBER tag can be used to enter a project or revision number.
+# This could be handy for archiving the generated documentation or
+# if some version control system is used.
+
+PROJECT_NUMBER         = @VERSION@
+
+# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
+# base path where the generated documentation will be put.
+# If a relative path is entered, it will be relative to the location
+# where doxygen was started. If left blank the current directory will be used.
+
+OUTPUT_DIRECTORY       = .
+
+# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create
+# 4096 sub-directories (in 2 levels) under the output directory of each output
+# format and will distribute the generated files over these directories.
+# Enabling this option can be useful when feeding doxygen a huge amount of
+# source files, where putting all generated files in the same directory would
+# otherwise cause performance problems for the file system.
+
+CREATE_SUBDIRS         = NO
+
+# The OUTPUT_LANGUAGE tag is used to specify the language in which all
+# documentation generated by doxygen is written. Doxygen will use this
+# information to generate all constant output in the proper language.
+# The default language is English, other supported languages are:
+# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional,
+# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German,
+# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English
+# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian,
+# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak,
+# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese.
+
+OUTPUT_LANGUAGE        = English
+
+# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will
+# include brief member descriptions after the members that are listed in
+# the file and class documentation (similar to JavaDoc).
+# Set to NO to disable this.
+
+BRIEF_MEMBER_DESC      = YES
+
+# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend
+# the brief description of a member or function before the detailed description.
+# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
+# brief descriptions will be completely suppressed.
+
+REPEAT_BRIEF           = YES
+
+# This tag implements a quasi-intelligent brief description abbreviator
+# that is used to form the text in various listings. Each string
+# in this list, if found as the leading text of the brief description, will be
+# stripped from the text and the result after processing the whole list, is
+# used as the annotated text. Otherwise, the brief description is used as-is.
+# If left blank, the following values are used ("$name" is automatically
+# replaced with the name of the entity): "The $name class" "The $name widget"
+# "The $name file" "is" "provides" "specifies" "contains"
+# "represents" "a" "an" "the"
+
+ABBREVIATE_BRIEF       =
+
+# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
+# Doxygen will generate a detailed section even if there is only a brief
+# description.
+
+ALWAYS_DETAILED_SEC    = NO
+
+# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
+# inherited members of a class in the documentation of that class as if those
+# members were ordinary class members. Constructors, destructors and assignment
+# operators of the base classes will not be shown.
+
+INLINE_INHERITED_MEMB  = NO
+
+# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full
+# path before files name in the file list and in the header files. If set
+# to NO the shortest path that makes the file name unique will be used.
+
+FULL_PATH_NAMES        = YES
+
+# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag
+# can be used to strip a user-defined part of the path. Stripping is
+# only done if one of the specified strings matches the left-hand part of
+# the path. The tag can be used to show relative paths in the file list.
+# If left blank the directory from which doxygen is run is used as the
+# path to strip.
+
+STRIP_FROM_PATH        =
+
+# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of
+# the path mentioned in the documentation of a class, which tells
+# the reader which header file to include in order to use a class.
+# If left blank only the name of the header file containing the class
+# definition is used. Otherwise one should specify the include paths that
+# are normally passed to the compiler using the -I flag.
+
+STRIP_FROM_INC_PATH    =
+
+# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter
+# (but less readable) file names. This can be useful is your file systems
+# doesn't support long names like on DOS, Mac, or CD-ROM.
+
+SHORT_NAMES            = NO
+
+# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen
+# will interpret the first line (until the first dot) of a JavaDoc-style
+# comment as the brief description. If set to NO, the JavaDoc
+# comments will behave just like regular Qt-style comments
+# (thus requiring an explicit @brief command for a brief description.)
+
+JAVADOC_AUTOBRIEF      = YES
+
+# If the QT_AUTOBRIEF tag is set to YES then Doxygen will
+# interpret the first line (until the first dot) of a Qt-style
+# comment as the brief description. If set to NO, the comments
+# will behave just like regular Qt-style comments (thus requiring
+# an explicit \brief command for a brief description.)
+
+QT_AUTOBRIEF           = NO
+
+# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen
+# treat a multi-line C++ special comment block (i.e. a block of //! or ///
+# comments) as a brief description. This used to be the default behaviour.
+# The new default is to treat a multi-line C++ comment block as a detailed
+# description. Set this tag to YES if you prefer the old behaviour instead.
+
+MULTILINE_CPP_IS_BRIEF = NO
+
+# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented
+# member inherits the documentation from any documented member that it
+# re-implements.
+
+INHERIT_DOCS           = YES
+
+# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce
+# a new page for each member. If set to NO, the documentation of a member will
+# be part of the file/class/namespace that contains it.
+
+SEPARATE_MEMBER_PAGES  = NO
+
+# The TAB_SIZE tag can be used to set the number of spaces in a tab.
+# Doxygen uses this value to replace tabs by spaces in code fragments.
+
+TAB_SIZE               = 8
+
+# This tag can be used to specify a number of aliases that acts
+# as commands in the documentation. An alias has the form "name=value".
+# For example adding "sideeffect=\par Side Effects:\n" will allow you to
+# put the command \sideeffect (or @sideeffect) in the documentation, which
+# will result in a user-defined paragraph with heading "Side Effects:".
+# You can put \n's in the value part of an alias to insert newlines.
+
+ALIASES                =
+
+# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C
+# sources only. Doxygen will then generate output that is more tailored for C.
+# For instance, some of the names that are used will be different. The list
+# of all members will be omitted, etc.
+
+OPTIMIZE_OUTPUT_FOR_C  = YES
+
+# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java
+# sources only. Doxygen will then generate output that is more tailored for
+# Java. For instance, namespaces will be presented as packages, qualified
+# scopes will look different, etc.
+
+OPTIMIZE_OUTPUT_JAVA   = NO
+
+# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
+# sources only. Doxygen will then generate output that is more tailored for
+# Fortran.
+
+OPTIMIZE_FOR_FORTRAN   = NO
+
+# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
+# sources. Doxygen will then generate output that is tailored for
+# VHDL.
+
+OPTIMIZE_OUTPUT_VHDL   = NO
+
+# Doxygen selects the parser to use depending on the extension of the files it parses.
+# With this tag you can assign which parser to use for a given extension.
+# Doxygen has a built-in mapping, but you can override or extend it using this tag.
+# The format is ext=language, where ext is a file extension, and language is one of
+# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP,
+# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat
+# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran),
+# use: inc=Fortran f=C. Note that for custom extensions you also need to set FILE_PATTERNS otherwise the files are not read by doxygen.
+
+EXTENSION_MAPPING      =
+
+# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
+# to include (a tag file for) the STL sources as input, then you should
+# set this tag to YES in order to let doxygen match functions declarations and
+# definitions whose arguments contain STL classes (e.g. func(std::string); v.s.
+# func(std::string) {}). This also make the inheritance and collaboration
+# diagrams that involve STL classes more complete and accurate.
+
+BUILTIN_STL_SUPPORT    = NO
+
+# If you use Microsoft's C++/CLI language, you should set this option to YES to
+# enable parsing support.
+
+CPP_CLI_SUPPORT        = NO
+
+# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only.
+# Doxygen will parse them like normal C++ but will assume all classes use public
+# instead of private inheritance when no explicit protection keyword is present.
+
+SIP_SUPPORT            = NO
+
+# For Microsoft's IDL there are propget and propput attributes to indicate getter
+# and setter methods for a property. Setting this option to YES (the default)
+# will make doxygen to replace the get and set methods by a property in the
+# documentation. This will only work if the methods are indeed getting or
+# setting a simple type. If this is not the case, or you want to show the
+# methods anyway, you should set this option to NO.
+
+IDL_PROPERTY_SUPPORT   = YES
+
+# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
+# tag is set to YES, then doxygen will reuse the documentation of the first
+# member in the group (if any) for the other members of the group. By default
+# all members of a group must be documented explicitly.
+
+DISTRIBUTE_GROUP_DOC   = NO
+
+# Set the SUBGROUPING tag to YES (the default) to allow class member groups of
+# the same type (for instance a group of public functions) to be put as a
+# subgroup of that type (e.g. under the Public Functions section). Set it to
+# NO to prevent subgrouping. Alternatively, this can be done per class using
+# the \nosubgrouping command.
+
+SUBGROUPING            = YES
+
+# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum
+# is documented as struct, union, or enum with the name of the typedef. So
+# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
+# with name TypeT. When disabled the typedef will appear as a member of a file,
+# namespace, or class. And the struct will be named TypeS. This can typically
+# be useful for C code in case the coding convention dictates that all compound
+# types are typedef'ed and only the typedef is referenced, never the tag name.
+
+TYPEDEF_HIDES_STRUCT   = NO
+
+# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to
+# determine which symbols to keep in memory and which to flush to disk.
+# When the cache is full, less often used symbols will be written to disk.
+# For small to medium size projects (<1000 input files) the default value is
+# probably good enough. For larger projects a too small cache size can cause
+# doxygen to be busy swapping symbols to and from disk most of the time
+# causing a significant performance penality.
+# If the system has enough physical memory increasing the cache will improve the
+# performance by keeping more symbols in memory. Note that the value works on
+# a logarithmic scale so increasing the size by one will rougly double the
+# memory usage. The cache size is given by this formula:
+# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0,
+# corresponding to a cache size of 2^16 = 65536 symbols
+
+SYMBOL_CACHE_SIZE      = 0
+
+#---------------------------------------------------------------------------
+# Build related configuration options
+#---------------------------------------------------------------------------
+
+# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in
+# documentation are documented, even if no documentation was available.
+# Private class members and static file members will be hidden unless
+# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES
+
+EXTRACT_ALL            = NO
+
+# If the EXTRACT_PRIVATE tag is set to YES all private members of a class
+# will be included in the documentation.
+
+EXTRACT_PRIVATE        = NO
+
+# If the EXTRACT_STATIC tag is set to YES all static members of a file
+# will be included in the documentation.
+
+EXTRACT_STATIC         = NO
+
+# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs)
+# defined locally in source files will be included in the documentation.
+# If set to NO only classes defined in header files are included.
+
+EXTRACT_LOCAL_CLASSES  = YES
+
+# This flag is only useful for Objective-C code. When set to YES local
+# methods, which are defined in the implementation section but not in
+# the interface are included in the documentation.
+# If set to NO (the default) only methods in the interface are included.
+
+EXTRACT_LOCAL_METHODS  = NO
+
+# If this flag is set to YES, the members of anonymous namespaces will be
+# extracted and appear in the documentation as a namespace called
+# 'anonymous_namespace{file}', where file will be replaced with the base
+# name of the file that contains the anonymous namespace. By default
+# anonymous namespace are hidden.
+
+EXTRACT_ANON_NSPACES   = NO
+
+# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all
+# undocumented members of documented classes, files or namespaces.
+# If set to NO (the default) these members will be included in the
+# various overviews, but no documentation section is generated.
+# This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_MEMBERS     = NO
+
+# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all
+# undocumented classes that are normally visible in the class hierarchy.
+# If set to NO (the default) these classes will be included in the various
+# overviews. This option has no effect if EXTRACT_ALL is enabled.
+
+HIDE_UNDOC_CLASSES     = NO
+
+# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all
+# friend (class|struct|union) declarations.
+# If set to NO (the default) these declarations will be included in the
+# documentation.
+
+HIDE_FRIEND_COMPOUNDS  = NO
+
+# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any
+# documentation blocks found inside the body of a function.
+# If set to NO (the default) these blocks will be appended to the
+# function's detailed documentation block.
+
+HIDE_IN_BODY_DOCS      = NO
+
+# The INTERNAL_DOCS tag determines if documentation
+# that is typed after a \internal command is included. If the tag is set
+# to NO (the default) then the documentation will be excluded.
+# Set it to YES to include the internal documentation.
+
+INTERNAL_DOCS          = NO
+
+# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate
+# file names in lower-case letters. If set to YES upper-case letters are also
+# allowed. This is useful if you have classes or files whose names only differ
+# in case and if your file system supports case sensitive file names. Windows
+# and Mac users are advised to set this option to NO.
+
+CASE_SENSE_NAMES       = NO
+
+# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen
+# will show members with their full class and namespace scopes in the
+# documentation. If set to YES the scope will be hidden.
+
+HIDE_SCOPE_NAMES       = NO
+
+# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen
+# will put a list of the files that are included by a file in the documentation
+# of that file.
+
+SHOW_INCLUDE_FILES     = YES
+
+# If the FORCE_LOCAL_INCLUDES tag is set to YES then Doxygen
+# will list include files with double quotes in the documentation
+# rather than with sharp brackets.
+
+FORCE_LOCAL_INCLUDES   = NO
+
+# If the INLINE_INFO tag is set to YES (the default) then a tag [inline]
+# is inserted in the documentation for inline members.
+
+INLINE_INFO            = YES
+
+# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen
+# will sort the (detailed) documentation of file and class members
+# alphabetically by member name. If set to NO the members will appear in
+# declaration order.
+
+SORT_MEMBER_DOCS       = NO
+
+# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the
+# brief documentation of file, namespace and class members alphabetically
+# by member name. If set to NO (the default) the members will appear in
+# declaration order.
+
+SORT_BRIEF_DOCS        = NO
+
+# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the (brief and detailed) documentation of class members so that constructors and destructors are listed first. If set to NO (the default) the constructors will appear in the respective orders defined by SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO.
+
+SORT_MEMBERS_CTORS_1ST = NO
+
+# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the
+# hierarchy of group names into alphabetical order. If set to NO (the default)
+# the group names will appear in their defined order.
+
+SORT_GROUP_NAMES       = NO
+
+# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be
+# sorted by fully-qualified names, including namespaces. If set to
+# NO (the default), the class list will be sorted only by class name,
+# not including the namespace part.
+# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
+# Note: This option applies only to the class list, not to the
+# alphabetical list.
+
+SORT_BY_SCOPE_NAME     = NO
+
+# The GENERATE_TODOLIST tag can be used to enable (YES) or
+# disable (NO) the todo list. This list is created by putting \todo
+# commands in the documentation.
+
+GENERATE_TODOLIST      = YES
+
+# The GENERATE_TESTLIST tag can be used to enable (YES) or
+# disable (NO) the test list. This list is created by putting \test
+# commands in the documentation.
+
+GENERATE_TESTLIST      = YES
+
+# The GENERATE_BUGLIST tag can be used to enable (YES) or
+# disable (NO) the bug list. This list is created by putting \bug
+# commands in the documentation.
+
+GENERATE_BUGLIST       = YES
+
+# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or
+# disable (NO) the deprecated list. This list is created by putting
+# \deprecated commands in the documentation.
+
+GENERATE_DEPRECATEDLIST= YES
+
+# The ENABLED_SECTIONS tag can be used to enable conditional
+# documentation sections, marked by \if sectionname ... \endif.
+
+ENABLED_SECTIONS       =
+
+# The MAX_INITIALIZER_LINES tag determines the maximum number of lines
+# the initial value of a variable or define consists of for it to appear in
+# the documentation. If the initializer consists of more lines than specified
+# here it will be hidden. Use a value of 0 to hide initializers completely.
+# The appearance of the initializer of individual variables and defines in the
+# documentation can be controlled using \showinitializer or \hideinitializer
+# command in the documentation regardless of this setting.
+
+MAX_INITIALIZER_LINES  = 30
+
+# Set the SHOW_USED_FILES tag to NO to disable the list of files generated
+# at the bottom of the documentation of classes and structs. If set to YES the
+# list will mention the files that were used to generate the documentation.
+
+SHOW_USED_FILES        = YES
+
+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
+# Set the SHOW_FILES tag to NO to disable the generation of the Files page.
+# This will remove the Files entry from the Quick Index and from the
+# Folder Tree View (if specified). The default is YES.
+
+SHOW_FILES             = YES
+
+# Set the SHOW_NAMESPACES tag to NO to disable the generation of the
+# Namespaces page.
+# This will remove the Namespaces entry from the Quick Index
+# and from the Folder Tree View (if specified). The default is YES.
+
+SHOW_NAMESPACES        = YES
+
+# The FILE_VERSION_FILTER tag can be used to specify a program or script that
+# doxygen should invoke to get the current version for each file (typically from
+# the version control system). Doxygen will invoke the program by executing (via
+# popen()) the command <command> <input-file>, where <command> is the value of
+# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file
+# provided by doxygen. Whatever the program writes to standard output
+# is used as the file version. See the manual for examples.
+
+FILE_VERSION_FILTER    =
+
+# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by
+# doxygen. The layout file controls the global structure of the generated output files
+# in an output format independent way. The create the layout file that represents
+# doxygen's defaults, run doxygen with the -l option. You can optionally specify a
+# file name after the option, if omitted DoxygenLayout.xml will be used as the name
+# of the layout file.
+
+LAYOUT_FILE            =
+
+#---------------------------------------------------------------------------
+# configuration options related to warning and progress messages
+#---------------------------------------------------------------------------
+
+# The QUIET tag can be used to turn on/off the messages that are generated
+# by doxygen. Possible values are YES and NO. If left blank NO is used.
+
+QUIET                  = YES
+
+# The WARNINGS tag can be used to turn on/off the warning messages that are
+# generated by doxygen. Possible values are YES and NO. If left blank
+# NO is used.
+
+WARNINGS               = YES
+
+# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings
+# for undocumented members. If EXTRACT_ALL is set to YES then this flag will
+# automatically be disabled.
+
+WARN_IF_UNDOCUMENTED   = YES
+
+# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for
+# potential errors in the documentation, such as not documenting some
+# parameters in a documented function, or documenting parameters that
+# don't exist or using markup commands wrongly.
+
+WARN_IF_DOC_ERROR      = YES
+
+# This WARN_NO_PARAMDOC option can be abled to get warnings for
+# functions that are documented, but have no documentation for their parameters
+# or return value. If set to NO (the default) doxygen will only warn about
+# wrong or incomplete parameter documentation, but not about the absence of
+# documentation.
+
+WARN_NO_PARAMDOC       = YES
+
+# The WARN_FORMAT tag determines the format of the warning messages that
+# doxygen can produce. The string should contain the $file, $line, and $text
+# tags, which will be replaced by the file and line number from which the
+# warning originated and the warning text. Optionally the format may contain
+# $version, which will be replaced by the version of the file (if it could
+# be obtained via FILE_VERSION_FILTER)
+
+WARN_FORMAT            = "$file:$line: $text"
+
+# The WARN_LOGFILE tag can be used to specify a file to which warning
+# and error messages should be written. If left blank the output is written
+# to stderr.
+
+WARN_LOGFILE           =
+
+#---------------------------------------------------------------------------
+# configuration options related to the input files
+#---------------------------------------------------------------------------
+
+# The INPUT tag can be used to specify the files and/or directories that contain
+# documented source files. You may enter file names like "myfile.cpp" or
+# directories like "/usr/src/myproject". Separate the files or directories
+# with spaces.
+
+INPUT                  = @top_srcdir@/include/nestegg
+
+# This tag can be used to specify the character encoding of the source files
+# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is
+# also the default input encoding. Doxygen uses libiconv (or the iconv built
+# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for
+# the list of possible encodings.
+
+INPUT_ENCODING         = UTF-8
+
+# If the value of the INPUT tag contains directories, you can use the
+# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank the following patterns are tested:
+# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx
+# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90
+
+FILE_PATTERNS          =
+
+# The RECURSIVE tag can be used to turn specify whether or not subdirectories
+# should be searched for input files as well. Possible values are YES and NO.
+# If left blank NO is used.
+
+RECURSIVE              = NO
+
+# The EXCLUDE tag can be used to specify files and/or directories that should
+# excluded from the INPUT source files. This way you can easily exclude a
+# subdirectory from a directory tree whose root is specified with the INPUT tag.
+
+EXCLUDE                =
+
+# The EXCLUDE_SYMLINKS tag can be used select whether or not files or
+# directories that are symbolic links (a Unix filesystem feature) are excluded
+# from the input.
+
+EXCLUDE_SYMLINKS       = NO
+
+# If the value of the INPUT tag contains directories, you can use the
+# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude
+# certain files from those directories. Note that the wildcards are matched
+# against the file with absolute path, so to exclude all test directories
+# for example use the pattern */test/*
+
+EXCLUDE_PATTERNS       =
+
+# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names
+# (namespaces, classes, functions, etc.) that should be excluded from the
+# output. The symbol name can be a fully qualified name, a word, or if the
+# wildcard * is used, a substring. Examples: ANamespace, AClass,
+# AClass::ANamespace, ANamespace::*Test
+
+EXCLUDE_SYMBOLS        =
+
+# The EXAMPLE_PATH tag can be used to specify one or more files or
+# directories that contain example code fragments that are included (see
+# the \include command).
+
+EXAMPLE_PATH           =
+
+# If the value of the EXAMPLE_PATH tag contains directories, you can use the
+# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp
+# and *.h) to filter out the source-files in the directories. If left
+# blank all files are included.
+
+EXAMPLE_PATTERNS       =
+
+# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be
+# searched for input files to be used with the \include or \dontinclude
+# commands irrespective of the value of the RECURSIVE tag.
+# Possible values are YES and NO. If left blank NO is used.
+
+EXAMPLE_RECURSIVE      = NO
+
+# The IMAGE_PATH tag can be used to specify one or more files or
+# directories that contain image that are included in the documentation (see
+# the \image command).
+
+IMAGE_PATH             =
+
+# The INPUT_FILTER tag can be used to specify a program that doxygen should
+# invoke to filter for each input file. Doxygen will invoke the filter program
+# by executing (via popen()) the command <filter> <input-file>, where <filter>
+# is the value of the INPUT_FILTER tag, and <input-file> is the name of an
+# input file. Doxygen will then use the output that the filter program writes
+# to standard output.
+# If FILTER_PATTERNS is specified, this tag will be
+# ignored.
+
+INPUT_FILTER           =
+
+# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern
+# basis.
+# Doxygen will compare the file name with each pattern and apply the
+# filter if there is a match.
+# The filters are a list of the form:
+# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further
+# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER
+# is applied to all files.
+
+FILTER_PATTERNS        =
+
+# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using
+# INPUT_FILTER) will be used to filter the input files when producing source
+# files to browse (i.e. when SOURCE_BROWSER is set to YES).
+
+FILTER_SOURCE_FILES    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to source browsing
+#---------------------------------------------------------------------------
+
+# If the SOURCE_BROWSER tag is set to YES then a list of source files will
+# be generated. Documented entities will be cross-referenced with these sources.
+# Note: To get rid of all source code in the generated output, make sure also
+# VERBATIM_HEADERS is set to NO.
+
+SOURCE_BROWSER         = NO
+
+# Setting the INLINE_SOURCES tag to YES will include the body
+# of functions and classes directly in the documentation.
+
+INLINE_SOURCES         = NO
+
+# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct
+# doxygen to hide any special comment blocks from generated source code
+# fragments. Normal C and C++ comments will always remain visible.
+
+STRIP_CODE_COMMENTS    = YES
+
+# If the REFERENCED_BY_RELATION tag is set to YES
+# then for each documented function all documented
+# functions referencing it will be listed.
+
+REFERENCED_BY_RELATION = NO
+
+# If the REFERENCES_RELATION tag is set to YES
+# then for each documented function all documented entities
+# called/used by that function will be listed.
+
+REFERENCES_RELATION    = NO
+
+# If the REFERENCES_LINK_SOURCE tag is set to YES (the default)
+# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from
+# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will
+# link to the source code.
+# Otherwise they will link to the documentation.
+
+REFERENCES_LINK_SOURCE = YES
+
+# If the USE_HTAGS tag is set to YES then the references to source code
+# will point to the HTML generated by the htags(1) tool instead of doxygen
+# built-in source browser. The htags tool is part of GNU's global source
+# tagging system (see http://www.gnu.org/software/global/global.html). You
+# will need version 4.8.6 or higher.
+
+USE_HTAGS              = NO
+
+# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen
+# will generate a verbatim copy of the header file for each class for
+# which an include is specified. Set to NO to disable this.
+
+VERBATIM_HEADERS       = YES
+
+#---------------------------------------------------------------------------
+# configuration options related to the alphabetical class index
+#---------------------------------------------------------------------------
+
+# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index
+# of all compounds will be generated. Enable this if the project
+# contains a lot of classes, structs, unions or interfaces.
+
+ALPHABETICAL_INDEX     = NO
+
+# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then
+# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns
+# in which this list will be split (can be a number in the range [1..20])
+
+COLS_IN_ALPHA_INDEX    = 5
+
+# In case all classes in a project start with a common prefix, all
+# classes will be put under the same header in the alphabetical index.
+# The IGNORE_PREFIX tag can be used to specify one or more prefixes that
+# should be ignored while generating the index headers.
+
+IGNORE_PREFIX          =
+
+#---------------------------------------------------------------------------
+# configuration options related to the HTML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_HTML tag is set to YES (the default) Doxygen will
+# generate HTML output.
+
+GENERATE_HTML          = YES
+
+# The HTML_OUTPUT tag is used to specify where the HTML docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `html' will be used as the default path.
+
+HTML_OUTPUT            = html
+
+# The HTML_FILE_EXTENSION tag can be used to specify the file extension for
+# each generated HTML page (for example: .htm,.php,.asp). If it is left blank
+# doxygen will generate files with .html extension.
+
+HTML_FILE_EXTENSION    = .html
+
+# The HTML_HEADER tag can be used to specify a personal HTML header for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard header.
+
+HTML_HEADER            =
+
+# The HTML_FOOTER tag can be used to specify a personal HTML footer for
+# each generated HTML page. If it is left blank doxygen will generate a
+# standard footer.
+
+HTML_FOOTER            =
+
+# The HTML_STYLESHEET tag can be used to specify a user-defined cascading
+# style sheet that is used by each HTML page. It can be used to
+# fine-tune the look of the HTML output. If the tag is left blank doxygen
+# will generate a default style sheet. Note that doxygen will try to copy
+# the style sheet file to the HTML output directory, so don't put your own
+# stylesheet in the HTML output directory as well, or it will be erased!
+
+HTML_STYLESHEET        =
+
+# If the HTML_TIMESTAMP tag is set to YES then the footer of each generated HTML
+# page will contain the date and time when the page was generated. Setting
+# this to NO can help when comparing the output of multiple runs.
+
+HTML_TIMESTAMP         = NO
+
+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
+# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML
+# documentation will contain sections that can be hidden and shown after the
+# page has loaded. For this to work a browser that supports
+# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox
+# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari).
+
+HTML_DYNAMIC_SECTIONS  = NO
+
+# If the GENERATE_DOCSET tag is set to YES, additional index files
+# will be generated that can be used as input for Apple's Xcode 3
+# integrated development environment, introduced with OSX 10.5 (Leopard).
+# To create a documentation set, doxygen will generate a Makefile in the
+# HTML output directory. Running make will produce the docset in that
+# directory and running "make install" will install the docset in
+# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find
+# it at startup.
+# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information.
+
+GENERATE_DOCSET        = NO
+
+# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the
+# feed. A documentation feed provides an umbrella under which multiple
+# documentation sets from a single provider (such as a company or product suite)
+# can be grouped.
+
+DOCSET_FEEDNAME        = "Doxygen generated docs"
+
+# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that
+# should uniquely identify the documentation set bundle. This should be a
+# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen
+# will append .docset to the name.
+
+DOCSET_BUNDLE_ID       = org.doxygen.Project
+
+# If the GENERATE_HTMLHELP tag is set to YES, additional index files
+# will be generated that can be used as input for tools like the
+# Microsoft HTML help workshop to generate a compiled HTML help file (.chm)
+# of the generated HTML documentation.
+
+GENERATE_HTMLHELP      = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can
+# be used to specify the file name of the resulting .chm file. You
+# can add a path in front of the file if the result should not be
+# written to the html output directory.
+
+CHM_FILE               =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can
+# be used to specify the location (absolute path including file name) of
+# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run
+# the HTML help compiler on the generated index.hhp.
+
+HHC_LOCATION           =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag
+# controls if a separate .chi index file is generated (YES) or that
+# it should be included in the master .chm file (NO).
+
+GENERATE_CHI           = NO
+
+# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING
+# is used to encode HtmlHelp index (hhk), content (hhc) and project file
+# content.
+
+CHM_INDEX_ENCODING     =
+
+# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag
+# controls whether a binary table of contents is generated (YES) or a
+# normal table of contents (NO) in the .chm file.
+
+BINARY_TOC             = NO
+
+# The TOC_EXPAND flag can be set to YES to add extra items for group members
+# to the contents of the HTML help documentation and to the tree view.
+
+TOC_EXPAND             = NO
+
+# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER
+# are set, an additional index file will be generated that can be used as input for
+# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated
+# HTML documentation.
+
+GENERATE_QHP           = NO
+
+# If the QHG_LOCATION tag is specified, the QCH_FILE tag can
+# be used to specify the file name of the resulting .qch file.
+# The path specified is relative to the HTML output folder.
+
+QCH_FILE               =
+
+# The QHP_NAMESPACE tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#namespace
+
+QHP_NAMESPACE          = org.doxygen.Project
+
+# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating
+# Qt Help Project output. For more information please see
+# http://doc.trolltech.com/qthelpproject.html#virtual-folders
+
+QHP_VIRTUAL_FOLDER     = doc
+
+# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add.
+# For more information please see
+# http://doc.trolltech.com/qthelpproject.html#custom-filters
+
+QHP_CUST_FILTER_NAME   =
+
+# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see
+# <a href="http://doc.trolltech.com/qthelpproject.html#custom-filters">Qt Help Project / Custom Filters</a>.
+
+QHP_CUST_FILTER_ATTRS  =
+
+# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's
+# filter section matches.
+# <a href="http://doc.trolltech.com/qthelpproject.html#filter-attributes">Qt Help Project / Filter Attributes</a>.
+
+QHP_SECT_FILTER_ATTRS  =
+
+# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can
+# be used to specify the location of Qt's qhelpgenerator.
+# If non-empty doxygen will try to run qhelpgenerator on the generated
+# .qhp file.
+
+QHG_LOCATION           =
+
+# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files
+#  will be generated, which together with the HTML files, form an Eclipse help
+#  plugin. To install this plugin and make it available under the help contents
+# menu in Eclipse, the contents of the directory containing the HTML and XML
+# files needs to be copied into the plugins directory of eclipse. The name of
+# the directory within the plugins directory should be the same as
+# the ECLIPSE_DOC_ID value. After copying Eclipse needs to be restarted before the help appears.
+
+GENERATE_ECLIPSEHELP   = NO
+
+# A unique identifier for the eclipse help plugin. When installing the plugin
+# the directory name containing the HTML and XML files should also have
+# this name.
+
+ECLIPSE_DOC_ID         = org.doxygen.Project
+
+# The DISABLE_INDEX tag can be used to turn on/off the condensed index at
+# top of each HTML page. The value NO (the default) enables the index and
+# the value YES disables it.
+
+DISABLE_INDEX          = NO
+
+# This tag can be used to set the number of enum values (range [1..20])
+# that doxygen will group on one line in the generated HTML documentation.
+
+ENUM_VALUES_PER_LINE   = 4
+
+# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index
+# structure should be generated to display hierarchical information.
+# If the tag value is set to YES, a side panel will be generated
+# containing a tree-like index structure (just like the one that
+# is generated for HTML Help). For this to work a browser that supports
+# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser).
+# Windows users are probably better off using the HTML help feature.
+
+GENERATE_TREEVIEW      = NO
+
+# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories,
+# and Class Hierarchy pages using a tree view instead of an ordered list.
+
+USE_INLINE_TREES       = NO
+
+# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be
+# used to set the initial width (in pixels) of the frame in which the tree
+# is shown.
+
+TREEVIEW_WIDTH         = 250
+
+# Use this tag to change the font size of Latex formulas included
+# as images in the HTML documentation. The default is 10. Note that
+# when you change the font size after a successful doxygen run you need
+# to manually remove any form_*.png images from the HTML output directory
+# to force them to be regenerated.
+
+FORMULA_FONTSIZE       = 10
+
+# When the SEARCHENGINE tag is enabled doxygen will generate a search box for the HTML output. The underlying search engine uses javascript
+# and DHTML and should work on any modern browser. Note that when using HTML help (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) there is already a search function so this one should
+# typically be disabled. For large projects the javascript based search engine
+# can be slow, then enabling SERVER_BASED_SEARCH may provide a better solution.
+
+SEARCHENGINE           = YES
+
+# When the SERVER_BASED_SEARCH tag is enabled the search engine will be implemented using a PHP enabled web server instead of at the web client using Javascript. Doxygen will generate the search PHP script and index
+# file to put on the web server. The advantage of the server based approach is that it scales better to large projects and allows full text search. The disadvances is that it is more difficult to setup
+# and does not have live searching capabilities.
+
+SERVER_BASED_SEARCH    = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the LaTeX output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will
+# generate Latex output.
+
+GENERATE_LATEX         = NO
+
+# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `latex' will be used as the default path.
+
+LATEX_OUTPUT           = latex
+
+# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be
+# invoked. If left blank `latex' will be used as the default command name.
+# Note that when enabling USE_PDFLATEX this option is only used for
+# generating bitmaps for formulas in the HTML output, but not in the
+# Makefile that is written to the output directory.
+
+LATEX_CMD_NAME         = latex
+
+# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to
+# generate index for LaTeX. If left blank `makeindex' will be used as the
+# default command name.
+
+MAKEINDEX_CMD_NAME     = makeindex
+
+# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact
+# LaTeX documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_LATEX          = NO
+
+# The PAPER_TYPE tag can be used to set the paper type that is used
+# by the printer. Possible values are: a4, a4wide, letter, legal and
+# executive. If left blank a4wide will be used.
+
+PAPER_TYPE             = a4wide
+
+# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX
+# packages that should be included in the LaTeX output.
+
+EXTRA_PACKAGES         =
+
+# The LATEX_HEADER tag can be used to specify a personal LaTeX header for
+# the generated latex document. The header should contain everything until
+# the first chapter. If it is left blank doxygen will generate a
+# standard header. Notice: only use this tag if you know what you are doing!
+
+LATEX_HEADER           =
+
+# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated
+# is prepared for conversion to pdf (using ps2pdf). The pdf file will
+# contain links (just like the HTML output) instead of page references
+# This makes the output suitable for online browsing using a pdf viewer.
+
+PDF_HYPERLINKS         = YES
+
+# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of
+# plain latex in the generated Makefile. Set this option to YES to get a
+# higher quality PDF documentation.
+
+USE_PDFLATEX           = YES
+
+# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode.
+# command to the generated LaTeX files. This will instruct LaTeX to keep
+# running if errors occur, instead of asking the user for help.
+# This option is also used when generating formulas in HTML.
+
+LATEX_BATCHMODE        = NO
+
+# If LATEX_HIDE_INDICES is set to YES then doxygen will not
+# include the index chapters (such as File Index, Compound Index, etc.)
+# in the output.
+
+LATEX_HIDE_INDICES     = NO
+
+# If LATEX_SOURCE_CODE is set to YES then doxygen will include source code with syntax highlighting in the LaTeX output. Note that which sources are shown also depends on other settings such as SOURCE_BROWSER.
+
+LATEX_SOURCE_CODE      = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the RTF output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output
+# The RTF output is optimized for Word 97 and may not look very pretty with
+# other RTF readers or editors.
+
+GENERATE_RTF           = NO
+
+# The RTF_OUTPUT tag is used to specify where the RTF docs will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `rtf' will be used as the default path.
+
+RTF_OUTPUT             = rtf
+
+# If the COMPACT_RTF tag is set to YES Doxygen generates more compact
+# RTF documents. This may be useful for small projects and may help to
+# save some trees in general.
+
+COMPACT_RTF            = NO
+
+# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated
+# will contain hyperlink fields. The RTF file will
+# contain links (just like the HTML output) instead of page references.
+# This makes the output suitable for online browsing using WORD or other
+# programs which support those fields.
+# Note: wordpad (write) and others do not support links.
+
+RTF_HYPERLINKS         = NO
+
+# Load stylesheet definitions from file. Syntax is similar to doxygen's
+# config file, i.e. a series of assignments. You only have to provide
+# replacements, missing definitions are set to their default value.
+
+RTF_STYLESHEET_FILE    =
+
+# Set optional variables used in the generation of an rtf document.
+# Syntax is similar to doxygen's config file.
+
+RTF_EXTENSIONS_FILE    =
+
+#---------------------------------------------------------------------------
+# configuration options related to the man page output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_MAN tag is set to YES (the default) Doxygen will
+# generate man pages
+
+GENERATE_MAN           = NO
+
+# The MAN_OUTPUT tag is used to specify where the man pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `man' will be used as the default path.
+
+MAN_OUTPUT             = man
+
+# The MAN_EXTENSION tag determines the extension that is added to
+# the generated man pages (default is the subroutine's section .3)
+
+MAN_EXTENSION          = .3
+
+# If the MAN_LINKS tag is set to YES and Doxygen generates man output,
+# then it will generate one additional man file for each entity
+# documented in the real man page(s). These additional files
+# only source the real man page, but without them the man command
+# would be unable to find the correct page. The default is NO.
+
+MAN_LINKS              = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the XML output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_XML tag is set to YES Doxygen will
+# generate an XML file that captures the structure of
+# the code including all documentation.
+
+GENERATE_XML           = NO
+
+# The XML_OUTPUT tag is used to specify where the XML pages will be put.
+# If a relative path is entered the value of OUTPUT_DIRECTORY will be
+# put in front of it. If left blank `xml' will be used as the default path.
+
+XML_OUTPUT             = xml
+
+# The XML_SCHEMA tag can be used to specify an XML schema,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_SCHEMA             =
+
+# The XML_DTD tag can be used to specify an XML DTD,
+# which can be used by a validating XML parser to check the
+# syntax of the XML files.
+
+XML_DTD                =
+
+# If the XML_PROGRAMLISTING tag is set to YES Doxygen will
+# dump the program listings (including syntax highlighting
+# and cross-referencing information) to the XML output. Note that
+# enabling this will significantly increase the size of the XML output.
+
+XML_PROGRAMLISTING     = YES
+
+#---------------------------------------------------------------------------
+# configuration options for the AutoGen Definitions output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will
+# generate an AutoGen Definitions (see autogen.sf.net) file
+# that captures the structure of the code including all
+# documentation. Note that this feature is still experimental
+# and incomplete at the moment.
+
+GENERATE_AUTOGEN_DEF   = NO
+
+#---------------------------------------------------------------------------
+# configuration options related to the Perl module output
+#---------------------------------------------------------------------------
+
+# If the GENERATE_PERLMOD tag is set to YES Doxygen will
+# generate a Perl module file that captures the structure of
+# the code including all documentation. Note that this
+# feature is still experimental and incomplete at the
+# moment.
+
+GENERATE_PERLMOD       = NO
+
+# If the PERLMOD_LATEX tag is set to YES Doxygen will generate
+# the necessary Makefile rules, Perl scripts and LaTeX code to be able
+# to generate PDF and DVI output from the Perl module output.
+
+PERLMOD_LATEX          = NO
+
+# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be
+# nicely formatted so it can be parsed by a human reader.
+# This is useful
+# if you want to understand what is going on.
+# On the other hand, if this
+# tag is set to NO the size of the Perl module output will be much smaller
+# and Perl will parse it just the same.
+
+PERLMOD_PRETTY         = YES
+
+# The names of the make variables in the generated doxyrules.make file
+# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX.
+# This is useful so different doxyrules.make files included by the same
+# Makefile don't overwrite each other's variables.
+
+PERLMOD_MAKEVAR_PREFIX =
+
+#---------------------------------------------------------------------------
+# Configuration options related to the preprocessor
+#---------------------------------------------------------------------------
+
+# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will
+# evaluate all C-preprocessor directives found in the sources and include
+# files.
+
+ENABLE_PREPROCESSING   = YES
+
+# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro
+# names in the source code. If set to NO (the default) only conditional
+# compilation will be performed. Macro expansion can be done in a controlled
+# way by setting EXPAND_ONLY_PREDEF to YES.
+
+MACRO_EXPANSION        = NO
+
+# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES
+# then the macro expansion is limited to the macros specified with the
+# PREDEFINED and EXPAND_AS_DEFINED tags.
+
+EXPAND_ONLY_PREDEF     = NO
+
+# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files
+# in the INCLUDE_PATH (see below) will be search if a #include is found.
+
+SEARCH_INCLUDES        = YES
+
+# The INCLUDE_PATH tag can be used to specify one or more directories that
+# contain include files that are not input files but should be processed by
+# the preprocessor.
+
+INCLUDE_PATH           =
+
+# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard
+# patterns (like *.h and *.hpp) to filter out the header-files in the
+# directories. If left blank, the patterns specified with FILE_PATTERNS will
+# be used.
+
+INCLUDE_FILE_PATTERNS  =
+
+# The PREDEFINED tag can be used to specify one or more macro names that
+# are defined before the preprocessor is started (similar to the -D option of
+# gcc). The argument of the tag is a list of macros of the form: name
+# or name=definition (no spaces). If the definition and the = are
+# omitted =1 is assumed. To prevent a macro definition from being
+# undefined via #undef or recursively expanded use the := operator
+# instead of the = operator.
+
+PREDEFINED             =
+
+# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
+# this tag can be used to specify a list of macro names that should be expanded.
+# The macro definition that is found in the sources will be used.
+# Use the PREDEFINED tag if you want to use a different macro definition.
+
+EXPAND_AS_DEFINED      =
+
+# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then
+# doxygen's preprocessor will remove all function-like macros that are alone
+# on a line, have an all uppercase name, and do not end with a semicolon. Such
+# function macros are typically used for boiler-plate code, and will confuse
+# the parser if not removed.
+
+SKIP_FUNCTION_MACROS   = YES
+
+#---------------------------------------------------------------------------
+# Configuration::additions related to external references
+#---------------------------------------------------------------------------
+
+# The TAGFILES option can be used to specify one or more tagfiles.
+# Optionally an initial location of the external documentation
+# can be added for each tagfile. The format of a tag file without
+# this location is as follows:
+#
+# TAGFILES = file1 file2 ...
+# Adding location for the tag files is done as follows:
+#
+# TAGFILES = file1=loc1 "file2 = loc2" ...
+# where "loc1" and "loc2" can be relative or absolute paths or
+# URLs. If a location is present for each tag, the installdox tool
+# does not have to be run to correct the links.
+# Note that each tag file must have a unique name
+# (where the name does NOT include the path)
+# If a tag file is not located in the directory in which doxygen
+# is run, you must also specify the path to the tagfile here.
+
+TAGFILES               =
+
+# When a file name is specified after GENERATE_TAGFILE, doxygen will create
+# a tag file that is based on the input files it reads.
+
+GENERATE_TAGFILE       =
+
+# If the ALLEXTERNALS tag is set to YES all external classes will be listed
+# in the class index. If set to NO only the inherited external classes
+# will be listed.
+
+ALLEXTERNALS           = NO
+
+# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed
+# in the modules index. If set to NO, only the current project's groups will
+# be listed.
+
+EXTERNAL_GROUPS        = YES
+
+# The PERL_PATH should be the absolute path and name of the perl script
+# interpreter (i.e. the result of `which perl').
+
+PERL_PATH              = /usr/bin/perl
+
+#---------------------------------------------------------------------------
+# Configuration options related to the dot tool
+#---------------------------------------------------------------------------
+
+# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will
+# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base
+# or super classes. Setting the tag to NO turns the diagrams off. Note that
+# this option is superseded by the HAVE_DOT option below. This is only a
+# fallback. It is recommended to install and use dot, since it yields more
+# powerful graphs.
+
+CLASS_DIAGRAMS         = YES
+
+# You can define message sequence charts within doxygen comments using the \msc
+# command. Doxygen will then run the mscgen tool (see
+# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the
+# documentation. The MSCGEN_PATH tag allows you to specify the directory where
+# the mscgen tool resides. If left empty the tool is assumed to be found in the
+# default search path.
+
+MSCGEN_PATH            =
+
+# If set to YES, the inheritance and collaboration graphs will hide
+# inheritance and usage relations if the target is undocumented
+# or is not a class.
+
+HIDE_UNDOC_RELATIONS   = YES
+
+# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is
+# available from the path. This tool is part of Graphviz, a graph visualization
+# toolkit from AT&T and Lucent Bell Labs. The other options in this section
+# have no effect if this option is set to NO (the default)
+
+HAVE_DOT               = NO
+
+# By default doxygen will write a font called FreeSans.ttf to the output
+# directory and reference it in all dot files that doxygen generates. This
+# font does not include all possible unicode characters however, so when you need
+# these (or just want a differently looking font) you can specify the font name
+# using DOT_FONTNAME. You need need to make sure dot is able to find the font,
+# which can be done by putting it in a standard location or by setting the
+# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory
+# containing the font.
+
+DOT_FONTNAME           = FreeSans
+
+# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs.
+# The default size is 10pt.
+
+DOT_FONTSIZE           = 10
+
+# By default doxygen will tell dot to use the output directory to look for the
+# FreeSans.ttf font (which doxygen will put there itself). If you specify a
+# different font using DOT_FONTNAME you can set the path where dot
+# can find it using this tag.
+
+DOT_FONTPATH           =
+
+# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect inheritance relations. Setting this tag to YES will force the
+# the CLASS_DIAGRAMS tag to NO.
+
+CLASS_GRAPH            = YES
+
+# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for each documented class showing the direct and
+# indirect implementation dependencies (inheritance, containment, and
+# class references variables) of the class with other documented classes.
+
+COLLABORATION_GRAPH    = YES
+
+# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen
+# will generate a graph for groups, showing the direct groups dependencies
+
+GROUP_GRAPHS           = YES
+
+# If the UML_LOOK tag is set to YES doxygen will generate inheritance and
+# collaboration diagrams in a style similar to the OMG's Unified Modeling
+# Language.
+
+UML_LOOK               = NO
+
+# If set to YES, the inheritance and collaboration graphs will show the
+# relations between templates and their instances.
+
+TEMPLATE_RELATIONS     = NO
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT
+# tags are set to YES then doxygen will generate a graph for each documented
+# file showing the direct and indirect include dependencies of the file with
+# other documented files.
+
+INCLUDE_GRAPH          = YES
+
+# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and
+# HAVE_DOT tags are set to YES then doxygen will generate a graph for each
+# documented header file showing the documented files that directly or
+# indirectly include this file.
+
+INCLUDED_BY_GRAPH      = YES
+
+# If the CALL_GRAPH and HAVE_DOT options are set to YES then
+# doxygen will generate a call dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable call graphs
+# for selected functions only using the \callgraph command.
+
+CALL_GRAPH             = NO
+
+# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then
+# doxygen will generate a caller dependency graph for every global function
+# or class method. Note that enabling this option will significantly increase
+# the time of a run. So in most cases it will be better to enable caller
+# graphs for selected functions only using the \callergraph command.
+
+CALLER_GRAPH           = NO
+
+# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen
+# will graphical hierarchy of all classes instead of a textual one.
+
+GRAPHICAL_HIERARCHY    = YES
+
+# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES
+# then doxygen will show the dependencies a directory has on other directories
+# in a graphical way. The dependency relations are determined by the #include
+# relations between the files in the directories.
+
+DIRECTORY_GRAPH        = YES
+
+# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images
+# generated by dot. Possible values are png, jpg, or gif
+# If left blank png will be used.
+
+DOT_IMAGE_FORMAT       = png
+
+# The tag DOT_PATH can be used to specify the path where the dot tool can be
+# found. If left blank, it is assumed the dot tool can be found in the path.
+
+DOT_PATH               =
+
+# The DOTFILE_DIRS tag can be used to specify one or more directories that
+# contain dot files that are included in the documentation (see the
+# \dotfile command).
+
+DOTFILE_DIRS           =
+
+# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of
+# nodes that will be shown in the graph. If the number of nodes in a graph
+# becomes larger than this value, doxygen will truncate the graph, which is
+# visualized by representing a node as a red box. Note that doxygen if the
+# number of direct children of the root node in a graph is already larger than
+# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note
+# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH.
+
+DOT_GRAPH_MAX_NODES    = 50
+
+# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the
+# graphs generated by dot. A depth value of 3 means that only nodes reachable
+# from the root by following a path via at most 3 edges will be shown. Nodes
+# that lay further from the root node will be omitted. Note that setting this
+# option to 1 or 2 may greatly reduce the computation time needed for large
+# code bases. Also note that the size of a graph can be further restricted by
+# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction.
+
+MAX_DOT_GRAPH_DEPTH    = 0
+
+# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent
+# background. This is disabled by default, because dot on Windows does not
+# seem to support this out of the box. Warning: Depending on the platform used,
+# enabling this option may lead to badly anti-aliased labels on the edges of
+# a graph (i.e. they become hard to read).
+
+DOT_TRANSPARENT        = NO
+
+# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output
+# files in one run (i.e. multiple -o and -T options on the command line). This
+# makes dot run faster, but since only newer versions of dot (>1.8.10)
+# support this, this feature is disabled by default.
+
+DOT_MULTI_TARGETS      = NO
+
+# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will
+# generate a legend page explaining the meaning of the various boxes and
+# arrows in the dot generated graphs.
+
+GENERATE_LEGEND        = YES
+
+# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will
+# remove the intermediate dot files that are used to generate
+# the various graphs.
+
+DOT_CLEANUP            = YES
diff --git a/nestegg/docs/Makefile.am b/nestegg/docs/Makefile.am
new file mode 100644
index 000000000..42cf8eec4
--- /dev/null
+++ b/nestegg/docs/Makefile.am
@@ -0,0 +1,38 @@
+doc_DATA = doxygen-build.stamp
+
+EXTRA_DIST = Doxyfile.in
+
+if HAVE_DOXYGEN
+doxygen-build.stamp: Doxyfile
+	doxygen
+	touch doxygen-build.stamp
+else
+doxygen-build.stamp:
+	echo "*** Warning: Doxygen not found; documentation will not be built."
+	touch doxygen-build.stamp
+endif
+
+dist_docdir = $(distdir)/libnestegg
+
+dist-hook:
+	if test -d html; then \
+	  mkdir $(dist_docdir); \
+	  echo -n "copying built documenation..."; \
+	  cp -rp html $(dist_docdir)/html; \
+	  echo "OK"; \
+	fi
+
+
+install-data-local: doxygen-build.stamp
+	$(mkinstalldirs) $(DESTDIR)$(docdir)
+	if test -d html; then \
+	  cp -rp html $(DESTDIR)$(docdir)/html; \
+	fi
+
+uninstall-local:
+	rm -rf $(DESTDIR)$(docdir)
+
+clean-local:
+	if test -d html; then rm -rf html; fi
+	if test -f doxygen-build.stamp; then rm -f doxygen-build.stamp; fi
+
diff --git a/nestegg/halloc/README b/nestegg/halloc/README
new file mode 100644
index 000000000..380fba2b8
--- /dev/null
+++ b/nestegg/halloc/README
@@ -0,0 +1,45 @@
+halloc 1.2.1
+============
+      
+	Hierarchical memory heap interface - an extension to standard
+	malloc/free interface that simplifies tasks of memory disposal 
+	when allocated structures exhibit hierarchical properties.
+
+	http://swapped.cc/halloc
+=
+	To build libhalloc.a with GNU tools run
+		make
+
+	To install in /usr/include and /usr/lib
+		make install
+
+	To cleanup the build files 
+		make clean
+=
+	halloc-1.2.1
+		* fixed a double-free bug in _set_allocator() as per
+		  Matthew Gregan comments
+
+		* switched to using NULL instead of 0 where applicable
+
+	halloc-1.2.0
+		* added missing <string.h> include to halloc.c
+		
+		* improved standard compliance thanks to the feedback
+		  received from Stan Tobias. Two things were fixed -
+		  
+		- hblock_t structure no longer uses zero-sized 'data'
+		  array, which happened to be common, but non-standard
+		  extension; 
+		  
+		- secondly, added the code to test the behaviour of 
+		  realloc(ptr, 0). Standard allows it NOT to act as
+		  free(), in which case halloc will use its own version
+		  of allocator calling free() when neccessary.
+
+	halloc-1.1.0
+		* initial public release (rewrite of hhmalloc library)
+
+=============================================================================
+Copyright (c) 2004-2010, Alex Pankratov (ap@swapped.cc). All rights reserved.
+
diff --git a/nestegg/halloc/halloc.h b/nestegg/halloc/halloc.h
new file mode 100644
index 000000000..10af4e8d8
--- /dev/null
+++ b/nestegg/halloc/halloc.h
@@ -0,0 +1,43 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HALLOC_H_
+#define _LIBP_HALLOC_H_
+
+#include <stddef.h>  /* size_t */
+
+/*
+ *	Core API
+ */
+void * halloc (void * block, size_t len);
+void   hattach(void * block, void * parent);
+
+/*
+ *	standard malloc/free api
+ */
+void * h_malloc (size_t len);
+void * h_calloc (size_t n, size_t len);
+void * h_realloc(void * p, size_t len);
+void   h_free   (void * p);
+char * h_strdup (const char * str);
+
+/*
+ *	the underlying allocator
+ */
+typedef void * (* realloc_t)(void * ptr, size_t len);
+
+extern realloc_t halloc_allocator;
+
+#endif
+
diff --git a/nestegg/halloc/src/align.h b/nestegg/halloc/src/align.h
new file mode 100644
index 000000000..4c6e1831f
--- /dev/null
+++ b/nestegg/halloc/src/align.h
@@ -0,0 +1,36 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_ALIGN_H_
+#define _LIBP_ALIGN_H_
+
+/*
+ *	a type with the most strict alignment requirements
+ */
+union max_align
+{
+	char   c;
+	short  s;
+	long   l;
+	int    i;
+	float  f;
+	double d;
+	void * v;
+	void (*q)(void);
+};
+
+typedef union max_align max_align_t;
+
+#endif
+
diff --git a/nestegg/halloc/src/halloc.c b/nestegg/halloc/src/halloc.c
new file mode 100644
index 000000000..5758fc07b
--- /dev/null
+++ b/nestegg/halloc/src/halloc.c
@@ -0,0 +1,254 @@
+/*
+ *	Copyright (c) 2004i-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#include <stdlib.h>  /* realloc */
+#include <string.h>  /* memset & co */
+
+#include "halloc.h"
+#include "align.h"
+#include "hlist.h"
+
+/*
+ *	block control header
+ */
+typedef struct hblock
+{
+#ifndef NDEBUG
+#define HH_MAGIC    0x20040518L
+	long          magic;
+#endif
+	hlist_item_t  siblings; /* 2 pointers */
+	hlist_head_t  children; /* 1 pointer  */
+	max_align_t   data[1];  /* not allocated, see below */
+	
+} hblock_t;
+
+#define sizeof_hblock offsetof(hblock_t, data)
+
+/*
+ *
+ */
+realloc_t halloc_allocator = NULL;
+
+#define allocator halloc_allocator
+
+/*
+ *	static methods
+ */
+static void _set_allocator(void);
+static void * _realloc(void * ptr, size_t n);
+
+static int  _relate(hblock_t * b, hblock_t * p);
+static void _free_children(hblock_t * p);
+
+/*
+ *	Core API
+ */
+void * halloc(void * ptr, size_t len)
+{
+	hblock_t * p;
+
+	/* set up default allocator */
+	if (! allocator)
+	{
+		_set_allocator();
+		assert(allocator);
+	}
+
+	/* calloc */
+	if (! ptr)
+	{
+		if (! len)
+			return NULL;
+
+		p = allocator(0, len + sizeof_hblock);
+		if (! p)
+			return NULL;
+#ifndef NDEBUG
+		p->magic = HH_MAGIC;
+#endif
+		hlist_init(&p->children);
+		hlist_init_item(&p->siblings);
+
+		return p->data;
+	}
+
+	p = structof(ptr, hblock_t, data);
+	assert(p->magic == HH_MAGIC);
+
+	/* realloc */
+	if (len)
+	{
+		p = allocator(p, len + sizeof_hblock);
+		if (! p)
+			return NULL;
+
+		hlist_relink(&p->siblings);
+		hlist_relink_head(&p->children);
+		
+		return p->data;
+	}
+
+	/* free */
+	_free_children(p);
+	hlist_del(&p->siblings);
+	allocator(p, 0);
+
+	return NULL;
+}
+
+void hattach(void * block, void * parent)
+{
+	hblock_t * b, * p;
+	
+	if (! block)
+	{
+		assert(! parent);
+		return;
+	}
+
+	/* detach */
+	b = structof(block, hblock_t, data);
+	assert(b->magic == HH_MAGIC);
+
+	hlist_del(&b->siblings);
+
+	if (! parent)
+		return;
+
+	/* attach */
+	p = structof(parent, hblock_t, data);
+	assert(p->magic == HH_MAGIC);
+	
+	/* sanity checks */
+	assert(b != p);          /* trivial */
+	assert(! _relate(p, b)); /* heavy ! */
+
+	hlist_add(&p->children, &b->siblings);
+}
+
+/*
+ *	malloc/free api
+ */
+void * h_malloc(size_t len)
+{
+	return halloc(0, len);
+}
+
+void * h_calloc(size_t n, size_t len)
+{
+	void * ptr = halloc(0, len*=n);
+	return ptr ? memset(ptr, 0, len) : NULL;
+}
+
+void * h_realloc(void * ptr, size_t len)
+{
+	return halloc(ptr, len);
+}
+
+void   h_free(void * ptr)
+{
+	halloc(ptr, 0);
+}
+
+char * h_strdup(const char * str)
+{
+	size_t len = strlen(str);
+	char * ptr = halloc(0, len + 1);
+	return ptr ? (ptr[len] = 0, memcpy(ptr, str, len)) : NULL;
+}
+
+/*
+ *	static stuff
+ */
+static void _set_allocator(void)
+{
+	void * p;
+	assert(! allocator);
+	
+	/*
+	 *	the purpose of the test below is to check the behaviour
+	 *	of realloc(ptr, 0), which is defined in the standard
+	 *	as an implementation-specific. if it returns zero,
+	 *	then it's equivalent to free(). it can however return
+	 *	non-zero, in which case it cannot be used for freeing
+	 *	memory blocks and we'll need to supply our own version
+	 *
+	 *	Thanks to Stan Tobias for pointing this tricky part out.
+	 */
+	allocator = realloc;
+	if (! (p = malloc(1)))
+		/* hmm */
+		return;
+		
+	if ((p = realloc(p, 0)))
+	{
+		/* realloc cannot be used as free() */
+		allocator = _realloc;
+		free(p);
+	}
+}
+
+static void * _realloc(void * ptr, size_t n)
+{
+	/*
+	 *	free'ing realloc()
+	 */
+	if (n)
+		return realloc(ptr, n);
+	free(ptr);
+	return NULL;
+}
+
+static int _relate(hblock_t * b, hblock_t * p)
+{
+	hlist_item_t * i;
+
+	if (!b || !p)
+		return 0;
+
+	/* 
+	 *  since there is no 'parent' pointer, which would've allowed
+	 *  O(log(n)) upward traversal, the check must use O(n) downward 
+	 *  iteration of the entire hierarchy; and this can be VERY SLOW
+	 */
+	hlist_for_each(i, &p->children)
+	{
+		hblock_t * q = structof(i, hblock_t, siblings);
+		if (q == b || _relate(b, q))
+			return 1;
+	}
+	return 0;
+}
+
+static void _free_children(hblock_t * p)
+{
+	hlist_item_t * i, * tmp;
+	
+#ifndef NDEBUG
+	/*
+	 *	this catches loops in hierarchy with almost zero 
+	 *	overhead (compared to _relate() running time)
+	 */
+	assert(p && p->magic == HH_MAGIC);
+	p->magic = 0; 
+#endif
+	hlist_for_each_safe(i, tmp, &p->children)
+	{
+		hblock_t * q = structof(i, hblock_t, siblings);
+		_free_children(q);
+		allocator(q, 0);
+	}
+}
+
diff --git a/nestegg/halloc/src/hlist.h b/nestegg/halloc/src/hlist.h
new file mode 100644
index 000000000..2791f78c7
--- /dev/null
+++ b/nestegg/halloc/src/hlist.h
@@ -0,0 +1,136 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HLIST_H_
+#define _LIBP_HLIST_H_
+
+#include <assert.h>
+#include "macros.h"  /* static_inline */
+
+/*
+ *	weak double-linked list w/ tail sentinel
+ */
+typedef struct hlist_head  hlist_head_t;
+typedef struct hlist_item  hlist_item_t;
+
+/*
+ *
+ */
+struct hlist_head
+{
+	hlist_item_t * next;
+};
+
+struct hlist_item
+{
+	hlist_item_t * next;
+	hlist_item_t ** prev;
+};
+
+/*
+ *	shared tail sentinel
+ */
+struct hlist_item hlist_null;
+
+/*
+ *
+ */
+#define __hlist_init(h)      { &hlist_null }
+#define __hlist_init_item(i) { &hlist_null, &(i).next }
+
+static_inline void hlist_init(hlist_head_t * h);
+static_inline void hlist_init_item(hlist_item_t * i);
+
+/* static_inline void hlist_purge(hlist_head_t * h); */
+
+/* static_inline bool_t hlist_empty(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_head(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_next(const hlist_item_t * i); */
+/* static_inline hlist_item_t * hlist_prev(const hlist_item_t * i, 
+                                           const hlist_head_t * h); */
+
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i);
+
+/* static_inline void hlist_add_prev(hlist_item_t * l, hlist_item_t * i); */
+/* static_inline void hlist_add_next(hlist_item_t * l, hlist_item_t * i); */
+
+static_inline void hlist_del(hlist_item_t * i);
+
+static_inline void hlist_relink(hlist_item_t * i);
+static_inline void hlist_relink_head(hlist_head_t * h);
+
+#define hlist_for_each(i, h) \
+	for (i = (h)->next; i != &hlist_null; i = i->next)
+
+#define hlist_for_each_safe(i, tmp, h) \
+	for (i = (h)->next, tmp = i->next; \
+	     i!= &hlist_null; \
+	     i = tmp, tmp = i->next)
+
+/*
+ *	static
+ */
+static_inline void hlist_init(hlist_head_t * h)
+{
+	assert(h);
+	h->next = &hlist_null;
+}
+
+static_inline void hlist_init_item(hlist_item_t * i)
+{
+	assert(i);
+	i->prev = &i->next;
+	i->next = &hlist_null;
+}
+
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i)
+{
+	hlist_item_t * next;
+	assert(h && i);
+	
+	next = i->next = h->next;
+	next->prev = &i->next;
+	h->next = i;
+	i->prev = &h->next;
+}
+
+static_inline void hlist_del(hlist_item_t * i)
+{
+	hlist_item_t * next;
+	assert(i);
+
+	next = i->next;
+	next->prev = i->prev;
+	*i->prev = next;
+	
+	hlist_init_item(i);
+}
+
+static_inline void hlist_relink(hlist_item_t * i)
+{
+	assert(i);
+	*i->prev = i;
+	i->next->prev = &i->next;
+}
+
+static_inline void hlist_relink_head(hlist_head_t * h)
+{
+	assert(h);
+	h->next->prev = &h->next;
+}
+
+#endif
+
diff --git a/nestegg/halloc/src/macros.h b/nestegg/halloc/src/macros.h
new file mode 100644
index 000000000..c36b516ee
--- /dev/null
+++ b/nestegg/halloc/src/macros.h
@@ -0,0 +1,36 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_MACROS_H_
+#define _LIBP_MACROS_H_
+
+#include <stddef.h>  /* offsetof */
+
+/*
+ 	restore pointer to the structure by a pointer to its field
+ */
+#define structof(p,t,f) ((t*)(- offsetof(t,f) + (char*)(p)))
+
+/*
+ *	redefine for the target compiler
+ */
+#ifdef _WIN32
+#define static_inline static __inline
+#else
+#define static_inline static __inline__
+#endif
+
+
+#endif
+
diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h
new file mode 100644
index 000000000..5e1069884
--- /dev/null
+++ b/nestegg/include/nestegg/nestegg.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#ifndef   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+#define   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+
+#include <nestegg/nestegg-stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @mainpage
+
+    @section intro Introduction
+
+    This is the documentation fot the <tt>libnestegg</tt> C API.
+    <tt>libnestegg</tt> is a demultiplexing library for <a
+    href="http://www.matroska.org/">Matroska</a> and <a
+    href="http://www.webmproject.org/">WebMedia</a> media files.
+
+    @section example Example code
+
+    @code
+    nestegg * demux_ctx;
+    nestegg_init(&demux_ctx, io, NULL);
+
+    nestegg_packet * pkt;
+    while ((r = nestegg_read_packet(demux_ctx, &pkt)) > 0) {
+      unsigned int track;
+
+      nestegg_packet_track(pkt, &track);
+
+      // This example decodes the first track only.
+      if (track == 0) {
+        unsigned int chunk, chunks;
+
+        nestegg_packet_count(pkt, &chunks);
+
+        // Decode each chunk of data.
+        for (chunk = 0; chunk < chunks; ++chunk) {
+          unsigned char * data;
+          size_t data_size;
+
+          nestegg_packet_data(pkt, chunk, &data, &data_size);
+
+          example_codec_decode(codec_ctx, data, data_size);
+        }
+      }
+
+      nestegg_free_packet(pkt);
+    }
+
+    nestegg_destroy(demux_ctx);
+    @endcode
+*/
+
+
+/** @file
+    The <tt>libnestegg</tt> C API. */
+
+#define NESTEGG_TRACK_VIDEO 0 /**< Track is of type video. */
+#define NESTEGG_TRACK_AUDIO 1 /**< Track is of type audio. */
+
+#define NESTEGG_CODEC_VP8    0 /**< Track uses Google On2 VP8 codec. */
+#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */
+
+#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */
+#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */
+#define NESTEGG_SEEK_END 2 /**< Seek offset relative to end of stream. */
+
+#define NESTEGG_LOG_DEBUG    1     /**< Debug level log message. */
+#define NESTEGG_LOG_INFO     10    /**< Informational level log message. */
+#define NESTEGG_LOG_WARNING  100   /**< Warning level log message. */
+#define NESTEGG_LOG_ERROR    1000  /**< Error level log message. */
+#define NESTEGG_LOG_CRITICAL 10000 /**< Critical level log message. */
+
+typedef struct nestegg nestegg;               /**< Opaque handle referencing the stream state. */
+typedef struct nestegg_packet nestegg_packet; /**< Opaque handle referencing a packet of data. */
+
+/** User supplied IO context. */
+typedef struct {
+  /** User supplied read callback.
+      @param buffer   Buffer to read data into.
+      @param length   Length of supplied buffer in bytes.
+      @param userdata The #userdata supplied by the user.
+      @retval  1 Read succeeded.
+      @retval  0 End of stream.
+      @retval -1 Error. */
+  int (* read)(void * buffer, size_t length, void * userdata);
+
+  /** User supplied seek callback.
+      @param offset   Offset within the stream to seek to.
+      @param whence   Seek direction.  One of #NESTEGG_SEEK_SET,
+                      #NESTEGG_SEEK_CUR, or #NESTEGG_SEEK_END.
+      @param userdata The #userdata supplied by the user.
+      @retval  0 Seek succeeded.
+      @retval -1 Error. */
+  int (* seek)(int64_t offset, int whence, void * userdata);
+
+  /** User supplied tell callback.
+      @param userdata The #userdata supplied by the user.
+      @returns Current position within the stream.
+      @retval -1 Error. */
+  int64_t (* tell)(void * userdata);
+
+  /** User supplied pointer to be passed to the IO callbacks. */
+  void * userdata;
+} nestegg_io;
+
+/** Parameters specific to a video track. */
+typedef struct {
+  unsigned int width;          /**< Width of the video frame in pixels. */
+  unsigned int height;         /**< Height of the video frame in pixels. */
+  unsigned int display_width;  /**< Display width of the video frame in pixels. */
+  unsigned int display_height; /**< Display height of the video frame in pixels. */
+  unsigned int crop_bottom;    /**< Pixels to crop from the bottom of the frame. */
+  unsigned int crop_top;       /**< Pixels to crop from the top of the frame. */
+  unsigned int crop_left;      /**< Pixels to crop from the left of the frame. */
+  unsigned int crop_right;     /**< Pixels to crop from the right of the frame. */
+} nestegg_video_params;
+
+/** Parameters specific to an audio track. */
+typedef struct {
+  double rate;           /**< Sampling rate in Hz. */
+  unsigned int channels; /**< Number of audio channels. */
+  unsigned int depth;    /**< Bits per sample. */
+} nestegg_audio_params;
+
+/** Logging callback function pointer. */
+typedef void (* nestegg_log)(nestegg * context, unsigned int severity, char const * format, ...);
+
+/** Initialize a nestegg context.  During initialization the parser will
+    read forward in the stream processing all elements until the first
+    block of media is reached.  All track metadata has been processed at this point.
+    @param context  Storage for the new nestegg context.  @see nestegg_destroy
+    @param io       User supplied IO context.
+    @param callback Optional logging callback function pointer.  May be NULL.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback);
+
+/** Destroy a nestegg context and free associated memory.
+    @param context #nestegg context to be freed.  @see nestegg_init */
+void nestegg_destroy(nestegg * context);
+
+/** Query the duration of the media stream in nanoseconds.
+    @param context  Stream context initialized by #nestegg_init.
+    @param duration Storage for the queried duration.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_duration(nestegg * context, uint64_t * duration);
+
+/** Query the tstamp scale of the media stream in nanoseconds.
+    Timecodes presented by nestegg have been scaled by this value
+    before presentation to the caller.
+    @param context Stream context initialized by #nestegg_init.
+    @param scale   Storage for the queried scale factor.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_tstamp_scale(nestegg * context, uint64_t * scale);
+
+/** Query the number of tracks in the media stream.
+    @param context Stream context initialized by #nestegg_init.
+    @param tracks  Storage for the queried track count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_count(nestegg * context, unsigned int * tracks);
+
+/** Seek @a track to @a tstamp.  Stream seek will terminate at the earliest
+    key point in the stream at or before @a tstamp.  Other tracks in the
+    stream will output packets with unspecified but nearby timestamps.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param tstamp  Absolute timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_seek(nestegg * context, unsigned int track, uint64_t tstamp);
+
+/** Query the type specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_TRACK_VIDEO Track type is video.
+    @retval #NESTEGG_TRACK_AUDIO Track type is audio.
+    @retval -1 Error. */
+int nestegg_track_type(nestegg * context, unsigned int track);
+
+/** Query the codec ID specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_CODEC_VP8    Track codec is VP8.
+    @retval #NESTEGG_CODEC_VORBIS Track codec is Vorbis.
+    @retval -1 Error. */
+int nestegg_track_codec_id(nestegg * context, unsigned int track);
+
+/** Query the number of codec initialization chunks for @a track.  Each
+    chunk of data should be passed to the codec initialization functions in
+    the order returned.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param count   Storage for the queried chunk count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data_count(nestegg * context, unsigned int track,
+                                   unsigned int * count);
+
+/** Get a pointer to chunk number @a item of codec initialization data for
+    @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg context.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data(nestegg * context, unsigned int track, unsigned int item,
+                             unsigned char ** data, size_t * length);
+
+/** Query the video parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried video parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_video_params(nestegg * context, unsigned int track,
+                               nestegg_video_params * params);
+
+/** Query the audio parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried audio parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_audio_params(nestegg * context, unsigned int track,
+                               nestegg_audio_params * params);
+
+/** Read a packet of media data.  A packet consists of one or more chunks of
+    data associated with a single track.  nestegg_read_packet should be
+    called in a loop while the return value is 1 to drive the stream parser
+    forward.  @see nestegg_free_packet
+    @param context Context returned by #nestegg_init.
+    @param packet  Storage for the returned nestegg_packet.
+    @retval  1 Additional packets may be read in subsequent calls.
+    @retval  0 End of stream.
+    @retval -1 Error. */
+int nestegg_read_packet(nestegg * context, nestegg_packet ** packet);
+
+/** Destroy a nestegg_packet and free associated memory.
+    @param packet #nestegg_packet to be freed. @see nestegg_read_packet */
+void nestegg_free_packet(nestegg_packet * packet);
+
+/** Query the track number of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param track  Storage for the queried zero based track index.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_track(nestegg_packet * packet, unsigned int * track);
+
+/** Query the time stamp in nanoseconds of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param tstamp Storage for the queried timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_tstamp(nestegg_packet * packet, uint64_t * tstamp);
+
+/** Query the number of data chunks contained in @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param count  Storage for the queried timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_count(nestegg_packet * packet, unsigned int * count);
+
+/** Get a pointer to chunk number @a item of packet data.
+    @param packet  Packet initialized by #nestegg_read_packet.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg_packet packet.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_data(nestegg_packet * packet, unsigned int item,
+                        unsigned char ** data, size_t * length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 */
diff --git a/nestegg/m4/as-ac-expand.m4 b/nestegg/m4/as-ac-expand.m4
new file mode 100644
index 000000000..d6c9e3306
--- /dev/null
+++ b/nestegg/m4/as-ac-expand.m4
@@ -0,0 +1,43 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas@apestaart.org
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+AC_DEFUN([AS_AC_EXPAND],
+[
+  EXP_VAR=[$1]
+  FROM_VAR=[$2]
+
+  dnl first expand prefix and exec_prefix if necessary
+  prefix_save=$prefix
+  exec_prefix_save=$exec_prefix
+
+  dnl if no prefix given, then use /usr/local, the default prefix
+  if test "x$prefix" = "xNONE"; then
+    prefix="$ac_default_prefix"
+  fi
+  dnl if no exec_prefix given, then use prefix
+  if test "x$exec_prefix" = "xNONE"; then
+    exec_prefix=$prefix
+  fi
+
+  full_var="$FROM_VAR"
+  dnl loop until it doesn't change anymore
+  while true; do
+    new_full_var="`eval echo $full_var`"
+    if test "x$new_full_var" = "x$full_var"; then break; fi
+    full_var=$new_full_var
+  done
+
+  dnl clean up
+  full_var=$new_full_var
+  AC_SUBST([$1], "$full_var")
+
+  dnl restore prefix and exec_prefix
+  prefix=$prefix_save
+  exec_prefix=$exec_prefix_save
+])
diff --git a/nestegg/m4/ax_create_stdint_h.m4 b/nestegg/m4/ax_create_stdint_h.m4
new file mode 100644
index 000000000..228105b11
--- /dev/null
+++ b/nestegg/m4/ax_create_stdint_h.m4
@@ -0,0 +1,695 @@
+dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEDERS-TO-CHECK])]
+dnl
+dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
+dnl existence of an include file <stdint.h> that defines a set of
+dnl typedefs, especially uint8_t,int32_t,uintptr_t. Many older
+dnl installations will not provide this file, but some will have the
+dnl very same definitions in <inttypes.h>. In other enviroments we can
+dnl use the inet-types in <sys/types.h> which would define the typedefs
+dnl int8_t and u_int8_t respectivly.
+dnl
+dnl This macros will create a local "_stdint.h" or the headerfile given
+dnl as an argument. In many cases that file will just "#include
+dnl <stdint.h>" or "#include <inttypes.h>", while in other environments
+dnl it will provide the set of basic 'stdint's definitions/typedefs:
+dnl
+dnl   int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t
+dnl   int_least32_t.. int_fast32_t.. intmax_t
+dnl
+dnl which may or may not rely on the definitions of other files, or
+dnl using the AC_CHECK_SIZEOF macro to determine the actual sizeof each
+dnl type.
+dnl
+dnl if your header files require the stdint-types you will want to
+dnl create an installable file mylib-int.h that all your other
+dnl installable header may include. So if you have a library package
+dnl named "mylib", just use
+dnl
+dnl      AX_CREATE_STDINT_H(mylib-int.h)
+dnl
+dnl in configure.ac and go to install that very header file in
+dnl Makefile.am along with the other headers (mylib.h) - and the
+dnl mylib-specific headers can simply use "#include <mylib-int.h>" to
+dnl obtain the stdint-types.
+dnl
+dnl Remember, if the system already had a valid <stdint.h>, the
+dnl generated file will include it directly. No need for fuzzy
+dnl HAVE_STDINT_H things... (oops, GCC 4.2.x has deliberatly disabled
+dnl its stdint.h for non-c99 compilation and the c99-mode is not the
+dnl default. Therefore this macro will not use the compiler's stdint.h
+dnl - please complain to the GCC developers).
+dnl
+dnl @category C
+dnl @author Guido U. Draheim <guidod@gmx.de>
+dnl @version 2006-10-13
+dnl @license GPLWithACException
+
+AC_DEFUN([AX_CHECK_DATA_MODEL],[
+   AC_CHECK_SIZEOF(char)
+   AC_CHECK_SIZEOF(short)
+   AC_CHECK_SIZEOF(int)
+   AC_CHECK_SIZEOF(long)
+   AC_CHECK_SIZEOF(void*)
+   ac_cv_char_data_model=""
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_char"
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_short"
+   ac_cv_char_data_model="$ac_cv_char_data_model$ac_cv_sizeof_int"
+   ac_cv_long_data_model=""
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_int"
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_long"
+   ac_cv_long_data_model="$ac_cv_long_data_model$ac_cv_sizeof_voidp"
+   AC_MSG_CHECKING([data model])
+   case "$ac_cv_char_data_model/$ac_cv_long_data_model" in
+    122/242)     ac_cv_data_model="IP16"  ; n="standard 16bit machine" ;;
+    122/244)     ac_cv_data_model="LP32"  ; n="standard 32bit machine" ;;
+    122/*)       ac_cv_data_model="i16"   ; n="unusual int16 model" ;;
+    124/444)     ac_cv_data_model="ILP32" ; n="standard 32bit unixish" ;;
+    124/488)     ac_cv_data_model="LP64"  ; n="standard 64bit unixish" ;;
+    124/448)     ac_cv_data_model="LLP64" ; n="unusual 64bit unixish" ;;
+    124/*)       ac_cv_data_model="i32"   ; n="unusual int32 model" ;;
+    128/888)     ac_cv_data_model="ILP64" ; n="unusual 64bit numeric" ;;
+    128/*)       ac_cv_data_model="i64"   ; n="unusual int64 model" ;;
+    222/*2)      ac_cv_data_model="DSP16" ; n="strict 16bit dsptype" ;;
+    333/*3)      ac_cv_data_model="DSP24" ; n="strict 24bit dsptype" ;;
+    444/*4)      ac_cv_data_model="DSP32" ; n="strict 32bit dsptype" ;;
+    666/*6)      ac_cv_data_model="DSP48" ; n="strict 48bit dsptype" ;;
+    888/*8)      ac_cv_data_model="DSP64" ; n="strict 64bit dsptype" ;;
+    222/*|333/*|444/*|666/*|888/*) :
+                 ac_cv_data_model="iDSP"  ; n="unusual dsptype" ;;
+     *)          ac_cv_data_model="none"  ; n="very unusual model" ;;
+   esac
+   AC_MSG_RESULT([$ac_cv_data_model ($ac_cv_long_data_model, $n)])
+])
+
+dnl AX_CHECK_HEADER_STDINT_X([HEADERLIST][,ACTION-IF])
+AC_DEFUN([AX_CHECK_HEADER_STDINT_X],[
+AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[
+ ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[stdint.h inttypes.h sys/inttypes.h sys/types.h])
+  do
+   unset ac_cv_type_uintptr_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uintptr_t,[ac_cv_header_stdint_x=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$1],[$1]) break
+  done
+  AC_MSG_CHECKING([for stdint uintptr_t])
+ ])
+])
+
+AC_DEFUN([AX_CHECK_HEADER_STDINT_O],[
+AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[
+ ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[inttypes.h sys/inttypes.h sys/types.h stdint.h])
+  do
+   unset ac_cv_type_uint32_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$1],[$1]) break
+   break;
+  done
+  AC_MSG_CHECKING([for stdint uint32_t])
+ ])
+])
+
+AC_DEFUN([AX_CHECK_HEADER_STDINT_U],[
+AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[
+ ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[sys/types.h inttypes.h sys/inttypes.h]) ; do
+   unset ac_cv_type_u_int32_t
+   unset ac_cv_type_u_int64_t
+   AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$1],[$1]) break
+   break;
+  done
+  AC_MSG_CHECKING([for stdint u_int32_t])
+ ])
+])
+
+AC_DEFUN([AX_CREATE_STDINT_H],
+[# ------ AX CREATE STDINT H -------------------------------------
+AC_MSG_CHECKING([for stdint types])
+ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)`
+# try to shortcircuit - if the default include path of the compiler
+# can find a "stdint.h" header then we assume that all compilers can.
+AC_CACHE_VAL([ac_cv_header_stdint_t],[
+old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS=""
+old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS=""
+old_CFLAGS="$CFLAGS"     ; CFLAGS=""
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[ac_cv_stdint_result="(assuming C99 compatible system)"
+ ac_cv_header_stdint_t="stdint.h"; ],
+[ac_cv_header_stdint_t=""])
+if test "$GCC" = "yes" && test ".$ac_cv_header_stdint_t" = "."; then
+CFLAGS="-std=c99"
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[AC_MSG_WARN(your GCC compiler has a defunct stdint.h for its default-mode)])
+fi
+CXXFLAGS="$old_CXXFLAGS"
+CPPFLAGS="$old_CPPFLAGS"
+CFLAGS="$old_CFLAGS" ])
+
+v="... $ac_cv_header_stdint_h"
+if test "$ac_stdint_h" = "stdint.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)])
+elif test "$ac_stdint_h" = "inttypes.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)])
+elif test "_$ac_cv_header_stdint_t" = "_" ; then
+ AC_MSG_RESULT([(putting them into $ac_stdint_h)$v])
+else
+ ac_cv_header_stdint="$ac_cv_header_stdint_t"
+ AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)])
+fi
+
+if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit..
+
+dnl .....intro message done, now do a few system checks.....
+dnl btw, all old CHECK_TYPE macros do automatically "DEFINE" a type,
+dnl therefore we use the autoconf implementation detail CHECK_TYPE_NEW
+dnl instead that is triggered with 3 or more arguments (see types.m4)
+
+inttype_headers=`echo $2 | sed -e 's/,/ /g'`
+
+ac_cv_stdint_result="(no helpful system typedefs seen)"
+AX_CHECK_HEADER_STDINT_X(dnl
+   stdint.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen uintptr_t$and64 in $i)")
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+AX_CHECK_HEADER_STDINT_O(dnl,
+   inttypes.h sys/inttypes.h stdint.h $inttype_headers,
+   ac_cv_stdint_result="(seen uint32_t$and64 in $i)")
+fi
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+if test "_$ac_cv_header_stdint_o" = "_" ; then
+AX_CHECK_HEADER_STDINT_U(dnl,
+   sys/types.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen u_int32_t$and64 in $i)")
+fi fi
+
+dnl if there was no good C99 header file, do some typedef checks...
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+   AC_MSG_CHECKING([for stdint datatype model])
+   AC_MSG_RESULT([(..)])
+   AX_CHECK_DATA_MODEL
+fi
+
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_x"
+elif  test "_$ac_cv_header_stdint_o" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_o"
+elif  test "_$ac_cv_header_stdint_u" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_u"
+else
+   ac_cv_header_stdint="stddef.h"
+fi
+
+AC_MSG_CHECKING([for extra inttypes in chosen header])
+AC_MSG_RESULT([($ac_cv_header_stdint)])
+dnl see if int_least and int_fast types are present in _this_ header.
+unset ac_cv_type_int_least32_t
+unset ac_cv_type_int_fast32_t
+AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>])
+AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>])
+AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>])
+
+fi # shortcircut to system "stdint.h"
+# ------------------ PREPARE VARIABLES ------------------------------
+if test "$GCC" = "yes" ; then
+ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1`
+else
+ac_cv_stdint_message="using $CC"
+fi
+
+AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl
+$ac_cv_stdint_result])
+
+dnl -----------------------------------------------------------------
+# ----------------- DONE inttypes.h checks START header -------------
+AC_CONFIG_COMMANDS([$ac_stdint_h],[
+AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h)
+ac_stdint=$tmp/_stdint.h
+
+echo "#ifndef" $_ac_stdint_h >$ac_stdint
+echo "#define" $_ac_stdint_h "1" >>$ac_stdint
+echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint
+echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint
+echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_t" != "_" ; then
+echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint
+echo "#include <stdint.h>" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+else
+
+cat >>$ac_stdint <<STDINT_EOF
+
+/* ................... shortcircuit part ........................... */
+
+#if defined HAVE_STDINT_H || defined _STDINT_HAVE_STDINT_H
+#include <stdint.h>
+#else
+#include <stddef.h>
+
+/* .................... configured part ............................ */
+
+STDINT_EOF
+
+echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+  ac_header="$ac_cv_header_stdint_x"
+  echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint
+fi
+
+echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_o" != "_" ; then
+  ac_header="$ac_cv_header_stdint_o"
+  echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint
+fi
+
+echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_u" != "_" ; then
+  ac_header="$ac_cv_header_stdint_u"
+  echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint
+fi
+
+echo "" >>$ac_stdint
+
+if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then
+  echo "#include <$ac_header>" >>$ac_stdint
+  echo "" >>$ac_stdint
+fi fi
+
+echo "/* which 64bit typedef has been found */" >>$ac_stdint
+if test "$ac_cv_type_uint64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_UINT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint
+fi
+if test "$ac_cv_type_u_int64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_U_INT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* which type model has been detected */" >>$ac_stdint
+if test "_$ac_cv_char_data_model" != "_" ; then
+echo "#define   _STDINT_CHAR_MODEL" "$ac_cv_char_data_model" >>$ac_stdint
+echo "#define   _STDINT_LONG_MODEL" "$ac_cv_long_data_model" >>$ac_stdint
+else
+echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint
+echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* whether int_least types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_least32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_LEAST32_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint
+fi
+echo "/* whether int_fast types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_fast32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint
+fi
+echo "/* whether intmax_t type was detected */" >>$ac_stdint
+if test "$ac_cv_type_intmax_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+  cat >>$ac_stdint <<STDINT_EOF
+/* .................... detections part ............................ */
+
+/* whether we need to define bitspecific types from compiler base types */
+#ifndef _STDINT_HEADER_INTPTR
+#ifndef _STDINT_HEADER_UINT32
+#ifndef _STDINT_HEADER_U_INT32
+#define _STDINT_NEED_INT_MODEL_T
+#else
+#define _STDINT_HAVE_U_INT_TYPES
+#endif
+#endif
+#endif
+
+#ifdef _STDINT_HAVE_U_INT_TYPES
+#undef _STDINT_NEED_INT_MODEL_T
+#endif
+
+#ifdef  _STDINT_CHAR_MODEL
+#if     _STDINT_CHAR_MODEL+0 == 122 || _STDINT_CHAR_MODEL+0 == 124
+#ifndef _STDINT_BYTE_MODEL
+#define _STDINT_BYTE_MODEL 12
+#endif
+#endif
+#endif
+
+#ifndef _STDINT_HAVE_INT_LEAST32_T
+#define _STDINT_NEED_INT_LEAST_T
+#endif
+
+#ifndef _STDINT_HAVE_INT_FAST32_T
+#define _STDINT_NEED_INT_FAST_T
+#endif
+
+#ifndef _STDINT_HEADER_INTPTR
+#define _STDINT_NEED_INTPTR_T
+#ifndef _STDINT_HAVE_INTMAX_T
+#define _STDINT_NEED_INTMAX_T
+#endif
+#endif
+
+
+/* .................... definition part ............................ */
+
+/* some system headers have good uint64_t */
+#ifndef _HAVE_UINT64_T
+#if     defined _STDINT_HAVE_UINT64_T  || defined HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#elif   defined _STDINT_HAVE_U_INT64_T || defined HAVE_U_INT64_T
+#define _HAVE_UINT64_T
+typedef u_int64_t uint64_t;
+#endif
+#endif
+
+#ifndef _HAVE_UINT64_T
+/* .. here are some common heuristics using compiler runtime specifics */
+#if defined __STDC_VERSION__ && defined __STDC_VERSION__ >= 199901L
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+#elif !defined __STRICT_ANSI__
+#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__
+#define _HAVE_UINT64_T
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__
+/* note: all ELF-systems seem to have loff-support which needs 64-bit */
+#if !defined _NO_LONGLONG
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
+#elif defined __alpha || (defined __mips && defined _ABIN32)
+#if !defined _NO_LONGLONG
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#endif
+  /* compiler/cpu type to define int64_t */
+#endif
+#endif
+#endif
+
+#if defined _STDINT_HAVE_U_INT_TYPES
+/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */
+typedef u_int8_t uint8_t;
+typedef u_int16_t uint16_t;
+typedef u_int32_t uint32_t;
+
+/* glibc compatibility */
+#ifndef __int8_t_defined
+#define __int8_t_defined
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INT_MODEL_T
+/* we must guess all the basic types. Apart from byte-adressable system, */
+/* there a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */
+/* (btw, those nibble-addressable systems are way off, or so we assume) */
+
+dnl   /* have a look at "64bit and data size neutrality" at */
+dnl   /* http://unix.org/version2/whatsnew/login_64bit.html */
+dnl   /* (the shorthand "ILP" types always have a "P" part) */
+
+#if defined _STDINT_BYTE_MODEL
+#if _STDINT_LONG_MODEL+0 == 242
+/* 2:4:2 =  IP16 = a normal 16-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned long   uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          long    int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL == 444
+/* 2:4:4 =  LP32 = a 32-bit system derived from a 16-bit */
+/* 4:4:4 = ILP32 = a normal 32-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488
+/* 4:8:4 =  IP32 = a 32-bit system prepared for 64-bit    */
+/* 4:8:8 =  LP64 = a normal 64-bit system                 */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* this system has a "long" of 64bit */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+typedef unsigned long   uint64_t;
+typedef          long    int64_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 448
+/*      LLP64   a 64-bit system derived from a 32-bit system */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef          char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* assuming the system has a "long long" */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef unsigned long long uint64_t;
+typedef          long long  int64_t;
+#endif
+#else
+#define _STDINT_NO_INT32_T
+#endif
+#else
+#define _STDINT_NO_INT8_T
+#define _STDINT_NO_INT32_T
+#endif
+#endif
+
+/*
+ * quote from SunOS-5.8 sys/inttypes.h:
+ * Use at your own risk.  As of February 1996, the committee is squarely
+ * behind the fixed sized types; the "least" and "fast" types are still being
+ * discussed.  The probability that the "fast" types may be removed before
+ * the standard is finalized is high enough that they are not currently
+ * implemented.
+ */
+
+#if defined _STDINT_NEED_INT_LEAST_T
+typedef  int8_t    int_least8_t;
+typedef  int16_t   int_least16_t;
+typedef  int32_t   int_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_least64_t;
+#endif
+
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_least64_t;
+#endif
+  /* least types */
+#endif
+
+#if defined _STDINT_NEED_INT_FAST_T
+typedef  int8_t    int_fast8_t;
+typedef  int       int_fast16_t;
+typedef  int32_t   int_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_fast64_t;
+#endif
+
+typedef uint8_t   uint_fast8_t;
+typedef unsigned  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_fast64_t;
+#endif
+  /* fast types */
+#endif
+
+#ifdef _STDINT_NEED_INTMAX_T
+#ifdef _HAVE_UINT64_T
+typedef  int64_t       intmax_t;
+typedef uint64_t      uintmax_t;
+#else
+typedef          long  intmax_t;
+typedef unsigned long uintmax_t;
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INTPTR_T
+#ifndef __intptr_t_defined
+#define __intptr_t_defined
+/* we encourage using "long" to store pointer values, never use "int" ! */
+#if   _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484
+typedef  unsigned int   uintptr_t;
+typedef           int    intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T
+typedef        uint64_t uintptr_t;
+typedef         int64_t  intptr_t;
+#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#endif
+#endif
+#endif
+
+/* The ISO C99 standard specifies that in C++ implementations these
+   should only be defined if explicitly requested.  */
+#if !defined __cplusplus || defined __STDC_CONSTANT_MACROS
+#ifndef UINT32_C
+
+/* Signed.  */
+# define INT8_C(c)      c
+# define INT16_C(c)     c
+# define INT32_C(c)     c
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INT64_C(c)    c ## L
+# else
+#  define INT64_C(c)    c ## LL
+# endif
+
+/* Unsigned.  */
+# define UINT8_C(c)     c ## U
+# define UINT16_C(c)    c ## U
+# define UINT32_C(c)    c ## U
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define UINT64_C(c)   c ## UL
+# else
+#  define UINT64_C(c)   c ## ULL
+# endif
+
+/* Maximal type.  */
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INTMAX_C(c)   c ## L
+#  define UINTMAX_C(c)  c ## UL
+# else
+#  define INTMAX_C(c)   c ## LL
+#  define UINTMAX_C(c)  c ## ULL
+# endif
+
+  /* literalnumbers */
+#endif
+#endif
+
+/* These limits are merily those of a two complement byte-oriented system */
+
+/* Minimum of signed integral types.  */
+# define INT8_MIN               (-128)
+# define INT16_MIN              (-32767-1)
+# define INT32_MIN              (-2147483647-1)
+# define INT64_MIN              (-__INT64_C(9223372036854775807)-1)
+/* Maximum of signed integral types.  */
+# define INT8_MAX               (127)
+# define INT16_MAX              (32767)
+# define INT32_MAX              (2147483647)
+# define INT64_MAX              (__INT64_C(9223372036854775807))
+
+/* Maximum of unsigned integral types.  */
+# define UINT8_MAX              (255)
+# define UINT16_MAX             (65535)
+# define UINT32_MAX             (4294967295U)
+# define UINT64_MAX             (__UINT64_C(18446744073709551615))
+
+/* Minimum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MIN         INT8_MIN
+# define INT_LEAST16_MIN        INT16_MIN
+# define INT_LEAST32_MIN        INT32_MIN
+# define INT_LEAST64_MIN        INT64_MIN
+/* Maximum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MAX         INT8_MAX
+# define INT_LEAST16_MAX        INT16_MAX
+# define INT_LEAST32_MAX        INT32_MAX
+# define INT_LEAST64_MAX        INT64_MAX
+
+/* Maximum of unsigned integral types having a minimum size.  */
+# define UINT_LEAST8_MAX        UINT8_MAX
+# define UINT_LEAST16_MAX       UINT16_MAX
+# define UINT_LEAST32_MAX       UINT32_MAX
+# define UINT_LEAST64_MAX       UINT64_MAX
+
+  /* shortcircuit*/
+#endif
+  /* once */
+#endif
+#endif
+STDINT_EOF
+fi
+    if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then
+      AC_MSG_NOTICE([$ac_stdint_h is unchanged])
+    else
+      ac_dir=`AS_DIRNAME(["$ac_stdint_h"])`
+      AS_MKDIR_P(["$ac_dir"])
+      rm -f $ac_stdint_h
+      mv $ac_stdint $ac_stdint_h
+    fi
+],[# variables for create stdint.h replacement
+PACKAGE="$PACKAGE"
+VERSION="$VERSION"
+ac_stdint_h="$ac_stdint_h"
+_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h)
+ac_cv_stdint_message="$ac_cv_stdint_message"
+ac_cv_header_stdint_t="$ac_cv_header_stdint_t"
+ac_cv_header_stdint_x="$ac_cv_header_stdint_x"
+ac_cv_header_stdint_o="$ac_cv_header_stdint_o"
+ac_cv_header_stdint_u="$ac_cv_header_stdint_u"
+ac_cv_type_uint64_t="$ac_cv_type_uint64_t"
+ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t"
+ac_cv_char_data_model="$ac_cv_char_data_model"
+ac_cv_long_data_model="$ac_cv_long_data_model"
+ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t"
+ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t"
+ac_cv_type_intmax_t="$ac_cv_type_intmax_t"
+])
+])
diff --git a/nestegg/m4/pkg.m4 b/nestegg/m4/pkg.m4
new file mode 100644
index 000000000..996e29454
--- /dev/null
+++ b/nestegg/m4/pkg.m4
@@ -0,0 +1,157 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+#
+# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# PKG_PROG_PKG_CONFIG([MIN-VERSION])
+# ----------------------------------
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+	_pkg_min_version=m4_default([$1], [0.9.0])
+	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		PKG_CONFIG=""
+	fi
+
+fi[]dnl
+])# PKG_PROG_PKG_CONFIG
+
+# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to PKG_CHECK_MODULES(), but does not set variables or print errors.
+#
+#
+# Similar to PKG_CHECK_MODULES, make sure that the first instance of
+# this or PKG_CHECK_MODULES is called, or make sure to call
+# PKG_CHECK_EXISTS manually
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_ifval([$2], [$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+
+# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+# ---------------------------------------------
+m4_define([_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        pkg_cv_[]$1="$$1"
+    else
+        PKG_CHECK_EXISTS([$3],
+                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+			 [pkg_failed=yes])
+    fi
+else
+	pkg_failed=untried
+fi[]dnl
+])# _PKG_CONFIG
+
+# _PKG_SHORT_ERRORS_SUPPORTED
+# -----------------------------
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])# _PKG_SHORT_ERRORS_SUPPORTED
+
+
+# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+#
+# Note that if there is a possibility the first call to
+# PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+#
+#
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $1])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+        else
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+	ifelse([$4], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT
+])],
+		[AC_MSG_RESULT([no])
+                $4])
+elif test $pkg_failed = untried; then
+	ifelse([$4], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])],
+		[$4])
+else
+	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+	ifelse([$3], , :, [$3])
+fi[]dnl
+])# PKG_CHECK_MODULES
diff --git a/nestegg/nestegg-uninstalled.pc.in b/nestegg/nestegg-uninstalled.pc.in
new file mode 100644
index 000000000..19bb680ac
--- /dev/null
+++ b/nestegg/nestegg-uninstalled.pc.in
@@ -0,0 +1,13 @@
+# nestegg uninstalled pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
diff --git a/nestegg/nestegg.pc.in b/nestegg/nestegg.pc.in
new file mode 100644
index 000000000..32c09d79d
--- /dev/null
+++ b/nestegg/nestegg.pc.in
@@ -0,0 +1,13 @@
+# nestegg installed pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
new file mode 100644
index 000000000..ef09918da
--- /dev/null
+++ b/nestegg/src/nestegg.c
@@ -0,0 +1,1938 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "halloc.h"
+#include "nestegg/nestegg.h"
+
+/* EBML Elements */
+#define ID_EBML                 0x1a45dfa3
+#define ID_EBML_VERSION         0x4286
+#define ID_EBML_READ_VERSION    0x42f7
+#define ID_EBML_MAX_ID_LENGTH   0x42f2
+#define ID_EBML_MAX_SIZE_LENGTH 0x42f3
+#define ID_DOCTYPE              0x4282
+#define ID_DOCTYPE_VERSION      0x4287
+#define ID_DOCTYPE_READ_VERSION 0x4285
+
+/* Global Elements */
+#define ID_VOID                 0xec
+#define ID_CRC32                0xbf
+
+/* WebMedia Elements */
+#define ID_SEGMENT              0x18538067
+
+/* Seek Head Elements */
+#define ID_SEEK_HEAD            0x114d9b74
+#define ID_SEEK                 0x4dbb
+#define ID_SEEK_ID              0x53ab
+#define ID_SEEK_POSITION        0x53ac
+
+/* Info Elements */
+#define ID_INFO                 0x1549a966
+#define ID_TIMECODE_SCALE       0x2ad7b1
+#define ID_DURATION             0x4489
+
+/* Cluster Elements */
+#define ID_CLUSTER              0x1f43b675
+#define ID_TIMECODE             0xe7
+#define ID_BLOCK_GROUP          0xa0
+#define ID_SIMPLE_BLOCK         0xa3
+
+/* BlockGroup Elements */
+#define ID_BLOCK                0xa1
+#define ID_BLOCK_DURATION       0x9b
+#define ID_REFERENCE_BLOCK      0xfb
+
+/* Tracks Elements */
+#define ID_TRACKS               0x1654ae6b
+#define ID_TRACK_ENTRY          0xae
+#define ID_TRACK_NUMBER         0xd7
+#define ID_TRACK_UID            0x73c5
+#define ID_TRACK_TYPE           0x83
+#define ID_FLAG_ENABLED         0xb9
+#define ID_FLAG_DEFAULT         0x88
+#define ID_FLAG_LACING          0x9c
+#define ID_TRACK_TIMECODE_SCALE 0x23314f
+#define ID_LANGUAGE             0x22b59c
+#define ID_CODEC_ID             0x86
+#define ID_CODEC_PRIVATE        0x63a2
+
+/* Video Elements */
+#define ID_VIDEO                0xe0
+#define ID_PIXEL_WIDTH          0xb0
+#define ID_PIXEL_HEIGHT         0xba
+#define ID_PIXEL_CROP_BOTTOM    0x54aa
+#define ID_PIXEL_CROP_TOP       0x54bb
+#define ID_PIXEL_CROP_LEFT      0x54cc
+#define ID_PIXEL_CROP_RIGHT     0x54dd
+#define ID_DISPLAY_WIDTH        0x54b0
+#define ID_DISPLAY_HEIGHT       0x54ba
+
+/* Audio Elements */
+#define ID_AUDIO                0xe1
+#define ID_SAMPLING_FREQUENCY   0xb5
+#define ID_CHANNELS             0x9f
+#define ID_BIT_DEPTH            0x6264
+
+/* Cues Elements */
+#define ID_CUES                 0x1c53bb6b
+#define ID_CUE_POINT            0xbb
+#define ID_CUE_TIME             0xb3
+#define ID_CUE_TRACK_POSITIONS  0xb7
+#define ID_CUE_TRACK            0xf7
+#define ID_CUE_CLUSTER_POSITION 0xf1
+#define ID_CUE_BLOCK_NUMBER     0x5378
+
+/* EBML Types */
+enum ebml_type_enum {
+  TYPE_UNKNOWN,
+  TYPE_MASTER,
+  TYPE_UINT,
+  TYPE_FLOAT,
+  TYPE_INT,
+  TYPE_STRING,
+  TYPE_BINARY
+};
+
+#define LIMIT_STRING            (1 << 20)
+#define LIMIT_BINARY            (1 << 24)
+#define LIMIT_BLOCK             (1 << 30)
+#define LIMIT_FRAME             (1 << 28)
+
+/* Field Flags */
+#define DESC_FLAG_NONE          0
+#define DESC_FLAG_MULTI         (1 << 0)
+#define DESC_FLAG_SUSPEND       (1 << 1)
+#define DESC_FLAG_OFFSET        (1 << 2)
+
+/* Block Header Flags */
+#define BLOCK_FLAGS_LACING      6
+
+/* Lacing Constants */
+#define LACING_NONE             0
+#define LACING_XIPH             1
+#define LACING_FIXED            2
+#define LACING_EBML             3
+
+/* Track Types */
+#define TRACK_TYPE_VIDEO        1
+#define TRACK_TYPE_AUDIO        2
+
+/* Track IDs */
+#define TRACK_ID_VP8            "V_VP8"
+#define TRACK_ID_VORBIS         "A_VORBIS"
+
+enum vint_mask {
+  MASK_NONE,
+  MASK_FIRST_BIT
+};
+
+struct ebml_binary {
+  unsigned char * data;
+  size_t length;
+};
+
+struct ebml_list_node {
+  struct ebml_list_node * next;
+  uint64_t id;
+  void * data;
+};
+
+struct ebml_list {
+  struct ebml_list_node * head;
+  struct ebml_list_node * tail;
+};
+
+struct ebml_type {
+  union ebml_value {
+    uint64_t u;
+    double f;
+    int64_t i;
+    char * s;
+    struct ebml_binary b;
+  } v;
+  enum ebml_type_enum type;
+  int read;
+};
+
+/* EBML Definitions */
+struct ebml {
+  struct ebml_type ebml_version;
+  struct ebml_type ebml_read_version;
+  struct ebml_type ebml_max_id_length;
+  struct ebml_type ebml_max_size_length;
+  struct ebml_type doctype;
+  struct ebml_type doctype_version;
+  struct ebml_type doctype_read_version;
+};
+
+/* Matroksa Definitions */
+struct seek {
+  struct ebml_type id;
+  struct ebml_type position;
+};
+
+struct seek_head {
+  struct ebml_list seek;
+};
+
+struct info {
+  struct ebml_type timecode_scale;
+  struct ebml_type duration;
+};
+
+struct block_group {
+  struct ebml_type duration;
+  struct ebml_type reference_block;
+};
+
+struct cluster {
+  struct ebml_type timecode;
+  struct ebml_list block_group;
+};
+
+struct video {
+  struct ebml_type pixel_width;
+  struct ebml_type pixel_height;
+  struct ebml_type pixel_crop_bottom;
+  struct ebml_type pixel_crop_top;
+  struct ebml_type pixel_crop_left;
+  struct ebml_type pixel_crop_right;
+  struct ebml_type display_width;
+  struct ebml_type display_height;
+};
+
+struct audio {
+  struct ebml_type sampling_frequency;
+  struct ebml_type channels;
+  struct ebml_type bit_depth;
+};
+
+struct track_entry {
+  struct ebml_type number;
+  struct ebml_type uid;
+  struct ebml_type type;
+  struct ebml_type flag_enabled;
+  struct ebml_type flag_default;
+  struct ebml_type flag_lacing;
+  struct ebml_type track_timecode_scale;
+  struct ebml_type language;
+  struct ebml_type codec_id;
+  struct ebml_type codec_private;
+  struct video video;
+  struct audio audio;
+};
+
+struct tracks {
+  struct ebml_list track_entry;
+};
+
+struct cue_track_positions {
+  struct ebml_type track;
+  struct ebml_type cluster_position;
+  struct ebml_type block_number;
+};
+
+struct cue_point {
+  struct ebml_type time;
+  struct ebml_list cue_track_positions;
+};
+
+struct cues {
+  struct ebml_list cue_point;
+};
+
+struct segment {
+  struct ebml_list seek_head;
+  struct info info;
+  struct ebml_list cluster;
+  struct tracks tracks;
+  struct cues cues;
+};
+
+/* Misc. */
+struct pool_ctx {
+  char dummy;
+};
+
+struct list_node {
+  struct list_node * previous;
+  struct ebml_element_desc * node;
+  unsigned char * data;
+};
+
+struct saved_state {
+  int64_t stream_offset;
+  struct list_node * ancestor;
+  uint64_t last_id;
+  uint64_t last_size;
+};
+
+struct frame {
+  unsigned char * data;
+  size_t length;
+  struct frame * next;
+};
+
+/* Public (opaque) Structures */
+struct nestegg {
+  nestegg_io * io;
+  nestegg_log log;
+  struct pool_ctx * alloc_pool;
+  uint64_t last_id;
+  uint64_t last_size;
+  struct list_node * ancestor;
+  struct ebml ebml;
+  struct segment segment;
+  int64_t segment_offset;
+  unsigned int track_count;
+};
+
+struct nestegg_packet {
+  uint64_t track;
+  uint64_t timecode;
+  struct frame * frame;
+};
+
+/* Element Descriptor */
+struct ebml_element_desc {
+  char const * name;
+  uint64_t id;
+  enum ebml_type_enum type;
+  size_t offset;
+  unsigned int flags;
+  struct ebml_element_desc * children;
+  size_t size;
+  size_t data_offset;
+};
+
+#define E_FIELD(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, NULL, 0, 0 }
+#define E_MASTER(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_MULTI, ne_ ## FIELD ## _elements, \
+      sizeof(struct FIELD), 0 }
+#define E_SINGLE_MASTER_O(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_OFFSET, ne_ ## FIELD ## _elements, 0, \
+      offsetof(STRUCT, FIELD ## _offset) }
+#define E_SINGLE_MASTER(ID, TYPE, STRUCT, FIELD) \
+  { #ID, ID, TYPE, offsetof(STRUCT, FIELD), DESC_FLAG_NONE, ne_ ## FIELD ## _elements, 0, 0 }
+#define E_SUSPEND(ID, TYPE) \
+  { #ID, ID, TYPE, 0, DESC_FLAG_SUSPEND, NULL, 0, 0 }
+#define E_LAST \
+  { NULL, 0, 0, 0, DESC_FLAG_NONE, NULL, 0, 0 }
+
+/* EBML Element Lists */
+static struct ebml_element_desc ne_ebml_elements[] = {
+  E_FIELD(ID_EBML_VERSION, TYPE_UINT, struct ebml, ebml_version),
+  E_FIELD(ID_EBML_READ_VERSION, TYPE_UINT, struct ebml, ebml_read_version),
+  E_FIELD(ID_EBML_MAX_ID_LENGTH, TYPE_UINT, struct ebml, ebml_max_id_length),
+  E_FIELD(ID_EBML_MAX_SIZE_LENGTH, TYPE_UINT, struct ebml, ebml_max_size_length),
+  E_FIELD(ID_DOCTYPE, TYPE_STRING, struct ebml, doctype),
+  E_FIELD(ID_DOCTYPE_VERSION, TYPE_UINT, struct ebml, doctype_version),
+  E_FIELD(ID_DOCTYPE_READ_VERSION, TYPE_UINT, struct ebml, doctype_read_version),
+  E_LAST
+};
+
+/* WebMedia Element Lists */
+static struct ebml_element_desc ne_seek_elements[] = {
+  E_FIELD(ID_SEEK_ID, TYPE_BINARY, struct seek, id),
+  E_FIELD(ID_SEEK_POSITION, TYPE_UINT, struct seek, position),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_seek_head_elements[] = {
+  E_MASTER(ID_SEEK, TYPE_MASTER, struct seek_head, seek),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_info_elements[] = {
+  E_FIELD(ID_TIMECODE_SCALE, TYPE_UINT, struct info, timecode_scale),
+  E_FIELD(ID_DURATION, TYPE_FLOAT, struct info, duration),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_block_group_elements[] = {
+  E_SUSPEND(ID_BLOCK, TYPE_BINARY),
+  E_FIELD(ID_BLOCK_DURATION, TYPE_UINT, struct block_group, duration),
+  E_FIELD(ID_REFERENCE_BLOCK, TYPE_INT, struct block_group, reference_block),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cluster_elements[] = {
+  E_FIELD(ID_TIMECODE, TYPE_UINT, struct cluster, timecode),
+  E_MASTER(ID_BLOCK_GROUP, TYPE_MASTER, struct cluster, block_group),
+  E_SUSPEND(ID_SIMPLE_BLOCK, TYPE_BINARY),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_video_elements[] = {
+  E_FIELD(ID_PIXEL_WIDTH, TYPE_UINT, struct video, pixel_width),
+  E_FIELD(ID_PIXEL_HEIGHT, TYPE_UINT, struct video, pixel_height),
+  E_FIELD(ID_PIXEL_CROP_BOTTOM, TYPE_UINT, struct video, pixel_crop_bottom),
+  E_FIELD(ID_PIXEL_CROP_TOP, TYPE_UINT, struct video, pixel_crop_top),
+  E_FIELD(ID_PIXEL_CROP_LEFT, TYPE_UINT, struct video, pixel_crop_left),
+  E_FIELD(ID_PIXEL_CROP_RIGHT, TYPE_UINT, struct video, pixel_crop_right),
+  E_FIELD(ID_DISPLAY_WIDTH, TYPE_UINT, struct video, display_width),
+  E_FIELD(ID_DISPLAY_HEIGHT, TYPE_UINT, struct video, display_height),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_audio_elements[] = {
+  E_FIELD(ID_SAMPLING_FREQUENCY, TYPE_FLOAT, struct audio, sampling_frequency),
+  E_FIELD(ID_CHANNELS, TYPE_UINT, struct audio, channels),
+  E_FIELD(ID_BIT_DEPTH, TYPE_UINT, struct audio, bit_depth),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_track_entry_elements[] = {
+  E_FIELD(ID_TRACK_NUMBER, TYPE_UINT, struct track_entry, number),
+  E_FIELD(ID_TRACK_UID, TYPE_UINT, struct track_entry, uid),
+  E_FIELD(ID_TRACK_TYPE, TYPE_UINT, struct track_entry, type),
+  E_FIELD(ID_FLAG_ENABLED, TYPE_UINT, struct track_entry, flag_enabled),
+  E_FIELD(ID_FLAG_DEFAULT, TYPE_UINT, struct track_entry, flag_default),
+  E_FIELD(ID_FLAG_LACING, TYPE_UINT, struct track_entry, flag_lacing),
+  E_FIELD(ID_TRACK_TIMECODE_SCALE, TYPE_FLOAT, struct track_entry, track_timecode_scale),
+  E_FIELD(ID_LANGUAGE, TYPE_STRING, struct track_entry, language),
+  E_FIELD(ID_CODEC_ID, TYPE_STRING, struct track_entry, codec_id),
+  E_FIELD(ID_CODEC_PRIVATE, TYPE_BINARY, struct track_entry, codec_private),
+  E_SINGLE_MASTER(ID_VIDEO, TYPE_MASTER, struct track_entry, video),
+  E_SINGLE_MASTER(ID_AUDIO, TYPE_MASTER, struct track_entry, audio),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_tracks_elements[] = {
+  E_MASTER(ID_TRACK_ENTRY, TYPE_MASTER, struct tracks, track_entry),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cue_track_positions_elements[] = {
+  E_FIELD(ID_CUE_TRACK, TYPE_UINT, struct cue_track_positions, track),
+  E_FIELD(ID_CUE_CLUSTER_POSITION, TYPE_UINT, struct cue_track_positions, cluster_position),
+  E_FIELD(ID_CUE_BLOCK_NUMBER, TYPE_UINT, struct cue_track_positions, block_number),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cue_point_elements[] = {
+  E_FIELD(ID_CUE_TIME, TYPE_UINT, struct cue_point, time),
+  E_MASTER(ID_CUE_TRACK_POSITIONS, TYPE_MASTER, struct cue_point, cue_track_positions),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_cues_elements[] = {
+  E_MASTER(ID_CUE_POINT, TYPE_MASTER, struct cues, cue_point),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_segment_elements[] = {
+  E_MASTER(ID_SEEK_HEAD, TYPE_MASTER, struct segment, seek_head),
+  E_SINGLE_MASTER(ID_INFO, TYPE_MASTER, struct segment, info),
+  E_MASTER(ID_CLUSTER, TYPE_MASTER, struct segment, cluster),
+  E_SINGLE_MASTER(ID_TRACKS, TYPE_MASTER, struct segment, tracks),
+  E_SINGLE_MASTER(ID_CUES, TYPE_MASTER, struct segment, cues),
+  E_LAST
+};
+
+static struct ebml_element_desc ne_top_level_elements[] = {
+  E_SINGLE_MASTER(ID_EBML, TYPE_MASTER, nestegg, ebml),
+  E_SINGLE_MASTER_O(ID_SEGMENT, TYPE_MASTER, nestegg, segment),
+  E_LAST
+};
+
+#undef E_FIELD
+#undef E_MASTER
+#undef E_SINGLE_MASTER_O
+#undef E_SINGLE_MASTER
+#undef E_SUSPEND
+#undef E_LAST
+
+static struct pool_ctx *
+ne_pool_init(void)
+{
+  struct pool_ctx * pool;
+
+  pool = h_malloc(sizeof(*pool));
+  if (!pool)
+    abort();
+  return pool;
+}
+
+static void
+ne_pool_destroy(struct pool_ctx * pool)
+{
+  h_free(pool);
+}
+
+static void *
+ne_pool_alloc(size_t size, struct pool_ctx * pool)
+{
+  void * p;
+
+  p = h_malloc(size);
+  if (!p)
+    abort();
+  hattach(p, pool);
+  memset(p, 0, size);
+  return p;
+}
+
+static void *
+ne_alloc(size_t size)
+{
+  void * p;
+
+  p = calloc(1, size);
+  if (!p)
+    abort();
+  return p;
+}
+
+static int
+ne_io_read(nestegg_io * io, void * buffer, size_t length)
+{
+  return io->read(buffer, length, io->userdata);
+}
+
+static int
+ne_io_seek(nestegg_io * io, int64_t offset, int whence)
+{
+  return io->seek(offset, whence, io->userdata);
+}
+
+static int
+ne_io_read_skip(nestegg_io * io, size_t length)
+{
+  size_t get;
+  unsigned char buf[8192];
+  int r = 1;
+
+  while (length > 0) {
+    get = length < sizeof(buf) ? length : sizeof(buf);
+    r = ne_io_read(io, buf, get);
+    if (r != 1)
+      break;
+    length -= get;
+  }
+
+  return r;
+}
+
+static int64_t
+ne_io_tell(nestegg_io * io)
+{
+  return io->tell(io->userdata);
+}
+
+static int
+ne_bare_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length, enum vint_mask maskflag)
+{
+  int r;
+  unsigned char b;
+  size_t maxlen = 8;
+  unsigned int count = 1, mask = 1 << 7;
+
+  r = ne_io_read(io, &b, 1);
+  if (r != 1)
+    return r;
+
+  while (count < maxlen) {
+    if ((b & mask) != 0)
+      break;
+    mask >>= 1;
+    count += 1;
+  }
+
+  if (length)
+    *length = count;
+  *value = b;
+
+  if (maskflag == MASK_FIRST_BIT)
+    *value = b & ~mask;
+
+  while (--count) {
+    r = ne_io_read(io, &b, 1);
+    if (r != 1)
+      return r;
+    *value <<= 8;
+    *value |= b;
+  }
+
+  return 1;
+}
+
+static int
+ne_read_id(nestegg_io * io, uint64_t * value, uint64_t * length)
+{
+  return ne_bare_read_vint(io, value, length, MASK_NONE);
+}
+
+static int
+ne_read_vint(nestegg_io * io, uint64_t * value, uint64_t * length)
+{
+  return ne_bare_read_vint(io, value, length, MASK_FIRST_BIT);
+}
+
+static int
+ne_read_svint(nestegg_io * io, int64_t * value, uint64_t * length)
+{
+  int r;
+  uint64_t uvalue;
+  uint64_t ulength;
+  int64_t svint_subtr[] = {
+    0x3f, 0x1fff,
+    0xfffff, 0x7ffffff,
+    0x3ffffffffLL, 0x1ffffffffffLL,
+    0xffffffffffffLL, 0x7fffffffffffffLL
+  };
+
+  r = ne_bare_read_vint(io, &uvalue, &ulength, MASK_FIRST_BIT);
+  if (r != 1)
+    return r;
+  *value = uvalue - svint_subtr[ulength - 1];
+  if (length)
+    *length = ulength;
+  return r;
+}
+
+static int
+ne_read_uint(nestegg_io * io, uint64_t * val, uint64_t length)
+{
+  unsigned char b;
+  int r;
+
+  if (length == 0 || length > 8)
+    return -1;
+  r = ne_io_read(io, &b, 1);
+  if (r != 1)
+    return r;
+  *val = b;
+  while (--length) {
+    r = ne_io_read(io, &b, 1);
+    if (r != 1)
+      return r;
+    *val <<= 8;
+    *val |= b;
+  }
+  return 1;
+}
+
+static int
+ne_read_int(nestegg_io * io, int64_t * val, uint64_t length)
+{
+  int r;
+  uint64_t uval, base;
+
+  r = ne_read_uint(io, &uval, length);
+  if (r != 1)
+    return r;
+
+  if (length < sizeof(int64_t)) {
+    base = 1;
+    base <<= length * 8 - 1;
+    if (uval >= base) {
+        base = 1;
+        base <<= length * 8;
+    } else {
+      base = 0;
+    }
+    *val = uval - base;
+  } else {
+    *val = (int64_t) uval;
+  }
+
+  return 1;
+}
+
+static int
+ne_read_float(nestegg_io * io, double * val, uint64_t length)
+{
+  union {
+    uint64_t u;
+    float f;
+    double d;
+  } value;
+  int r;
+
+  /* length == 10 not implemented */
+  if (length != 4 && length != 8)
+    return -1;
+  r = ne_read_uint(io, &value.u, length);
+  if (r != 1)
+    return r;
+  if (length == 4)
+    *val = value.f;
+  else
+    *val = value.d;
+  return 1;
+}
+
+static int
+ne_read_string(nestegg * ctx, char ** val, uint64_t length)
+{
+  char * str;
+  int r;
+
+  if (length == 0 || length > LIMIT_STRING)
+    return -1;
+  str = ne_pool_alloc(length + 1, ctx->alloc_pool);
+  r = ne_io_read(ctx->io, (unsigned char *) str, length);
+  if (r != 1)
+    return r;
+  str[length] = '\0';
+  *val = str;
+  return 1;
+}
+
+static int
+ne_read_binary(nestegg * ctx, struct ebml_binary * val, uint64_t length)
+{
+  if (length == 0 || length > LIMIT_BINARY)
+    return -1;
+  val->data = ne_pool_alloc(length, ctx->alloc_pool);
+  val->length = length;
+  return ne_io_read(ctx->io, val->data, length);
+}
+
+static int
+ne_get_uint(struct ebml_type type, uint64_t * value)
+{
+  if (!type.read)
+    return -1;
+
+  assert(type.type == TYPE_UINT);
+
+  *value = type.v.u;
+
+  return 0;
+}
+
+static int
+ne_get_float(struct ebml_type type, double * value)
+{
+  if (!type.read)
+    return -1;
+
+  assert(type.type == TYPE_FLOAT);
+
+  *value = type.v.f;
+
+  return 0;
+}
+
+static int
+ne_get_string(struct ebml_type type, char ** value)
+{
+  if (!type.read)
+    return -1;
+
+  assert(type.type == TYPE_STRING);
+
+  *value = type.v.s;
+
+  return 0;
+}
+
+static int
+ne_get_binary(struct ebml_type type, struct ebml_binary * value)
+{
+  if (!type.read)
+    return -1;
+
+  assert(type.type == TYPE_BINARY);
+
+  *value = type.v.b;
+
+  return 0;
+}
+
+static int
+ne_is_ancestor_element(uint64_t id, struct list_node * ancestor)
+{
+  struct ebml_element_desc * element;
+
+  for (; ancestor; ancestor = ancestor->previous)
+    for (element = ancestor->node; element->id; ++element)
+      if (element->id == id)
+        return 1;
+
+  return 0;
+}
+
+static struct ebml_element_desc *
+ne_find_element(uint64_t id, struct ebml_element_desc * elements)
+{
+  struct ebml_element_desc * element;
+
+  for (element = elements; element->id; ++element)
+    if (element->id == id)
+      return element;
+
+  return NULL;
+}
+
+static void
+ne_ctx_push(nestegg * ctx, struct ebml_element_desc * ancestor, void * data)
+{
+  struct list_node * item;
+
+  item = ne_alloc(sizeof(*item));
+  item->previous = ctx->ancestor;
+  item->node = ancestor;
+  item->data = data;
+  ctx->ancestor = item;
+}
+
+static void
+ne_ctx_pop(nestegg * ctx)
+{
+  struct list_node * item;
+
+  item = ctx->ancestor;
+  ctx->ancestor = item->previous;
+  free(item);
+}
+
+static int
+ne_ctx_save(nestegg * ctx, struct saved_state * s)
+{
+  s->stream_offset = ne_io_tell(ctx->io);
+  if (s->stream_offset < 0)
+    return -1;
+  s->ancestor = ctx->ancestor;
+  s->last_id = ctx->last_id;
+  s->last_size = ctx->last_size;
+  return 0;
+}
+
+static int
+ne_ctx_restore(nestegg * ctx, struct saved_state * s)
+{
+  int r;
+
+  r = ne_io_seek(ctx->io, s->stream_offset, NESTEGG_SEEK_SET);
+  if (r != 0)
+    return -1;
+  ctx->ancestor = s->ancestor;
+  ctx->last_id = s->last_id;
+  ctx->last_size = s->last_size;
+  return 0;
+}
+
+static int
+ne_peek_element(nestegg * ctx, uint64_t * id, uint64_t * size)
+{
+  int r;
+
+  if (ctx->last_id && ctx->last_size) {
+    if (id)
+      *id = ctx->last_id;
+    if (size)
+      *size = ctx->last_size;
+    return 1;
+  }
+
+  r = ne_read_id(ctx->io, &ctx->last_id, NULL);
+  if (r != 1)
+    return r;
+
+  r = ne_read_vint(ctx->io, &ctx->last_size, NULL);
+  if (r != 1)
+    return r;
+
+  if (id)
+    *id = ctx->last_id;
+  if (size)
+    *size = ctx->last_size;
+
+  return 1;
+}
+
+static int
+ne_read_element(nestegg * ctx, uint64_t * id, uint64_t * size)
+{
+  int r;
+
+  r = ne_peek_element(ctx, id, size);
+  if (r != 1)
+    return r;
+
+  ctx->last_id = 0;
+  ctx->last_size = 0;
+
+  return 1;
+}
+
+static void
+ne_read_master(nestegg * ctx, struct ebml_element_desc * desc)
+{
+  struct ebml_list * list;
+  struct ebml_list_node * node, * oldtail;
+
+  assert(desc->type == TYPE_MASTER && desc->flags & DESC_FLAG_MULTI);
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "multi master element %llx (%s)",
+           desc->id, desc->name);
+
+  list = (struct ebml_list *) (ctx->ancestor->data + desc->offset);
+
+  node = ne_pool_alloc(sizeof(*node), ctx->alloc_pool);
+  node->id = desc->id;
+  node->data = ne_pool_alloc(desc->size, ctx->alloc_pool);
+
+  oldtail = list->tail;
+  if (oldtail)
+    oldtail->next = node;
+  list->tail = node;
+  if (!list->head)
+    list->head = node;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p", node->data);
+
+  ne_ctx_push(ctx, desc->children, node->data);
+}
+
+static void
+ne_read_single_master(nestegg * ctx, struct ebml_element_desc * desc)
+{
+  assert(desc->type == TYPE_MASTER && !(desc->flags & DESC_FLAG_MULTI));
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "single master element %llx (%s)",
+           desc->id, desc->name);
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, " -> using data %p (%u)",
+           ctx->ancestor->data + desc->offset, desc->offset);
+
+  ne_ctx_push(ctx, desc->children, ctx->ancestor->data + desc->offset);
+}
+
+static int
+ne_read_simple(nestegg * ctx, struct ebml_element_desc * desc, size_t length)
+{
+  struct ebml_type * storage;
+  int r;
+
+  storage = (struct ebml_type *) (ctx->ancestor->data + desc->offset);
+
+  if (storage->read) {
+    ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) already read, skipping",
+             desc->id, desc->name);
+    return 0;
+  }
+
+  storage->type = desc->type;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "element %llx (%s) -> %p (%u)",
+           desc->id, desc->name, storage, desc->offset);
+
+  r = -1;
+
+  switch (desc->type) {
+  case TYPE_UINT:
+    r = ne_read_uint(ctx->io, &storage->v.u, length);
+    break;
+  case TYPE_FLOAT:
+    r = ne_read_float(ctx->io, &storage->v.f, length);
+    break;
+  case TYPE_INT:
+    r = ne_read_int(ctx->io, &storage->v.i, length);
+    break;
+  case TYPE_STRING:
+    r = ne_read_string(ctx, &storage->v.s, length);
+    break;
+  case TYPE_BINARY:
+    r = ne_read_binary(ctx, &storage->v.b, length);
+    break;
+  case TYPE_MASTER:
+  case TYPE_UNKNOWN:
+    assert(0);
+    break;
+  }
+
+  if (r == 1)
+    storage->read = 1;
+
+  return r;
+}
+
+static int
+ne_parse(nestegg * ctx, struct ebml_element_desc * top_level)
+{
+  int r;
+  int64_t * data_offset;
+  uint64_t id, size;
+  struct ebml_element_desc * element;
+
+  /* loop until we need to return:
+     - hit suspend point
+     - parse complete
+     - error occurred */
+
+  /* loop over elements at current level reading them if sublevel found,
+     push ctx onto stack and continue if sublevel ended, pop ctx off stack
+     and continue */
+
+  if (!ctx->ancestor)
+    return -1;
+
+  for (;;) {
+    r = ne_peek_element(ctx, &id, &size);
+    if (r != 1)
+      break;
+
+    element = ne_find_element(id, ctx->ancestor->node);
+    if (element) {
+      if (element->flags & DESC_FLAG_SUSPEND) {
+        assert(element->type == TYPE_BINARY);
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "suspend parse at %llx", id);
+        r = 1;
+        break;
+      }
+
+      r = ne_read_element(ctx, &id, &size);
+      if (r != 1)
+        break;
+
+      if (element->flags & DESC_FLAG_OFFSET) {
+        data_offset = (int64_t *) (ctx->ancestor->data + element->data_offset);
+        *data_offset = ne_io_tell(ctx->io);
+        if (*data_offset < 0) {
+          r = -1;
+          break;
+        }
+      }
+
+      if (element->type == TYPE_MASTER) {
+        if (element->flags & DESC_FLAG_MULTI)
+          ne_read_master(ctx, element);
+        else
+          ne_read_single_master(ctx, element);
+        continue;
+      } else {
+        r = ne_read_simple(ctx, element, size);
+        if (r < 0)
+          break;
+      }
+    } else if (ne_is_ancestor_element(id, ctx->ancestor->previous)) {
+      ctx->log(ctx, NESTEGG_LOG_DEBUG, "parent element %llx", id);
+      if (top_level && ctx->ancestor->node == top_level) {
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "*** parse about to back up past top_level");
+        r = 1;
+        break;
+      }
+      ne_ctx_pop(ctx);
+    } else {
+      r = ne_read_element(ctx, &id, &size);
+      if (r != 1)
+        break;
+
+      if (id != ID_VOID && id != ID_CRC32)
+        ctx->log(ctx, NESTEGG_LOG_DEBUG, "unknown element %llx", id);
+      r = ne_io_read_skip(ctx->io, size);
+      if (r != 1)
+        break;
+    }
+  }
+
+  if (r != 1)
+    while (ctx->ancestor)
+      ne_ctx_pop(ctx);
+
+  return r;
+}
+
+static uint64_t
+ne_xiph_lace_value(unsigned char ** np)
+{
+  uint64_t lace;
+  uint64_t value;
+  unsigned char * p = *np;
+
+  lace = *p++;
+  value = lace;
+  while (lace == 255) {
+    lace = *p++;
+    value += lace;
+  }
+
+  *np = p;
+
+  return value;
+}
+
+static int
+ne_read_xiph_lace_value(nestegg_io * io, uint64_t * value, size_t * consumed)
+{
+  int r;
+  uint64_t lace;
+
+  r = ne_read_uint(io, &lace, 1);
+  if (r != 1)
+    return r;
+  *consumed += 1;
+
+  *value = lace;
+  while (lace == 255) {
+    r = ne_read_uint(io, &lace, 1);
+    if (r != 1)
+      return r;
+    *consumed += 1;
+    *value += lace;
+  }
+
+  return 1;
+}
+
+static int
+ne_read_xiph_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
+{
+  int r;
+  size_t i = 0;
+  uint64_t sum = 0;
+
+  while (--n) {
+    r = ne_read_xiph_lace_value(io, &sizes[i], read);
+    if (r != 1)
+      return r;
+    sum += sizes[i];
+    i += 1;
+  }
+
+  if (*read + sum > block)
+    return -1;
+
+  /* last frame is the remainder of the block */
+  sizes[i] = block - *read - sum;
+  return 1;
+}
+
+static int
+ne_read_ebml_lacing(nestegg_io * io, size_t block, size_t * read, uint64_t n, uint64_t * sizes)
+{
+  int r;
+  uint64_t lace, sum, length;
+  int64_t slace;
+  size_t i = 0;
+
+  r = ne_read_vint(io, &lace, &length);
+  if (r != 1)
+    return r;
+  *read += length;
+
+  sizes[i] = lace;
+  sum = sizes[i];
+
+  i += 1;
+  n -= 1;
+
+  while (--n) {
+    r = ne_read_svint(io, &slace, &length);
+    if (r != 1)
+      return r;
+    *read += length;
+    sizes[i] = sizes[i - 1] + slace;
+    sum += sizes[i];
+    i += 1;
+  }
+
+  if (*read + sum > block)
+    return -1;
+
+  /* last frame is the remainder of the block */
+  sizes[i] = block - *read - sum;
+  return 1;
+}
+
+static uint64_t
+ne_get_timecode_scale(nestegg * ctx)
+{
+  uint64_t scale;
+
+  if (ne_get_uint(ctx->segment.info.timecode_scale, &scale) != 0)
+    scale = 1000000;
+
+  return scale;
+}
+
+static struct track_entry *
+ne_find_track_entry(nestegg * ctx, unsigned int track)
+{
+  struct ebml_list_node * node;
+  unsigned int tracks = 0;
+
+  node = ctx->segment.tracks.track_entry.head;
+  while (node) {
+    assert(node->id == ID_TRACK_ENTRY);
+    if (track == tracks)
+      return node->data;
+    tracks += 1;
+    node = node->next;
+  }
+
+  return NULL;
+}
+
+static int
+ne_read_block(nestegg * ctx, uint64_t block_id, uint64_t block_size, nestegg_packet ** data)
+{
+  int r;
+  int64_t timecode, abs_timecode;
+  nestegg_packet * pkt;
+  struct cluster * cluster;
+  struct frame * f, * last;
+  struct track_entry * entry;
+  double track_scale;
+  uint64_t track, length, frame_sizes[256], cluster_tc, flags, frames, tc_scale, total;
+  unsigned int i, lacing;
+  size_t consumed = 0;
+
+  *data = NULL;
+
+  if (block_size > LIMIT_BLOCK)
+    return -1;
+
+  r = ne_read_vint(ctx->io, &track, &length);
+  if (r != 1)
+    return r;
+
+  if (track == 0 || track > ctx->track_count)
+    return -1;
+
+  consumed += length;
+
+  r = ne_read_int(ctx->io, &timecode, 2);
+  if (r != 1)
+    return r;
+
+  consumed += 2;
+
+  r = ne_read_uint(ctx->io, &flags, 1);
+  if (r != 1)
+    return r;
+
+  consumed += 1;
+
+  frames = 0;
+
+  /* flags are different between block and simpleblock, but lacing is
+     encoded the same way */
+  lacing = (flags & BLOCK_FLAGS_LACING) >> 1;
+
+  switch (lacing) {
+  case LACING_NONE:
+    frames = 1;
+    break;
+  case LACING_XIPH:
+  case LACING_FIXED:
+  case LACING_EBML:
+    r = ne_read_uint(ctx->io, &frames, 1);
+    if (r != 1)
+      return r;
+    consumed += 1;
+    frames += 1;
+  }
+
+  if (frames > 256)
+    return -1;
+
+  switch (lacing) {
+  case LACING_NONE:
+    frame_sizes[0] = block_size - consumed;
+    break;
+  case LACING_XIPH:
+    if (frames == 1)
+      return -1;
+    r = ne_read_xiph_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    if (r != 1)
+      return r;
+    break;
+  case LACING_FIXED:
+    if ((block_size - consumed) % frames)
+      return -1;
+    for (i = 0; i < frames; ++i)
+      frame_sizes[i] = (block_size - consumed) / frames;
+    break;
+  case LACING_EBML:
+    if (frames == 1)
+      return -1;
+    r = ne_read_ebml_lacing(ctx->io, block_size, &consumed, frames, frame_sizes);
+    if (r != 1)
+      return r;
+    break;
+  }
+
+  /* sanity check unlaced frame sizes against total block size. */
+  total = consumed;
+  for (i = 0; i < frames; ++i)
+    total += frame_sizes[i];
+  if (total > block_size)
+    return -1;
+
+  entry = ne_find_track_entry(ctx, track - 1);
+  if (!entry)
+    return -1;
+
+  track_scale = 1.0;
+
+  tc_scale = ne_get_timecode_scale(ctx);
+
+  assert(ctx->segment.cluster.tail->id == ID_CLUSTER);
+  cluster = ctx->segment.cluster.tail->data;
+  if (ne_get_uint(cluster->timecode, &cluster_tc) != 0)
+    return -1;
+
+  abs_timecode = timecode + cluster_tc;
+  if (abs_timecode < 0)
+    return -1;
+
+  pkt = ne_alloc(sizeof(*pkt));
+  pkt->track = track - 1;
+  pkt->timecode = abs_timecode * tc_scale * track_scale;
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "%sblock t %lld pts %f f %llx frames: %llu",
+           block_id == ID_BLOCK ? "" : "simple", pkt->track, pkt->timecode / 1e9, flags, frames);
+
+  last = NULL;
+  for (i = 0; i < frames; ++i) {
+    if (frame_sizes[i] > LIMIT_FRAME) {
+      nestegg_free_packet(pkt);
+      return -1;
+    }
+    f = ne_alloc(sizeof(*f));
+    f->data = ne_alloc(frame_sizes[i]);
+    f->length = frame_sizes[i];
+    r = ne_io_read(ctx->io, f->data, frame_sizes[i]);
+    if (r != 1) {
+      free(f->data);
+      free(f);
+      nestegg_free_packet(pkt);
+      return -1;
+    }
+
+    if (!last)
+      pkt->frame = f;
+    else
+      last->next = f;
+    last = f;
+  }
+
+  *data = pkt;
+
+  return 1;
+}
+
+static uint64_t
+ne_buf_read_id(unsigned char const * p, size_t length)
+{
+  uint64_t id = 0;
+
+  while (length--) {
+    id <<= 8;
+    id |= *p++;
+  }
+
+  return id;
+}
+
+static struct seek *
+ne_find_seek_for_id(struct ebml_list_node * seek_head, uint64_t id)
+{
+  struct ebml_list * head;
+  struct ebml_list_node * seek;
+  struct ebml_binary binary_id;
+  struct seek * s;
+
+  while (seek_head) {
+    assert(seek_head->id == ID_SEEK_HEAD);
+    head = seek_head->data;
+    seek = head->head;
+
+    while (seek) {
+      assert(seek->id == ID_SEEK);
+      s = seek->data;
+
+      if (ne_get_binary(s->id, &binary_id) == 0 &&
+          ne_buf_read_id(binary_id.data, binary_id.length) == id)
+        return s;
+
+      seek = seek->next;
+    }
+
+    seek_head = seek_head->next;
+  }
+
+  return NULL;
+}
+
+static struct cue_point *
+ne_find_cue_point_for_tstamp(struct ebml_list_node * cue_point, uint64_t scale, uint64_t tstamp)
+{
+  uint64_t time;
+  struct cue_point * c, * prev = NULL;
+
+  while (cue_point) {
+    assert(cue_point->id == ID_CUE_POINT);
+    c = cue_point->data;
+
+    if (!prev)
+      prev = c;
+
+    if (ne_get_uint(c->time, &time) == 0 && time * scale > tstamp)
+      break;
+
+    prev = cue_point->data;
+    cue_point = cue_point->next;
+  }
+
+  return prev;
+}
+
+static int
+ne_is_suspend_element(uint64_t id)
+{
+  /* this could search the tree of elements for DESC_FLAG_SUSPEND */
+  if (id == ID_SIMPLE_BLOCK || id == ID_BLOCK)
+    return 1;
+  return 0;
+}
+
+static void
+ne_null_log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
+{
+  if (ctx && severity && fmt)
+    return;
+}
+
+int
+nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback)
+{
+  int r;
+  uint64_t id, version, docversion;
+  struct ebml_list_node * track;
+  char * doctype;
+  nestegg * ctx = NULL;
+
+  if (!(io.read && io.seek && io.tell))
+    return -1;
+
+  ctx = ne_alloc(sizeof(*ctx));
+
+  ctx->io = ne_alloc(sizeof(*ctx->io));
+  *ctx->io = io;
+  ctx->log = callback;
+  ctx->alloc_pool = ne_pool_init();
+
+  if (!ctx->log)
+    ctx->log = ne_null_log_callback;
+
+  r = ne_peek_element(ctx, &id, NULL);
+  if (r != 1) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  if (id != ID_EBML) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "ctx %p", ctx);
+
+  ne_ctx_push(ctx, ne_top_level_elements, ctx);
+
+  r = ne_parse(ctx, NULL);
+
+  if (r != 1) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  if (ne_get_uint(ctx->ebml.ebml_read_version, &version) != 0)
+    version = 1;
+  if (version != 1) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  if (ne_get_string(ctx->ebml.doctype, &doctype) != 0)
+    doctype = "matroska";
+  if (strcmp(doctype, "webm") != 0) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  if (ne_get_uint(ctx->ebml.doctype_read_version, &docversion) != 0)
+    docversion = 1;
+  if (docversion < 1 || docversion > 2) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  if (!ctx->segment.tracks.track_entry.head) {
+    nestegg_destroy(ctx);
+    return -1;
+  }
+
+  track = ctx->segment.tracks.track_entry.head;
+  ctx->track_count = 0;
+
+  while (track) {
+    ctx->track_count += 1;
+    track = track->next;
+  }
+
+  *context = ctx;
+
+  return 0;
+}
+
+void
+nestegg_destroy(nestegg * ctx)
+{
+  while (ctx->ancestor)
+    ne_ctx_pop(ctx);
+  ne_pool_destroy(ctx->alloc_pool);
+  free(ctx->io);
+  free(ctx);
+}
+
+int
+nestegg_duration(nestegg * ctx, uint64_t * duration)
+{
+  uint64_t tc_scale;
+  double unscaled_duration;
+
+  if (ne_get_float(ctx->segment.info.duration, &unscaled_duration) != 0)
+    return -1;
+
+  tc_scale = ne_get_timecode_scale(ctx);
+
+  *duration = (uint64_t) (unscaled_duration * tc_scale);
+  return 0;
+}
+
+int
+nestegg_tstamp_scale(nestegg * ctx, uint64_t * scale)
+{
+  *scale = ne_get_timecode_scale(ctx);
+  return 0;
+}
+
+int
+nestegg_track_count(nestegg * ctx, unsigned int * tracks)
+{
+  *tracks = ctx->track_count;
+  return 0;
+}
+
+int
+nestegg_track_seek(nestegg * ctx, unsigned int track, uint64_t tstamp)
+{
+  int r;
+  struct cue_point * cue_point;
+  struct cue_track_positions * pos;
+  struct saved_state state;
+  struct seek * found;
+  uint64_t seek_pos, tc_scale, t, id;
+  struct ebml_list_node * node = ctx->segment.cues.cue_point.head;
+
+  /* If there are no cues loaded, check for cues element in the seek head
+     and load it. */
+  if (!node) {
+    found = ne_find_seek_for_id(ctx->segment.seek_head.head, ID_CUES);
+    if (!found)
+      return -1;
+
+    if (ne_get_uint(found->position, &seek_pos) != 0)
+      return -1;
+
+    /* Save old parser state. */
+    r = ne_ctx_save(ctx, &state);
+    if (r != 0)
+      return -1;
+
+    /* Seek and set up parser state for segment-level element (Cues). */
+    r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET);
+    if (r != 0)
+      return -1;
+    ctx->last_id = 0;
+    ctx->last_size = 0;
+
+    r = ne_read_element(ctx, &id, NULL);
+    if (r != 1)
+      return -1;
+
+    if (id != ID_CUES)
+      return -1;
+
+    ctx->ancestor = NULL;
+    ne_ctx_push(ctx, ne_top_level_elements, ctx);
+    ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
+    ne_ctx_push(ctx, ne_cues_elements, &ctx->segment.cues);
+    /* parser will run until end of cues element. */
+    ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cue elements");
+    r = ne_parse(ctx, ne_cues_elements);
+    while (ctx->ancestor)
+      ne_ctx_pop(ctx);
+
+    /* Reset parser state to original state and seek back to old position. */
+    if (ne_ctx_restore(ctx, &state) != 0)
+      return -1;
+
+    if (r < 0)
+      return -1;
+  }
+
+  tc_scale = ne_get_timecode_scale(ctx);
+
+  cue_point = ne_find_cue_point_for_tstamp(ctx->segment.cues.cue_point.head, tc_scale, tstamp);
+  if (!cue_point)
+    return -1;
+
+  node = cue_point->cue_track_positions.head;
+
+  seek_pos = 0;
+
+  while (node) {
+    assert(node->id == ID_CUE_TRACK_POSITIONS);
+    pos = node->data;
+    if (ne_get_uint(pos->track, &t) == 0 && t - 1 == track) {
+      if (ne_get_uint(pos->cluster_position, &seek_pos) != 0)
+        return -1;
+      break;
+    }
+    node = node->next;
+  }
+
+  /* Seek and set up parser state for segment-level element (Cluster). */
+  r = ne_io_seek(ctx->io, ctx->segment_offset + seek_pos, NESTEGG_SEEK_SET);
+  if (r != 0)
+    return -1;
+  ctx->last_id = 0;
+  ctx->last_size = 0;
+
+  while (ctx->ancestor)
+    ne_ctx_pop(ctx);
+
+  ne_ctx_push(ctx, ne_top_level_elements, ctx);
+  ne_ctx_push(ctx, ne_segment_elements, &ctx->segment);
+  ctx->log(ctx, NESTEGG_LOG_DEBUG, "seek: parsing cluster elements");
+  r = ne_parse(ctx, NULL);
+  if (r != 1)
+    return -1;
+
+  if (!ne_is_suspend_element(ctx->last_id))
+    return -1;
+
+  return 0;
+}
+
+int
+nestegg_track_type(nestegg * ctx, unsigned int track)
+{
+  struct track_entry * entry;
+  uint64_t type;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (ne_get_uint(entry->type, &type) != 0)
+    return -1;
+
+  if (type & TRACK_TYPE_VIDEO)
+    return NESTEGG_TRACK_VIDEO;
+
+  if (type & TRACK_TYPE_AUDIO)
+    return NESTEGG_TRACK_AUDIO;
+
+  return -1;
+}
+
+int
+nestegg_track_codec_id(nestegg * ctx, unsigned int track)
+{
+  char * codec_id;
+  struct track_entry * entry;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (ne_get_string(entry->codec_id, &codec_id) != 0)
+    return -1;
+
+  if (strcmp(codec_id, TRACK_ID_VP8) == 0)
+    return NESTEGG_CODEC_VP8;
+
+  if (strcmp(codec_id, TRACK_ID_VORBIS) == 0)
+    return NESTEGG_CODEC_VORBIS;
+
+  return -1;
+}
+
+int
+nestegg_track_codec_data_count(nestegg * ctx, unsigned int track,
+                               unsigned int * count)
+{
+  struct track_entry * entry;
+  struct ebml_binary codec_private;
+  unsigned char * p;
+
+  *count = 0;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS)
+    return -1;
+
+  if (ne_get_binary(entry->codec_private, &codec_private) != 0)
+    return -1;
+
+  if (codec_private.length < 1)
+    return -1;
+
+  p = codec_private.data;
+  *count = *p + 1;
+
+  if (*count > 3)
+    return -1;
+
+  return 0;
+}
+
+int
+nestegg_track_codec_data(nestegg * ctx, unsigned int track, unsigned int item,
+                         unsigned char ** data, size_t * length)
+{
+  struct track_entry * entry;
+  struct ebml_binary codec_private;
+  uint64_t sizes[3], total;
+  unsigned char * p;
+  unsigned int count, i;
+
+  *data = NULL;
+  *length = 0;
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (nestegg_track_codec_id(ctx, track) != NESTEGG_CODEC_VORBIS)
+    return -1;
+
+  if (ne_get_binary(entry->codec_private, &codec_private) != 0)
+    return -1;
+
+  p = codec_private.data;
+  count = *p++ + 1;
+
+  if (count > 3)
+    return -1;
+
+  i = 0;
+  total = 0;
+  while (--count) {
+    sizes[i] = ne_xiph_lace_value(&p);
+    total += sizes[i];
+    i += 1;
+  }
+  sizes[i] = codec_private.length - total - (p - codec_private.data);
+
+  for (i = 0; i < item; ++i) {
+    if (sizes[i] > LIMIT_FRAME)
+      return -1;
+    p += sizes[i];
+  }
+  *data = p;
+  *length = sizes[item];
+
+  return 0;
+}
+
+int
+nestegg_track_video_params(nestegg * ctx, unsigned int track,
+                           nestegg_video_params * params)
+{
+  struct track_entry * entry;
+  uint64_t value;
+
+  memset(params, 0, sizeof(*params));
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_VIDEO)
+    return -1;
+
+  if (ne_get_uint(entry->video.pixel_width, &value) != 0)
+    return -1;
+  params->width = value;
+
+  if (ne_get_uint(entry->video.pixel_height, &value) != 0)
+    return -1;
+  params->height = value;
+
+  value = 0;
+  ne_get_uint(entry->video.pixel_crop_bottom, &value);
+  params->crop_bottom = value;
+
+  value = 0;
+  ne_get_uint(entry->video.pixel_crop_top, &value);
+  params->crop_top = value;
+
+  value = 0;
+  ne_get_uint(entry->video.pixel_crop_left, &value);
+  params->crop_left = value;
+
+  value = 0;
+  ne_get_uint(entry->video.pixel_crop_right, &value);
+  params->crop_right = value;
+
+  value = params->width;
+  ne_get_uint(entry->video.display_width, &value);
+  params->display_width = value;
+
+  value = params->height;
+  ne_get_uint(entry->video.display_height, &value);
+  params->display_height = value;
+
+  return 0;
+}
+
+int
+nestegg_track_audio_params(nestegg * ctx, unsigned int track,
+                           nestegg_audio_params * params)
+{
+  struct track_entry * entry;
+  uint64_t value;
+
+  memset(params, 0, sizeof(*params));
+
+  entry = ne_find_track_entry(ctx, track);
+  if (!entry)
+    return -1;
+
+  if (nestegg_track_type(ctx, track) != NESTEGG_TRACK_AUDIO)
+    return -1;
+
+  params->rate = 8000;
+  ne_get_float(entry->audio.sampling_frequency, &params->rate);
+
+  value = 1;
+  ne_get_uint(entry->audio.channels, &value);
+  params->channels = value;
+
+  value = 16;
+  ne_get_uint(entry->audio.bit_depth, &value);
+  params->depth = value;
+
+  return 0;
+}
+
+int
+nestegg_read_packet(nestegg * ctx, nestegg_packet ** pkt)
+{
+  int r;
+  uint64_t id, size;
+
+  *pkt = NULL;
+
+  for (;;) {
+    r = ne_peek_element(ctx, &id, &size);
+    if (r != 1)
+      return r;
+
+    /* any suspend fields must be handled here */
+    if (ne_is_suspend_element(id)) {
+      r = ne_read_element(ctx, &id, &size);
+      if (r != 1)
+        return r;
+
+      /* the only suspend fields are blocks and simple blocks, which we
+         handle directly. */
+      r = ne_read_block(ctx, id, size, pkt);
+      return r;
+    }
+
+    r =  ne_parse(ctx, NULL);
+    if (r != 1)
+      return r;
+  }
+
+  return 1;
+}
+
+void
+nestegg_free_packet(nestegg_packet * pkt)
+{
+  struct frame * frame;
+
+  while (pkt->frame) {
+    frame = pkt->frame;
+    pkt->frame = frame->next;
+    free(frame->data);
+    free(frame);
+  }
+
+ free(pkt);
+}
+
+int
+nestegg_packet_track(nestegg_packet * pkt, unsigned int * track)
+{
+  *track = pkt->track;
+  return 0;
+}
+
+int
+nestegg_packet_tstamp(nestegg_packet * pkt, uint64_t * tstamp)
+{
+  *tstamp = pkt->timecode;
+  return 0;
+}
+
+int
+nestegg_packet_count(nestegg_packet * pkt, unsigned int * count)
+{
+  struct frame * f = pkt->frame;
+
+  *count = 0;
+
+  while (f) {
+    *count += 1;
+    f = f->next;
+  }
+
+  return 0;
+}
+
+int
+nestegg_packet_data(nestegg_packet * pkt, unsigned int item,
+                    unsigned char ** data, size_t * length)
+{
+  struct frame * f = pkt->frame;
+  unsigned int count = 0;
+
+  *data = NULL;
+  *length = 0;
+
+  while (f) {
+    if (count == item) {
+      *data = f->data;
+      *length = f->length;
+      return 0;
+    }
+    count += 1;
+    f = f->next;
+  }
+
+  return -1;
+}
diff --git a/nestegg/test/test.c b/nestegg/test/test.c
new file mode 100644
index 000000000..210b640c7
--- /dev/null
+++ b/nestegg/test/test.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "nestegg/nestegg.h"
+
+#undef DEBUG
+#define SEEK_TEST
+
+static int
+stdio_read(void * p, size_t length, void * fp)
+{
+  size_t r;
+
+  r = fread(p, length, 1, fp);
+  if (r == 0 && feof(fp))
+    return 0;
+  return r == 0 ? -1 : 1;
+}
+
+static int
+stdio_seek(int64_t offset, int whence, void * fp)
+{
+  return fseek(fp, offset, whence);
+}
+
+static int64_t
+stdio_tell(void * fp)
+{
+  return ftell(fp);
+}
+
+static void
+log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
+{
+  va_list ap;
+  char const * sev = NULL;
+
+#ifndef DEBUG
+  if (severity < NESTEGG_LOG_WARNING)
+    return;
+#endif
+
+  switch (severity) {
+  case NESTEGG_LOG_DEBUG:
+    sev = "debug:   ";
+    break;
+  case NESTEGG_LOG_WARNING:
+    sev = "warning: ";
+    break;
+  case NESTEGG_LOG_CRITICAL:
+    sev = "critical:";
+    break;
+  default:
+    sev = "unknown: ";
+  }
+
+  fprintf(stderr, "%p %s ", (void *) ctx, sev);
+
+  va_start(ap, fmt);
+  vfprintf(stderr, fmt, ap);
+  va_end(ap);
+
+  fprintf(stderr, "\n");
+}
+
+int
+main(int argc, char * argv[])
+{
+  FILE * fp;
+  int r, type;
+  nestegg * ctx;
+  nestegg_audio_params aparams;
+  nestegg_packet * pkt;
+  nestegg_video_params vparams;
+  size_t length, size;
+  uint64_t duration, tstamp, pkt_tstamp;
+  unsigned char * codec_data, * ptr;
+  unsigned int cnt, i, j, track, tracks, pkt_cnt, pkt_track;
+  unsigned int data_items = 0;
+  nestegg_io io = {
+    stdio_read,
+    stdio_seek,
+    stdio_tell,
+    NULL
+  };
+
+  if (argc != 2)
+    return EXIT_FAILURE;
+
+  fp = fopen(argv[1], "rb");
+  if (!fp)
+    return EXIT_FAILURE;
+
+  io.userdata = fp;
+
+  ctx = NULL;
+  r = nestegg_init(&ctx, io, log_callback);
+  if (r != 0)
+    return EXIT_FAILURE;
+
+  nestegg_track_count(ctx, &tracks);
+  nestegg_duration(ctx, &duration);
+#ifdef DEBUG
+  fprintf(stderr, "media has %u tracks and duration %fs\n", tracks, duration / 1e9);
+#endif
+
+  for (i = 0; i < tracks; ++i) {
+    type = nestegg_track_type(ctx, i);
+#ifdef DEBUG
+    fprintf(stderr, "track %u: type: %d codec: %d", i,
+            type, nestegg_track_codec_id(ctx, i));
+#endif
+    nestegg_track_codec_data_count(ctx, i, &data_items);
+    for (j = 0; j < data_items; ++j) {
+      nestegg_track_codec_data(ctx, i, j, &codec_data, &length);
+#ifdef DEBUG
+      fprintf(stderr, " (%p, %u)", codec_data, (unsigned int) length);
+#endif
+    }
+    if (type == NESTEGG_TRACK_VIDEO) {
+      nestegg_track_video_params(ctx, i, &vparams);
+#ifdef DEBUG
+      fprintf(stderr, " video: %ux%u (d: %ux%u %ux%ux%ux%u)",
+              vparams.width, vparams.height,
+              vparams.display_width, vparams.display_height,
+              vparams.crop_top, vparams.crop_left, vparams.crop_bottom, vparams.crop_right);
+#endif
+    } else if (type == NESTEGG_TRACK_AUDIO) {
+      nestegg_track_audio_params(ctx, i, &aparams);
+#ifdef DEBUG
+      fprintf(stderr, " audio: %.2fhz %u bit %u channels",
+              aparams.rate, aparams.depth, aparams.channels);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+  }
+
+#ifdef SEEK_TEST
+#ifdef DEBUG
+  fprintf(stderr, "seek to middle\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 2);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "middle ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "middle seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~end\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration - (duration / 10));
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "end ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "end seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~start\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 10);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "start ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "start seek failed\n");
+#endif
+    }
+  }
+#endif
+
+  while (nestegg_read_packet(ctx, &pkt) > 0) {
+    nestegg_packet_track(pkt, &pkt_track);
+    nestegg_packet_count(pkt, &pkt_cnt);
+    nestegg_packet_tstamp(pkt, &pkt_tstamp);
+
+#ifdef DEBUG
+    fprintf(stderr, "t %u pts %f frames %u: ", pkt_track, pkt_tstamp / 1e9, pkt_cnt);
+#endif
+
+    for (i = 0; i < pkt_cnt; ++i) {
+      nestegg_packet_data(pkt, i, &ptr, &size);
+#ifdef DEBUG
+      fprintf(stderr, "%u ", (unsigned int) size);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+
+    nestegg_free_packet(pkt);
+  }
+
+  nestegg_destroy(ctx);
+  fclose(fp);
+
+  return EXIT_SUCCESS;
+}

From fc94ffcea4225428335093c36278e5757b3de934 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 21 Oct 2010 10:26:50 -0400
Subject: [PATCH 229/307] Rewrite vp8_short_walsh4x4_sse2()

This rewriting reflects changes made in commit "Improve the
accuracy of forward walsh-hadamard transform". Since this function
is not called much, only a small encoder performance gain (~0.5% )
is seen.

Change-Id: Ie9df58a43028a11fd5b115c4bbe3141f7596578b
---
 vp8/encoder/x86/fwalsh_sse2.asm        | 198 +++++++++++++++----------
 vp8/encoder/x86/x86_csystemdependent.c |   2 +-
 2 files changed, 121 insertions(+), 79 deletions(-)

diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 38812c8d1..ffc9b3dca 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -21,94 +21,122 @@ sym(vp8_short_walsh4x4_sse2):
     push        rdi
     ; end prolog
 
-    mov     rsi, arg(0)
-    mov     rdi, arg(1)
+    mov     rsi, arg(0)           ; input
+    mov     rdi, arg(1)           ; output
+    movsxd  rdx, dword ptr arg(2) ; pitch
 
-    movdqu    xmm4, [rsi + 0]       ;ip[4] ip[0]
-    movdqu    xmm0, [rsi + 16]      ;ip[12] ip[8]
+    ; first for loop
+    movq    xmm0, MMWORD PTR [rsi]           ; load input
+    movq    xmm1, MMWORD PTR [rsi + rdx]
+    lea     rsi,  [rsi + rdx*2]
+    movq    xmm2, MMWORD PTR [rsi]
+    movq    xmm3, MMWORD PTR [rsi + rdx]
 
-    pxor  xmm7, xmm7
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    ; 13 12 11 10 03 02 01 00
-    ;
-    ; 33 32 31 30 23 22 21 20
-    ;
-    movdqa    xmm3, xmm4          ; 13 12 11 10 03 02 01 00
-    punpcklwd xmm4, xmm0          ; 23 03 22 02 21 01 20 00
-    punpckhwd xmm3, xmm0          ; 33 13 32 12 31 11 30 10
-    movdqa    xmm1, xmm4          ; 23 03 22 02 21 01 20 00
-    punpcklwd xmm4, xmm3          ; 31 21 11 01 30 20 10 00
-    punpckhwd xmm1, xmm3          ; 33 23 13 03 32 22 12 02
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    pshufd    xmm2, xmm1, 4eh       ;ip[8] ip[12]
-    movdqa    xmm3, xmm4          ;ip[4] ip[0]
+    punpcklwd xmm0,  xmm1
+    punpcklwd xmm2,  xmm3
 
-    paddw   xmm4, xmm2          ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
-    psubw   xmm3, xmm2          ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
+    movdqa    xmm1, xmm0
+    punpckldq xmm0, xmm2           ; ip[1] ip[0]
+    punpckhdq xmm1, xmm2           ; ip[3] ip[2]
 
+    movdqa    xmm2, xmm0
+    paddw     xmm0, xmm1
+    psubw     xmm2, xmm1
+
+    psllw     xmm0, 2              ; d1  a1
+    psllw     xmm2, 2              ; c1  b1
+
+    movdqa    xmm1, xmm0
+    punpcklqdq xmm0, xmm2          ; b1  a1
+    punpckhqdq xmm1, xmm2          ; c1  d1
+
+    pxor      xmm6, xmm6
+    movq      xmm6, xmm0
+    pxor      xmm7, xmm7
+    pcmpeqw   xmm7, xmm6
+    paddw     xmm7, [GLOBAL(c1)]
+
+    movdqa    xmm2, xmm0
+    paddw     xmm0, xmm1           ; b1+c1  a1+d1
+    psubw     xmm2, xmm1           ; b1-c1  a1-d1
+    paddw     xmm0, xmm7           ; b1+c1  a1+d1+(a1!=0)
+
+    ; second for loop
+    ; input: 13  9  5  1 12  8  4  0 (xmm0)
+    ;        14 10  6  2 15 11  7  3 (xmm2)
+    ; after shuffle:
+    ;        13  5  9  1 12  4  8  0 (xmm0)
+    ;        14  6 10  2 15  7 11  3 (xmm1)
+    pshuflw   xmm3, xmm0, 0xd8
+    pshufhw   xmm0, xmm3, 0xd8
+    pshuflw   xmm3, xmm2, 0xd8
+    pshufhw   xmm1, xmm3, 0xd8
+
+    movdqa    xmm2, xmm0
+    pmaddwd   xmm0, [GLOBAL(c1)]    ; d11 a11 d10 a10
+    pmaddwd   xmm2, [GLOBAL(cn1)]   ; c11 b11 c10 b10
+    movdqa    xmm3, xmm1
+    pmaddwd   xmm1, [GLOBAL(c1)]    ; d12 a12 d13 a13
+    pmaddwd   xmm3, [GLOBAL(cn1)]   ; c12 b12 c13 b13
+
+    pshufd    xmm4, xmm0, 0xd8      ; d11 d10 a11 a10
+    pshufd    xmm5, xmm2, 0xd8      ; c11 c10 b11 b10
+    pshufd    xmm6, xmm1, 0x72      ; d13 d12 a13 a12
+    pshufd    xmm7, xmm3, 0x72      ; c13 c12 b13 b12
+
+    movdqa    xmm0, xmm4
+    punpcklqdq xmm0, xmm5           ; b11 b10 a11 a10
+    punpckhqdq xmm4, xmm5           ; c11 c10 d11 d10
+    movdqa    xmm1, xmm6
+    punpcklqdq xmm1, xmm7           ; b13 b12 a13 a12
+    punpckhqdq xmm6, xmm7           ; c13 c12 d13 d12
+
+    movdqa    xmm2, xmm0
+    paddd     xmm0, xmm4            ; b21 b20 a21 a20
+    psubd     xmm2, xmm4            ; c21 c20 d21 d20
+    movdqa    xmm3, xmm1
+    paddd     xmm1, xmm6            ; b23 b22 a23 a22
+    psubd     xmm3, xmm6            ; c23 c22 d23 d22
+
+    pxor      xmm4, xmm4
     movdqa    xmm5, xmm4
-    punpcklqdq  xmm4, xmm3          ;d1 a1
-    punpckhqdq  xmm5, xmm3          ;c1 b1
+    pcmpgtd   xmm4, xmm0
+    pcmpgtd   xmm5, xmm2
+    pand      xmm4, [GLOBAL(cd1)]
+    pand      xmm5, [GLOBAL(cd1)]
 
-    movdqa    xmm1, xmm5          ;c1 b1
-    paddw   xmm5, xmm4          ;dl+cl a1+b1 aka op[4] op[0]
-    psubw   xmm4, xmm1          ;d1-c1 a1-b1 aka op[12] op[8]
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    ; 13 12 11 10 03 02 01 00
-    ;
-    ; 33 32 31 30 23 22 21 20
-    ;
-    movdqa    xmm0, xmm5          ; 13 12 11 10 03 02 01 00
-    punpcklwd xmm5, xmm4          ; 23 03 22 02 21 01 20 00
-    punpckhwd xmm0, xmm4          ; 33 13 32 12 31 11 30 10
-    movdqa    xmm1, xmm5          ; 23 03 22 02 21 01 20 00
-    punpcklwd xmm5, xmm0          ; 31 21 11 01 30 20 10 00
-    punpckhwd xmm1, xmm0          ; 33 23 13 03 32 22 12 02
-    ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    pshufd    xmm2, xmm1, 4eh       ;ip[8] ip[12]
-    movdqa    xmm3, xmm5          ;ip[4] ip[0]
+    pxor      xmm6, xmm6
+    movdqa    xmm7, xmm6
+    pcmpgtd   xmm6, xmm1
+    pcmpgtd   xmm7, xmm3
+    pand      xmm6, [GLOBAL(cd1)]
+    pand      xmm7, [GLOBAL(cd1)]
 
-    paddw   xmm5, xmm2          ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1
-    psubw   xmm3, xmm2          ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1
+    paddd     xmm0, xmm4
+    paddd     xmm2, xmm5
+    paddd     xmm0, [GLOBAL(cd3)]
+    paddd     xmm2, [GLOBAL(cd3)]
+    paddd     xmm1, xmm6
+    paddd     xmm3, xmm7
+    paddd     xmm1, [GLOBAL(cd3)]
+    paddd     xmm3, [GLOBAL(cd3)]
 
-    movdqa    xmm6, xmm5
-    punpcklqdq  xmm5, xmm3          ;d1 a1
-    punpckhqdq  xmm6, xmm3          ;c1 b1
+    psrad     xmm0, 3
+    psrad     xmm1, 3
+    psrad     xmm2, 3
+    psrad     xmm3, 3
+    movdqa    xmm4, xmm0
+    punpcklqdq xmm0, xmm1           ; a23 a22 a21 a20
+    punpckhqdq xmm4, xmm1           ; b23 b22 b21 b20
+    movdqa    xmm5, xmm2
+    punpckhqdq xmm2, xmm3           ; c23 c22 c21 c20
+    punpcklqdq xmm5, xmm3           ; d23 d22 d21 d20
 
-    movdqa    xmm1, xmm6          ;c1 b1
-    paddw   xmm6, xmm5          ;dl+cl a1+b1 aka op[4] op[0]
-    psubw   xmm5, xmm1          ;d1-c1 a1-b1 aka op[12] op[8]
+    packssdw  xmm0, xmm4            ; b23 b22 b21 b20 a23 a22 a21 a20
+    packssdw  xmm2, xmm5            ; d23 d22 d21 d20 c23 c22 c21 c20
 
-    movdqa    xmm0, xmm6          ;aka b2 a2
-    movdqa    xmm1, xmm5          ;aka d2 c2
-
-    pcmpgtw   xmm0, xmm7
-    pcmpgtw   xmm1, xmm7
-
-    psrlw   xmm0, 15
-    psrlw   xmm1, 15
-
-    paddw   xmm6, xmm0
-    paddw   xmm5, xmm1
-
-    psraw   xmm6, 1
-    psraw   xmm5, 1
-
-    ;   a2 = a1 + b1;
-    ;   b2 = c1 + d1;
-    ;   c2 = a1 - b1;
-    ;   d2 = d1 - c1;
-    ;        a2 += (a2>0);
-    ;        b2 += (b2>0);
-    ;        c2 += (c2>0);
-    ;        d2 += (d2>0);
-    ;   op[0] = (a2)>>1;
-    ;   op[4] = (b2)>>1;
-    ;   op[8] = (c2)>>1;
-    ;   op[12]= (d2)>>1;
-
-    movdqu  [rdi + 0], xmm6
-    movdqu  [rdi + 16], xmm5
+    movdqa  XMMWORD PTR [rdi], xmm0
+    movdqa  XMMWORD PTR [rdi + 16], xmm2
 
     ; begin epilog
     pop rdi
@@ -116,3 +144,17 @@ sym(vp8_short_walsh4x4_sse2):
     UNSHADOW_ARGS
     pop         rbp
     ret
+
+SECTION_RODATA
+align 16
+c1:
+    dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001
+align 16
+cn1:
+    dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff
+align 16
+cd1:
+    dd 0x00000001, 0x00000001, 0x00000001, 0x00000001
+align 16
+cd3:
+    dd 0x00000003, 0x00000003, 0x00000003, 0x00000003
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 9b753bfa7..b4035ff53 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -289,7 +289,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_sse2;
         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_sse2;
 
-        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c ;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_sse2 ;
 
         cpi->rtcd.encodemb.berr                  = vp8_block_error_xmm;
         cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_xmm;

From 15acc84f10cefd98b2f8dbd2eac2cc92c5a3f851 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 14 Oct 2010 17:10:33 -0700
Subject: [PATCH 230/307] Remove stack shadowing for x86-64

x86-64 passes most arguments in registers.  There is no need to
push them to the stack before using them.

Change-Id: I13c683f1358782682ecafaf1df3fb0af23b978ea
---
 vp8/encoder/x86/sad_sse3.asm | 881 +++++++++++++++++------------------
 1 file changed, 421 insertions(+), 460 deletions(-)

diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 1b7293c20..e662497f1 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -8,24 +8,171 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-
 %include "vpx_ports/x86_abi_support.asm"
 
-%macro PROCESS_16X2X3 1
-%if %1
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm5,       XMMWORD PTR [rdi]
-        lddqu           xmm6,       XMMWORD PTR [rdi+1]
-        lddqu           xmm7,       XMMWORD PTR [rdi+2]
+%macro STACK_FRAME_CREATE_X3 0
+%if ABI_IS_32BIT
+  %define     src_ptr       rsi
+  %define     src_stride    rax
+  %define     ref_ptr       rdi
+  %define     ref_stride    rdx
+  %define     end_ptr       rcx
+  %define     ret_var       rbx
+  %define     result_ptr    arg(4)
+  %define     max_err       arg(4)
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+
+    mov         rsi,        arg(0)              ; src_ptr
+    mov         rdi,        arg(2)              ; ref_ptr
+
+    movsxd      rax,        dword ptr arg(1)    ; src_stride
+    movsxd      rdx,        dword ptr arg(3)    ; ref_stride
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    %define     src_ptr     rcx
+    %define     src_stride  rdx
+    %define     ref_ptr     r8
+    %define     ref_stride  r9
+    %define     end_ptr     r10
+    %define     ret_var     r11
+    %define     result_ptr  [rsp+8+4*8]
+    %define     max_err     [rsp+8+4*8]
+  %else
+    %define     src_ptr     rdi
+    %define     src_stride  rsi
+    %define     ref_ptr     rdx
+    %define     ref_stride  rcx
+    %define     end_ptr     r9
+    %define     ret_var     r10
+    %define     result_ptr  r8
+    %define     max_err     r8
+  %endif
+%endif
+
+%endmacro
+
+%macro STACK_FRAME_DESTROY_X3 0
+  %define     src_ptr
+  %define     src_stride
+  %define     ref_ptr
+  %define     ref_stride
+  %define     end_ptr
+  %define     ret_var
+  %define     result_ptr
+  %define     max_err
+
+%if ABI_IS_32BIT
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+  %endif
+%endif
+    ret
+%endmacro
+
+%macro STACK_FRAME_CREATE_X4 0
+%if ABI_IS_32BIT
+  %define     src_ptr       rsi
+  %define     src_stride    rax
+  %define     r0_ptr        rcx
+  %define     r1_ptr        rdx
+  %define     r2_ptr        rbx
+  %define     r3_ptr        rdi
+  %define     ref_stride    rbp
+  %define     result_ptr    arg(4)
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+
+    push        rbp
+    mov         rdi,        arg(2)              ; ref_ptr_base
+
+    LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+    mov         rsi,        arg(0)              ; src_ptr
+
+    movsxd      rbx,        dword ptr arg(1)    ; src_stride
+    movsxd      rbp,        dword ptr arg(3)    ; ref_stride
+
+    xchg        rbx,        rax
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    %define     src_ptr     rcx
+    %define     src_stride  rdx
+    %define     r0_ptr      rsi
+    %define     r1_ptr      r10
+    %define     r2_ptr      r11
+    %define     r3_ptr      r8
+    %define     ref_stride  r9
+    %define     result_ptr  [rsp+16+4*8]
+    push        rsi
+
+    LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+  %else
+    %define     src_ptr     rdi
+    %define     src_stride  rsi
+    %define     r0_ptr      r9
+    %define     r1_ptr      r10
+    %define     r2_ptr      r11
+    %define     r3_ptr      rdx
+    %define     ref_stride  rcx
+    %define     result_ptr  r8
+
+    LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+
+  %endif
+%endif
+%endmacro
+
+%macro STACK_FRAME_DESTROY_X4 0
+  %define     src_ptr
+  %define     src_stride
+  %define     r0_ptr
+  %define     r1_ptr
+  %define     r2_ptr
+  %define     r3_ptr
+  %define     ref_stride
+  %define     result_ptr
+
+%if ABI_IS_32BIT
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    pop         rsi
+  %endif
+%endif
+    ret
+%endmacro
+
+%macro PROCESS_16X2X3 5
+%if %1==0
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm5,       XMMWORD PTR [%3]
+        lddqu           xmm6,       XMMWORD PTR [%3+1]
+        lddqu           xmm7,       XMMWORD PTR [%3+2]
 
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm1,       XMMWORD PTR [rdi]
-        lddqu           xmm2,       XMMWORD PTR [rdi+1]
-        lddqu           xmm3,       XMMWORD PTR [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm1,       XMMWORD PTR [%3]
+        lddqu           xmm2,       XMMWORD PTR [%3+1]
+        lddqu           xmm3,       XMMWORD PTR [%3+2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -35,13 +182,15 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
-        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
-        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [%2+%4]
+        lddqu           xmm1,       XMMWORD PTR [%3+%5]
+        lddqu           xmm2,       XMMWORD PTR [%3+%5+1]
+        lddqu           xmm3,       XMMWORD PTR [%3+%5+2]
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rdi,        [rdi+rdx*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%4*2]
+        lea             %3,         [%3+%5*2]
+%endif
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -52,21 +201,21 @@
         paddw           xmm7,       xmm3
 %endmacro
 
-%macro PROCESS_8X2X3 1
-%if %1
-        movq            mm0,       QWORD PTR [rsi]
-        movq            mm5,       QWORD PTR [rdi]
-        movq            mm6,       QWORD PTR [rdi+1]
-        movq            mm7,       QWORD PTR [rdi+2]
+%macro PROCESS_8X2X3 5
+%if %1==0
+        movq            mm0,       QWORD PTR [%2]
+        movq            mm5,       QWORD PTR [%3]
+        movq            mm6,       QWORD PTR [%3+1]
+        movq            mm7,       QWORD PTR [%3+2]
 
         psadbw          mm5,       mm0
         psadbw          mm6,       mm0
         psadbw          mm7,       mm0
 %else
-        movq            mm0,       QWORD PTR [rsi]
-        movq            mm1,       QWORD PTR [rdi]
-        movq            mm2,       QWORD PTR [rdi+1]
-        movq            mm3,       QWORD PTR [rdi+2]
+        movq            mm0,       QWORD PTR [%2]
+        movq            mm1,       QWORD PTR [%3]
+        movq            mm2,       QWORD PTR [%3+1]
+        movq            mm3,       QWORD PTR [%3+2]
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -76,13 +225,15 @@
         paddw           mm6,       mm2
         paddw           mm7,       mm3
 %endif
-        movq            mm0,       QWORD PTR [rsi+rax]
-        movq            mm1,       QWORD PTR [rdi+rdx]
-        movq            mm2,       QWORD PTR [rdi+rdx+1]
-        movq            mm3,       QWORD PTR [rdi+rdx+2]
+        movq            mm0,       QWORD PTR [%2+%4]
+        movq            mm1,       QWORD PTR [%3+%5]
+        movq            mm2,       QWORD PTR [%3+%5+1]
+        movq            mm3,       QWORD PTR [%3+%5+2]
 
-        lea             rsi,       [rsi+rax*2]
-        lea             rdi,       [rdi+rdx*2]
+%if %1==0 || %1==1
+        lea             %2,        [%2+%4*2]
+        lea             %3,        [%3+%5*2]
+%endif
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -101,115 +252,117 @@
         mov             %5,         [%1+REG_SZ_BYTES*3]
 %endmacro
 
-%macro PROCESS_16X2X4 1
-%if %1
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm4,       XMMWORD PTR [rcx]
-        lddqu           xmm5,       XMMWORD PTR [rdx]
-        lddqu           xmm6,       XMMWORD PTR [rbx]
-        lddqu           xmm7,       XMMWORD PTR [rdi]
+%macro PROCESS_16X2X4 8
+%if %1==0
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm4,       XMMWORD PTR [%3]
+        lddqu           xmm5,       XMMWORD PTR [%4]
+        lddqu           xmm6,       XMMWORD PTR [%5]
+        lddqu           xmm7,       XMMWORD PTR [%6]
 
         psadbw          xmm4,       xmm0
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm1,       XMMWORD PTR [rcx]
-        lddqu           xmm2,       XMMWORD PTR [rdx]
-        lddqu           xmm3,       XMMWORD PTR [rbx]
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm1,       XMMWORD PTR [%3]
+        lddqu           xmm2,       XMMWORD PTR [%4]
+        lddqu           xmm3,       XMMWORD PTR [%5]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm1,       XMMWORD PTR [%6]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 %endif
-        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
-        lddqu           xmm1,       XMMWORD PTR [rcx+rbp]
-        lddqu           xmm2,       XMMWORD PTR [rdx+rbp]
-        lddqu           xmm3,       XMMWORD PTR [rbx+rbp]
+        movdqa          xmm0,       XMMWORD PTR [%2+%7]
+        lddqu           xmm1,       XMMWORD PTR [%3+%8]
+        lddqu           xmm2,       XMMWORD PTR [%4+%8]
+        lddqu           xmm3,       XMMWORD PTR [%5+%8]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [rdi+rbp]
+        lddqu           xmm1,       XMMWORD PTR [%6+%8]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%7*2]
+        lea             %3,         [%3+%8*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
-
-        lea             rdi,        [rdi+rbp*2]
+        lea             %4,         [%4+%8*2]
+        lea             %5,         [%5+%8*2]
 
+        lea             %6,         [%6+%8*2]
+%endif
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 
 %endmacro
 
-%macro PROCESS_8X2X4 1
-%if %1
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm4,        QWORD PTR [rcx]
-        movq            mm5,        QWORD PTR [rdx]
-        movq            mm6,        QWORD PTR [rbx]
-        movq            mm7,        QWORD PTR [rdi]
+%macro PROCESS_8X2X4 8
+%if %1==0
+        movq            mm0,        QWORD PTR [%2]
+        movq            mm4,        QWORD PTR [%3]
+        movq            mm5,        QWORD PTR [%4]
+        movq            mm6,        QWORD PTR [%5]
+        movq            mm7,        QWORD PTR [%6]
 
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
         psadbw          mm6,        mm0
         psadbw          mm7,        mm0
 %else
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm1,        QWORD PTR [rcx]
-        movq            mm2,        QWORD PTR [rdx]
-        movq            mm3,        QWORD PTR [rbx]
+        movq            mm0,        QWORD PTR [%2]
+        movq            mm1,        QWORD PTR [%3]
+        movq            mm2,        QWORD PTR [%4]
+        movq            mm3,        QWORD PTR [%5]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [rdi]
+        movq            mm1,        QWORD PTR [%6]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 %endif
-        movq            mm0,        QWORD PTR [rsi+rax]
-        movq            mm1,        QWORD PTR [rcx+rbp]
-        movq            mm2,        QWORD PTR [rdx+rbp]
-        movq            mm3,        QWORD PTR [rbx+rbp]
+        movq            mm0,        QWORD PTR [%2+%7]
+        movq            mm1,        QWORD PTR [%3+%8]
+        movq            mm2,        QWORD PTR [%4+%8]
+        movq            mm3,        QWORD PTR [%5+%8]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [rdi+rbp]
+        movq            mm1,        QWORD PTR [%6+%8]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%7*2]
+        lea             %3,         [%3+%8*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
-
-        lea             rdi,        [rdi+rbp*2]
+        lea             %4,         [%4+%8*2]
+        lea             %5,         [%5+%8*2]
 
+        lea             %6,         [%6+%8*2]
+%endif
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 
@@ -223,54 +376,39 @@
 ;    int  *results)
 global sym(vp8_sad16x16x3_sse3)
 sym(vp8_sad16x16x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_16X2X3 1
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-
-        mov             rdi,        arg(4) ;Results
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad16x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -280,50 +418,35 @@ sym(vp8_sad16x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x3_sse3)
 sym(vp8_sad16x8x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_16X2X3 1
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-
-        mov             rdi,        arg(4) ;Results
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad8x16x3_sse3(
 ;    unsigned char *src_ptr,
@@ -333,40 +456,26 @@ sym(vp8_sad16x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x3_sse3)
 sym(vp8_sad8x16x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_8X2X3 1
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
+        mov             rcx,        result_ptr
 
-        mov             rdi,        arg(4) ;Results
+        punpckldq       mm5,        mm6
 
-        movd            [rdi],      mm5
-        movd            [rdi+4],    mm6
-        movd            [rdi+8],    mm7
+        movq            [rcx],      mm5
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad8x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -376,36 +485,22 @@ sym(vp8_sad8x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x3_sse3)
 sym(vp8_sad8x8x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_8X2X3 1
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
+        mov             rcx,        result_ptr
 
-        mov             rdi,        arg(4) ;Results
+        punpckldq       mm5,        mm6
 
-        movd            [rdi],      mm5
-        movd            [rdi+4],    mm6
-        movd            [rdi+8],    mm7
+        movq            [rcx],      mm5
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad4x4x3_sse3(
 ;    unsigned char *src_ptr,
@@ -415,33 +510,23 @@ sym(vp8_sad8x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x3_sse3)
 sym(vp8_sad4x4x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm1,        DWORD PTR [ref_ptr]
 
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm1,        DWORD PTR [rdi]
-
-        movd            mm2,        DWORD PTR [rsi+rax]
-        movd            mm3,        DWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [src_ptr+src_stride]
+        movd            mm3,        DWORD PTR [ref_ptr+ref_stride]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [rdi+1]
-        movd            mm5,        DWORD PTR [rdi+2]
+        movd            mm4,        DWORD PTR [ref_ptr+1]
+        movd            mm5,        DWORD PTR [ref_ptr+2]
 
-        movd            mm2,        DWORD PTR [rdi+rdx+1]
-        movd            mm3,        DWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
+        movd            mm3,        DWORD PTR [ref_ptr+ref_stride+2]
 
         psadbw          mm1,        mm0
 
@@ -451,29 +536,27 @@ sym(vp8_sad4x4x3_sse3):
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
 
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             ref_ptr,    [ref_ptr+ref_stride*2]
 
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm2,        DWORD PTR [ref_ptr]
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rdi,        [rdi+rdx*2]
-
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm2,        DWORD PTR [rdi]
-
-        movd            mm3,        DWORD PTR [rsi+rax]
-        movd            mm6,        DWORD PTR [rdi+rdx]
+        movd            mm3,        DWORD PTR [src_ptr+src_stride]
+        movd            mm6,        DWORD PTR [ref_ptr+ref_stride]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm6
 
-        movd            mm3,        DWORD PTR [rdi+1]
-        movd            mm7,        DWORD PTR [rdi+2]
+        movd            mm3,        DWORD PTR [ref_ptr+1]
+        movd            mm7,        DWORD PTR [ref_ptr+2]
 
         psadbw          mm2,        mm0
 
         paddw           mm1,        mm2
 
-        movd            mm2,        DWORD PTR [rdi+rdx+1]
-        movd            mm6,        DWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
+        movd            mm6,        DWORD PTR [ref_ptr+ref_stride+2]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm6
@@ -484,19 +567,14 @@ sym(vp8_sad4x4x3_sse3):
         paddw           mm3,        mm4
         paddw           mm7,        mm5
 
-        mov             rdi,        arg(4) ;Results
-        movd            [rdi],      mm1
+        mov             rcx,        result_ptr
 
-        movd            [rdi+4],    mm3
-        movd            [rdi+8],    mm7
+        punpckldq       mm1,        mm3
 
+        movq            [rcx],      mm1
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop rdi
-    pop rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;unsigned int vp8_sad16x16_sse3(
 ;    unsigned char *src_ptr,
@@ -507,51 +585,40 @@ sym(vp8_sad4x4x3_sse3):
 ;%define lddqu movdqu
 global sym(vp8_sad16x16_sse3)
 sym(vp8_sad16x16_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rbx
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        lea             end_ptr,    [src_ptr+src_stride*8]
 
-        lea             rcx,        [rsi+rbx*8]
-
-        lea             rcx,        [rcx+rbx*8]
+        lea             end_ptr,    [end_ptr+src_stride*8]
         pxor            mm7,        mm7
 
-vp8_sad16x16_sse3_loop:
+.vp8_sad16x16_sse3_loop:
 
-        movq            rax,        mm7
-        cmp             rax,        arg(4)
-        jg              vp8_sad16x16_early_exit
+        movq            ret_var,    mm7
+        cmp             ret_var,    max_err
+        jg              .vp8_sad16x16_early_exit
 
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm2,        QWORD PTR [rsi+8]
+        movq            mm0,        QWORD PTR [src_ptr]
+        movq            mm2,        QWORD PTR [src_ptr+8]
 
-        movq            mm1,        QWORD PTR [rdi]
-        movq            mm3,        QWORD PTR [rdi+8]
+        movq            mm1,        QWORD PTR [ref_ptr]
+        movq            mm3,        QWORD PTR [ref_ptr+8]
 
-        movq            mm4,        QWORD PTR [rsi+rbx]
-        movq            mm5,        QWORD PTR [rdi+rdx]
+        movq            mm4,        QWORD PTR [src_ptr+src_stride]
+        movq            mm5,        QWORD PTR [ref_ptr+ref_stride]
 
         psadbw          mm0,        mm1
         psadbw          mm2,        mm3
 
-        movq            mm1,        QWORD PTR [rsi+rbx+8]
-        movq            mm3,        QWORD PTR [rdi+rdx+8]
+        movq            mm1,        QWORD PTR [src_ptr+src_stride+8]
+        movq            mm3,        QWORD PTR [ref_ptr+ref_stride+8]
 
         psadbw          mm4,        mm5
         psadbw          mm1,        mm3
 
-        lea             rsi,        [rsi+rbx*2]
-        lea             rdi,        [rdi+rdx*2]
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             ref_ptr,    [ref_ptr+ref_stride*2]
 
         paddw           mm0,        mm2
         paddw           mm4,        mm1
@@ -559,20 +626,16 @@ vp8_sad16x16_sse3_loop:
         paddw           mm7,        mm0
         paddw           mm7,        mm4
 
-        cmp             rsi,        rcx
-        jne             vp8_sad16x16_sse3_loop
+        cmp             src_ptr,    end_ptr
+        jne             .vp8_sad16x16_sse3_loop
 
-        movq            rax,        mm7
+        movq            ret_var,    mm7
 
-vp8_sad16x16_early_exit:
+.vp8_sad16x16_early_exit:
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    pop         rbx
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        mov             rax,        ret_var
+
+    STACK_FRAME_DESTROY_X3
 
 ;void vp8_sad16x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -582,69 +645,48 @@ vp8_sad16x16_early_exit:
 ;    int  *results)
 global sym(vp8_sad16x16x4d_sse3)
 sym(vp8_sad16x16x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_16X2X4 1
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+12],   xmm0
+        movd            [rcx+12],   xmm0
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4
 
 ;void vp8_sad16x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -654,65 +696,44 @@ sym(vp8_sad16x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x4d_sse3)
 sym(vp8_sad16x8x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_16X2X4 1
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+12],   xmm0
+        movd            [rcx+12],   xmm0
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad8x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -722,50 +743,30 @@ sym(vp8_sad16x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x4d_sse3)
 sym(vp8_sad8x16x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_8X2X4 1
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
-        movd            [rdi],      mm4
-        movd            [rdi+4],    mm5
-        movd            [rdi+8],    mm6
-        movd            [rdi+12],   mm7
+        punpckldq       mm4,        mm5
+        punpckldq       mm6,        mm7
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        movq            [rcx],      mm4
+        movq            [rcx+8],    mm6
+
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad8x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -775,46 +776,26 @@ sym(vp8_sad8x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x4d_sse3)
 sym(vp8_sad8x8x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_8X2X4 1
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
-        movd            [rdi],      mm4
-        movd            [rdi+4],    mm5
-        movd            [rdi+8],    mm6
-        movd            [rdi+12],   mm7
+        punpckldq       mm4,        mm5
+        punpckldq       mm6,        mm7
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        movq            [rcx],      mm4
+        movq            [rcx+8],    mm6
+
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad4x4x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -824,43 +805,26 @@ sym(vp8_sad8x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x4d_sse3)
 sym(vp8_sad4x4x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm1,        DWORD PTR [r0_ptr]
 
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm1,        DWORD PTR [rcx]
-
-        movd            mm2,        DWORD PTR [rsi+rax]
-        movd            mm3,        DWORD PTR [rcx+rbp]
+        movd            mm2,        DWORD PTR [src_ptr+src_stride]
+        movd            mm3,        DWORD PTR [r0_ptr+ref_stride]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [rdx]
-        movd            mm5,        DWORD PTR [rbx]
+        movd            mm4,        DWORD PTR [r1_ptr]
+        movd            mm5,        DWORD PTR [r2_ptr]
 
-        movd            mm6,        DWORD PTR [rdi]
-        movd            mm2,        DWORD PTR [rdx+rbp]
+        movd            mm6,        DWORD PTR [r3_ptr]
+        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
 
-        movd            mm3,        DWORD PTR [rbx+rbp]
-        movd            mm7,        DWORD PTR [rdi+rbp]
+        movd            mm3,        DWORD PTR [r2_ptr+ref_stride]
+        movd            mm7,        DWORD PTR [r3_ptr+ref_stride]
 
         psadbw          mm1,        mm0
 
@@ -875,37 +839,40 @@ sym(vp8_sad4x4x4d_sse3):
 
 
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             r0_ptr,     [r0_ptr+ref_stride*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
+        lea             r1_ptr,     [r1_ptr+ref_stride*2]
+        lea             r2_ptr,     [r2_ptr+ref_stride*2]
 
-        lea             rdi,        [rdi+rbp*2]
+        lea             r3_ptr,     [r3_ptr+ref_stride*2]
 
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm2,        DWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm2,        DWORD PTR [r0_ptr]
 
-        movd            mm3,        DWORD PTR [rsi+rax]
-        movd            mm7,        DWORD PTR [rcx+rbp]
+        movd            mm3,        DWORD PTR [src_ptr+src_stride]
+        movd            mm7,        DWORD PTR [r0_ptr+ref_stride]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm7
 
-        movd            mm3,        DWORD PTR [rdx]
-        movd            mm7,        DWORD PTR [rbx]
+        movd            mm3,        DWORD PTR [r1_ptr]
+        movd            mm7,        DWORD PTR [r2_ptr]
 
         psadbw          mm2,        mm0
+%if ABI_IS_32BIT
         mov             rax,        rbp
 
         pop             rbp
-        mov             rsi,        arg(4) ;Results
+%define     ref_stride    rax
+%endif
+        mov             rsi,        result_ptr
 
         paddw           mm1,        mm2
         movd            [rsi],      mm1
 
-        movd            mm2,        DWORD PTR [rdx+rax]
-        movd            mm1,        DWORD PTR [rbx+rax]
+        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
+        movd            mm1,        DWORD PTR [r2_ptr+ref_stride]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm1
@@ -913,8 +880,8 @@ sym(vp8_sad4x4x4d_sse3):
         psadbw          mm3,        mm0
         psadbw          mm7,        mm0
 
-        movd            mm2,        DWORD PTR [rdi]
-        movd            mm1,        DWORD PTR [rdi+rax]
+        movd            mm2,        DWORD PTR [r3_ptr]
+        movd            mm1,        DWORD PTR [r3_ptr+ref_stride]
 
         paddw           mm3,        mm4
         paddw           mm7,        mm5
@@ -929,10 +896,4 @@ sym(vp8_sad4x4x4d_sse3):
         movd            [rsi+12],   mm2
 
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4

From 4cefb4434f31d3075378c4be124f19913e0f5164 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 21 Oct 2010 13:42:24 -0400
Subject: [PATCH 231/307] Add MMWORD PTR/XMMWORD PTR in subtract_sse2.asm

Change-Id: Ia649b500ef020225d8bbf611799d0f47658dc2ac
---
 vp8/encoder/x86/subtract_sse2.asm | 104 +++++++++++++++---------------
 1 file changed, 52 insertions(+), 52 deletions(-)

diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index ef329de71..60522bafa 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -35,21 +35,21 @@ sym(vp8_subtract_b_sse2_impl):
         punpcklbw   mm0,    mm7
         punpcklbw   mm1,    mm7
         psubw   mm0,        mm1
-        movq    [rdi],      mm0
+        movq    MMWORD PTR [rdi],      mm0
 
         movd    mm0,        [rsi+rdx]
         movd    mm1,        [rax+rcx]
         punpcklbw   mm0,    mm7
         punpcklbw   mm1,    mm7
         psubw   mm0,        mm1
-        movq    [rdi+rcx*2],mm0
+        movq    MMWORD PTR [rdi+rcx*2], mm0
 
         movd    mm0,        [rsi+rdx*2]
         movd    mm1,        [rax+rcx*2]
         punpcklbw   mm0,    mm7
         punpcklbw   mm1,    mm7
         psubw   mm0,        mm1
-        movq    [rdi+rcx*4],        mm0
+        movq    MMWORD PTR [rdi+rcx*4], mm0
 
         lea     rsi,        [rsi+rdx*2]
         lea     rcx,        [rcx+rcx*2]
@@ -59,7 +59,7 @@ sym(vp8_subtract_b_sse2_impl):
         punpcklbw   mm0,    mm7
         punpcklbw   mm1,    mm7
         psubw   mm0,        mm1
-        movq    [rdi+rcx*2],        mm0
+        movq    MMWORD PTR [rdi+rcx*2], mm0
 
     ; begin epilog
     pop rdi
@@ -88,8 +88,8 @@ sym(vp8_subtract_mby_sse2):
             mov         rcx,            8      ; do two lines at one time
 
 submby_loop:
-            movdqa      xmm0,           [rsi]   ; src
-            movdqa      xmm1,           [rax]   ; pred
+            movdqa      xmm0,           XMMWORD PTR [rsi]   ; src
+            movdqa      xmm1,           XMMWORD PTR [rax]   ; pred
 
             movdqa      xmm2,           xmm0
             psubb       xmm0,           xmm1
@@ -103,11 +103,11 @@ submby_loop:
             punpcklbw   xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw   xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa      [rdi],   xmm0
-            movdqa      [rdi +16], xmm2
+            movdqa      XMMWORD PTR [rdi],   xmm0
+            movdqa      XMMWORD PTR [rdi +16], xmm2
 
-            movdqa      xmm4,           [rsi + rdx]
-            movdqa      xmm5,           [rax + 16]
+            movdqa      xmm4,           XMMWORD PTR [rsi + rdx]
+            movdqa      xmm5,           XMMWORD PTR [rax + 16]
 
             movdqa      xmm6,           xmm4
             psubb       xmm4,           xmm5
@@ -121,8 +121,8 @@ submby_loop:
             punpcklbw   xmm4,    xmm5            ; put sign back to subtraction
             punpckhbw   xmm6,    xmm7            ; put sign back to subtraction
 
-            movdqa      [rdi +32], xmm4
-            movdqa      [rdi +48], xmm6
+            movdqa      XMMWORD PTR [rdi +32], xmm4
+            movdqa      XMMWORD PTR [rdi +48], xmm6
 
             add         rdi,            64
             add         rax,            32
@@ -159,9 +159,9 @@ sym(vp8_subtract_mbuv_sse2):
 
             ;u
             ;line 0 1
-            movq       xmm0,    [rsi]  ; src
-            movq       xmm2,    [rsi+rdx]
-            movdqa     xmm1,    [rax]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -176,13 +176,13 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi],   xmm0
-            movdqa     [rdi +16],   xmm2
+            movdqa     XMMWORD PTR [rdi],   xmm0
+            movdqa     XMMWORD PTR [rdi +16],   xmm2
 
             ;line 2 3
-            movq       xmm0,    [rsi+rdx*2]  ; src
-            movq       xmm2,    [rsi+rcx]
-            movdqa     xmm1,    [rax+16]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -197,15 +197,15 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 32],   xmm0
-            movdqa     [rdi + 48],   xmm2
+            movdqa     XMMWORD PTR [rdi + 32],   xmm0
+            movdqa     XMMWORD PTR [rdi + 48],   xmm2
 
             ;line 4 5
             lea        rsi,     [rsi + rdx*4]
 
-            movq       xmm0,    [rsi]  ; src
-            movq       xmm2,    [rsi+rdx]
-            movdqa     xmm1,    [rax + 32]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -220,13 +220,13 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 64],   xmm0
-            movdqa     [rdi + 80],   xmm2
+            movdqa     XMMWORD PTR [rdi + 64],   xmm0
+            movdqa     XMMWORD PTR [rdi + 80],   xmm2
 
             ;line 6 7
-            movq       xmm0,    [rsi+rdx*2]  ; src
-            movq       xmm2,    [rsi+rcx]
-            movdqa     xmm1,    [rax+ 48]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -241,8 +241,8 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 96],   xmm0
-            movdqa     [rdi + 112],  xmm2
+            movdqa     XMMWORD PTR [rdi + 96],   xmm0
+            movdqa     XMMWORD PTR [rdi + 112],  xmm2
 
             ;v
             mov     rsi,        arg(2) ;z = vsrc
@@ -250,9 +250,9 @@ sym(vp8_subtract_mbuv_sse2):
             add     rax,        64    ;Predictor = pred + 320
 
             ;line 0 1
-            movq       xmm0,    [rsi]  ; src
-            movq       xmm2,    [rsi+rdx]
-            movdqa     xmm1,    [rax]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -267,13 +267,13 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi],   xmm0
-            movdqa     [rdi +16],   xmm2
+            movdqa     XMMWORD PTR [rdi],   xmm0
+            movdqa     XMMWORD PTR [rdi +16],   xmm2
 
             ;line 2 3
-            movq       xmm0,    [rsi+rdx*2]  ; src
-            movq       xmm2,    [rsi+rcx]
-            movdqa     xmm1,    [rax+16]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+16]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -288,15 +288,15 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 32],   xmm0
-            movdqa     [rdi + 48],   xmm2
+            movdqa     XMMWORD PTR [rdi + 32],   xmm0
+            movdqa     XMMWORD PTR [rdi + 48],   xmm2
 
             ;line 4 5
             lea        rsi,     [rsi + rdx*4]
 
-            movq       xmm0,    [rsi]  ; src
-            movq       xmm2,    [rsi+rdx]
-            movdqa     xmm1,    [rax + 32]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rdx]
+            movdqa     xmm1,    XMMWORD PTR [rax + 32]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -311,13 +311,13 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 64],   xmm0
-            movdqa     [rdi + 80],   xmm2
+            movdqa     XMMWORD PTR [rdi + 64],   xmm0
+            movdqa     XMMWORD PTR [rdi + 80],   xmm2
 
             ;line 6 7
-            movq       xmm0,    [rsi+rdx*2]  ; src
-            movq       xmm2,    [rsi+rcx]
-            movdqa     xmm1,    [rax+ 48]  ; pred
+            movq       xmm0,    MMWORD PTR [rsi+rdx*2]  ; src
+            movq       xmm2,    MMWORD PTR [rsi+rcx]
+            movdqa     xmm1,    XMMWORD PTR [rax+ 48]  ; pred
             punpcklqdq xmm0,    xmm2
 
             movdqa     xmm2,    xmm0
@@ -332,8 +332,8 @@ sym(vp8_subtract_mbuv_sse2):
             punpcklbw  xmm0,    xmm1            ; put sign back to subtraction
             punpckhbw  xmm2,    xmm3            ; put sign back to subtraction
 
-            movdqa     [rdi + 96],   xmm0
-            movdqa     [rdi + 112],  xmm2
+            movdqa     XMMWORD PTR [rdi + 96],   xmm0
+            movdqa     XMMWORD PTR [rdi + 112],  xmm2
 
     ; begin epilog
     pop rdi

From bb7dd5b1baed31b15c6d39fc0c8981b852518b71 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 14 Oct 2010 16:40:12 -0400
Subject: [PATCH 232/307] Move firstpass motion map to stats packet

The first implementation of the firstpass motion map for motion
compensated temporal filtering created a file, fpmotionmap.stt,
in the current working directory. This was not safe for multiple
encoder instances. This patch merges this data into the first pass
stats packet interface, so that it is handled like the other
(numerical) firstpass stats.

The new stats packet is defined as follows:
    Numerical Stats (16 doubles) -- 128 bytes
    Motion Map                   -- 1 byte / Macroblock
    Padding                      -- to align packet to 8 bytes

The fpmotionmap.stt file can still be generated for debugging
purposes in the same way that the textual version of the stats
are available (defining OUTPUT_FPF in firstpass.c)

Change-Id: I083ffbfd95e7d6a42bb4039ba0e81f678c8183ca
---
 vp8/encoder/firstpass.c | 187 ++++++++++++++++++----------------------
 vp8/encoder/firstpass.h |   1 +
 vp8/encoder/onyx_if.c   |  20 +++--
 vp8/encoder/onyx_int.h  |   8 +-
 vp8/vp8_cx_iface.c      |  18 ++--
 5 files changed, 115 insertions(+), 119 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 13633e9a4..607c3d236 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -30,7 +30,6 @@
 #include "encodemv.h"
 
 //#define OUTPUT_FPF 1
-#define FIRSTPASS_MM 1
 
 #if CONFIG_RUNTIME_CPU_DETECT
 #define IF_RTCD(x) (x)
@@ -108,15 +107,6 @@ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position)
 
 static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
 {
-    /*FIRSTPASS_STATS * start_pos;
-    int ret_val;
-
-    start_pos = cpi->stats_in;
-    ret_val = vp8_input_stats(cpi, next_frame);
-    reset_fpf_position(cpi, start_pos);
-
-    return ret_val;*/
-
     if (cpi->stats_in >= cpi->stats_in_end)
         return EOF;
 
@@ -127,7 +117,7 @@ static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame)
 // Calculate a modified Error used in distributing bits between easier and harder frames
 static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 {
-    double av_err = cpi->total_stats.ssim_weighted_pred_err;
+    double av_err = cpi->total_stats->ssim_weighted_pred_err;
     double this_err = this_frame->ssim_weighted_pred_err;
     double modified_err;
 
@@ -238,7 +228,7 @@ int frame_max_bits(VP8_COMP *cpi)
     else
     {
         // For VBR base this on the bits and frames left plus the two_pass_vbrmax_section rate passed in by the user
-        max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
+        max_bits = (int)(((double)cpi->bits_left / (cpi->total_stats->count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0));
     }
 
     // Trap case where we are out of bits
@@ -248,13 +238,31 @@ int frame_max_bits(VP8_COMP *cpi)
     return max_bits;
 }
 
-void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
+
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count)
+{
+    /* Calculate the size of a stats packet, which is dependent on the frame
+     * resolution. The FIRSTPASS_STATS struct has a single element array,
+     * motion_map, which is virtually expanded to have one element per
+     * macroblock.
+     */
+    size_t stats_sz;
+    FIRSTPASS_STATS stats;
+
+    stats_sz = sizeof(FIRSTPASS_STATS) + mb_count;
+    stats_sz = (stats_sz + 7) & ~7;
+    return stats_sz;
+}
+
+
+void vp8_output_stats(const VP8_COMP            *cpi,
+                      struct vpx_codec_pkt_list *pktlist,
                       FIRSTPASS_STATS            *stats)
 {
     struct vpx_codec_cx_pkt pkt;
     pkt.kind = VPX_CODEC_STATS_PKT;
     pkt.data.twopass_stats.buf = stats;
-    pkt.data.twopass_stats.sz = sizeof(*stats);
+    pkt.data.twopass_stats.sz = vp8_firstpass_stats_sz(cpi->common.MBs);
     vpx_codec_pkt_list_add(pktlist, &pkt);
 
 // TEMP debug code
@@ -280,16 +288,24 @@ void vp8_output_stats(struct vpx_codec_pkt_list *pktlist,
                 stats->mv_in_out_count,
                 stats->count);
         fclose(fpfile);
+
+
+        fpfile = fopen("fpmotionmap.stt", "a");
+        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile);
+        fclose(fpfile);
     }
 #endif
 }
 
 int vp8_input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps)
 {
+    size_t stats_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+
     if (cpi->stats_in >= cpi->stats_in_end)
         return EOF;
 
-    *fps = *cpi->stats_in++;
+    *fps = *cpi->stats_in;
+    cpi->stats_in = (void*)((char *)cpi->stats_in + stats_sz);
     return 1;
 }
 
@@ -352,59 +368,47 @@ void vp8_avg_stats(FIRSTPASS_STATS *section)
     section->duration   /= section->count;
 }
 
-int vp8_fpmm_get_pos(VP8_COMP *cpi)
+unsigned char *vp8_fpmm_get_pos(VP8_COMP *cpi)
 {
-    return ftell(cpi->fp_motion_mapfile);
+    return cpi->fp_motion_map_stats;
 }
-void vp8_fpmm_reset_pos(VP8_COMP *cpi, int target_pos)
+void vp8_fpmm_reset_pos(VP8_COMP *cpi, unsigned char *target_pos)
 {
     int Offset;
 
-    if (cpi->fp_motion_mapfile)
-    {
-        Offset = ftell(cpi->fp_motion_mapfile) - target_pos;
-        fseek(cpi->fp_motion_mapfile, (int) - Offset, SEEK_CUR);
-    }
+    cpi->fp_motion_map_stats = target_pos;
 }
 
 void vp8_advance_fpmm(VP8_COMP *cpi, int count)
 {
-#if FIRSTPASS_MM
-    fseek(cpi->fp_motion_mapfile, (int)(count * cpi->common.MBs), SEEK_CUR);
-#endif
+    cpi->fp_motion_map_stats = (void*)((char*)cpi->fp_motion_map_stats +
+        count * vp8_firstpass_stats_sz(cpi->common.MBs));
 }
 
 void vp8_input_fpmm(VP8_COMP *cpi)
 {
-#if FIRSTPASS_MM
+    unsigned char *fpmm = cpi->fp_motion_map;
     int MBs = cpi->common.MBs;
     int max_frames = cpi->active_arnr_frames;
+    int i;
 
-    if (!cpi->fp_motion_mapfile)
-        return;                 // Error
-
-    // Read the specified number of frame motion maps
-    if (fread(cpi->fp_motion_map, 1,
-              max_frames * MBs,
-              cpi->fp_motion_mapfile) != max_frames*MBs)
+    for (i=0; i<max_frames; i++)
     {
-        // Read error
-        return;
+        char *motion_map = (char*)cpi->fp_motion_map_stats
+                           + sizeof(FIRSTPASS_STATS);
+
+        memcpy(fpmm, motion_map, MBs);
+        fpmm += MBs;
+        vp8_advance_fpmm(cpi, 1);
     }
 
     // Flag the use of weights in the temporal filter
     cpi->use_weighted_temporal_filter = 1;
-
-#endif
 }
 
 void vp8_init_first_pass(VP8_COMP *cpi)
 {
-    vp8_zero_stats(&cpi->total_stats);
-
-#ifdef FIRSTPASS_MM
-    cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "wb");
-#endif
+    vp8_zero_stats(cpi->total_stats);
 
 // TEMP debug code
 #ifdef OUTPUT_FPF
@@ -412,6 +416,8 @@ void vp8_init_first_pass(VP8_COMP *cpi)
         FILE *fpfile;
         fpfile = fopen("firstpass.stt", "w");
         fclose(fpfile);
+        fpfile = fopen("fpmotionmap.stt", "wb");
+        fclose(fpfile);
     }
 #endif
 
@@ -419,16 +425,10 @@ void vp8_init_first_pass(VP8_COMP *cpi)
 
 void vp8_end_first_pass(VP8_COMP *cpi)
 {
-    vp8_output_stats(cpi->output_pkt_list, &cpi->total_stats);
-
-#if FIRSTPASS_MM
-
-    if (cpi->fp_motion_mapfile)
-        fclose(cpi->fp_motion_mapfile);
-
-#endif
-
+    vp8_output_stats(cpi, cpi->output_pkt_list, cpi->total_stats);
 }
+
+
 void vp8_zz_motion_search( VP8_COMP *cpi, MACROBLOCK * x, YV12_BUFFER_CONFIG * recon_buffer, int * best_motion_err, int recon_yoffset )
 {
     MACROBLOCKD * const xd = & x->e_mbd;
@@ -839,19 +839,20 @@ void vp8_first_pass(VP8_COMP *cpi)
         fps.duration = cpi->source_end_time_stamp - cpi->source_time_stamp;
 
         // don't want to do outputstats with a stack variable!
-        cpi->this_frame_stats = fps;
-        vp8_output_stats(cpi->output_pkt_list, &cpi->this_frame_stats);
-        vp8_accumulate_stats(&cpi->total_stats, &fps);
-
-#if FIRSTPASS_MM
-        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, cpi->fp_motion_mapfile);
-#endif
+        memcpy(cpi->this_frame_stats,
+               &fps,
+               sizeof(FIRSTPASS_STATS));
+        memcpy((char*)cpi->this_frame_stats + sizeof(FIRSTPASS_STATS),
+               cpi->fp_motion_map,
+               sizeof(cpi->fp_motion_map[0]) * cpi->common.MBs);
+        vp8_output_stats(cpi, cpi->output_pkt_list, cpi->this_frame_stats);
+        vp8_accumulate_stats(cpi->total_stats, &fps);
     }
 
     // Copy the previous Last Frame into the GF buffer if specific conditions for doing so are met
     if ((cm->current_video_frame > 0) &&
-        (cpi->this_frame_stats.pcnt_inter > 0.20) &&
-        ((cpi->this_frame_stats.intra_error / cpi->this_frame_stats.coded_error) > 2.0))
+        (cpi->this_frame_stats->pcnt_inter > 0.20) &&
+        ((cpi->this_frame_stats->intra_error / cpi->this_frame_stats->coded_error) > 2.0))
     {
         vp8_yv12_copy_frame_ptr(lst_yv12, gld_yv12);
     }
@@ -1120,33 +1121,33 @@ void vp8_init_second_pass(VP8_COMP *cpi)
 
     double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100);
 
-    vp8_zero_stats(&cpi->total_stats);
+    vp8_zero_stats(cpi->total_stats);
 
     if (!cpi->stats_in_end)
         return;
 
-    cpi->total_stats = *cpi->stats_in_end;
+    *cpi->total_stats = *cpi->stats_in_end;
 
-    cpi->total_error_left = cpi->total_stats.ssim_weighted_pred_err;
-    cpi->total_intra_error_left = cpi->total_stats.intra_error;
-    cpi->total_coded_error_left = cpi->total_stats.coded_error;
+    cpi->total_error_left = cpi->total_stats->ssim_weighted_pred_err;
+    cpi->total_intra_error_left = cpi->total_stats->intra_error;
+    cpi->total_coded_error_left = cpi->total_stats->coded_error;
     cpi->start_tot_err_left = cpi->total_error_left;
 
-    //cpi->bits_left = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
-    //cpi->bits_left -= (long long)(cpi->total_stats.count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+    //cpi->bits_left = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+    //cpi->bits_left -= (long long)(cpi->total_stats->count * two_pass_min_rate / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
 
     // each frame can have a different duration, as the frame rate in the source
     // isn't guaranteed to be constant.   The frame rate prior to the first frame
     // encoded in the second pass is a guess.  However the sum duration is not.
     // Its calculated based on the actual durations of all frames from the first
     // pass.
-    vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats.count / cpi->total_stats.duration);
+    vp8_new_frame_rate(cpi, 10000000.0 * cpi->total_stats->count / cpi->total_stats->duration);
 
     cpi->output_frame_rate = cpi->oxcf.frame_rate;
-    cpi->bits_left = (long long)(cpi->total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
-    cpi->bits_left -= (long long)(cpi->total_stats.duration * two_pass_min_rate / 10000000.0);
+    cpi->bits_left = (long long)(cpi->total_stats->duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
+    cpi->bits_left -= (long long)(cpi->total_stats->duration * two_pass_min_rate / 10000000.0);
 
-    vp8_avg_stats(&cpi->total_stats);
+    vp8_avg_stats(cpi->total_stats);
 
     // Scan the first pass file and calculate an average Intra / Inter error score ratio for the sequence
     {
@@ -1162,7 +1163,7 @@ void vp8_init_second_pass(VP8_COMP *cpi)
             sum_iiratio += IIRatio;
         }
 
-        cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats.count);
+        cpi->avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->total_stats->count);
 
         // Reset file position
         reset_fpf_position(cpi, start_pos);
@@ -1184,21 +1185,11 @@ void vp8_init_second_pass(VP8_COMP *cpi)
 
     }
 
-#if FIRSTPASS_MM
-    cpi->fp_motion_mapfile = 0;
-    cpi->fp_motion_mapfile = fopen("fpmotionmap.stt", "rb");
-#endif
-
+    cpi->fp_motion_map_stats = (unsigned char *)cpi->stats_in;
 }
 
 void vp8_end_second_pass(VP8_COMP *cpi)
 {
-#if FIRSTPASS_MM
-
-    if (cpi->fp_motion_mapfile)
-        fclose(cpi->fp_motion_mapfile);
-
-#endif
 }
 
 // Analyse and define a gf/arf group .
@@ -1231,18 +1222,14 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
     int max_bits = frame_max_bits(cpi);    // Max for a single frame
 
-#if FIRSTPASS_MM
-    int fpmm_pos;
-#endif
+    unsigned char *fpmm_pos;
 
     cpi->gf_group_bits = 0;
     cpi->gf_decay_rate = 0;
 
     vp8_clear_system_state();  //__asm emms;
 
-#if FIRSTPASS_MM
     fpmm_pos = vp8_fpmm_get_pos(cpi);
-#endif
 
     start_pos = cpi->stats_in;
 
@@ -1494,7 +1481,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
             // Note: this_frame->frame has been updated in the loop
             // so it now points at the ARF frame.
             half_gf_int = cpi->baseline_gf_interval >> 1;
-            frames_after_arf = cpi->total_stats.count - this_frame->frame - 1;
+            frames_after_arf = cpi->total_stats->count - this_frame->frame - 1;
 
             switch (cpi->oxcf.arnr_type)
             {
@@ -1531,12 +1518,11 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
             cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd;
 
-#if FIRSTPASS_MM
             {
                 // Advance to & read in the motion map for those frames
                 // to be considered for filtering based on the position
                 // of the ARF
-                vp8_fpmm_reset_pos(cpi, cpi->fpmm_pos);
+                vp8_fpmm_reset_pos(cpi, cpi->fp_motion_map_stats_save);
 
                 // Position at the 'earliest' frame to be filtered
                 vp8_advance_fpmm(cpi,
@@ -1545,7 +1531,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 // Read / create a motion map for the region of interest
                 vp8_input_fpmm(cpi);
             }
-#endif
         }
         else
         {
@@ -1581,7 +1566,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     // Now decide how many bits should be allocated to the GF group as  a proportion of those remaining in the kf group.
     // The final key frame group in the clip is treated as a special case where cpi->kf_group_bits is tied to cpi->bits_left.
     // This is also important for short clips where there may only be one key frame.
-    if (cpi->frames_to_key >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+    if (cpi->frames_to_key >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
     {
         cpi->kf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;
     }
@@ -1781,10 +1766,8 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         reset_fpf_position(cpi, start_pos);
     }
 
-#if FIRSTPASS_MM
     // Reset the First pass motion map file position
     vp8_fpmm_reset_pos(cpi, fpmm_pos);
-#endif
 }
 
 // Allocate bits to a normal frame that is neither a gf an arf or a key frame.
@@ -1798,7 +1781,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
     int max_bits = frame_max_bits(cpi);    // Max for a single frame
 
     // The final few frames have special treatment
-    if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats.count - cpi->common.current_video_frame))
+    if (cpi->frames_till_gf_update_due >= (int)(cpi->total_stats->count - cpi->common.current_video_frame))
     {
         cpi->gf_group_bits = (cpi->bits_left > 0) ? cpi->bits_left : 0;;
     }
@@ -1843,7 +1826,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 void vp8_second_pass(VP8_COMP *cpi)
 {
     int tmp_q;
-    int frames_left = (int)(cpi->total_stats.count - cpi->common.current_video_frame);
+    int frames_left = (int)(cpi->total_stats->count - cpi->common.current_video_frame);
 
     FIRSTPASS_STATS this_frame;
     FIRSTPASS_STATS this_frame_copy;
@@ -1866,14 +1849,12 @@ void vp8_second_pass(VP8_COMP *cpi)
     if (EOF == vp8_input_stats(cpi, &this_frame))
         return;
 
-#if FIRSTPASS_MM
     vpx_memset(cpi->fp_motion_map, 0,
                 cpi->oxcf.arnr_max_frames*cpi->common.MBs);
-    cpi->fpmm_pos = vp8_fpmm_get_pos(cpi);
+    cpi->fp_motion_map_stats_save = vp8_fpmm_get_pos(cpi);
 
     // Step over this frame's first pass motion map
     vp8_advance_fpmm(cpi, 1);
-#endif
 
     this_frame_error = this_frame.ssim_weighted_pred_err;
     this_frame_intra_error = this_frame.intra_error;
@@ -2562,7 +2543,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         cpi->common.vert_scale = NORMAL;
 
         // Calculate Average bits per frame.
-        //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats.count - cpi->common.current_video_frame);
+        //av_bits_per_frame = cpi->bits_left/(double)(cpi->total_stats->count - cpi->common.current_video_frame);
         av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate);
         //if ( av_bits_per_frame < 0.0 )
         //  av_bits_per_frame = 0.0
@@ -2625,7 +2606,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         }
         else
         {
-            long long clip_bits = (long long)(cpi->total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
+            long long clip_bits = (long long)(cpi->total_stats->count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.frame_rate));
             long long over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;
             long long over_spend2 = cpi->oxcf.starting_buffer_level - projected_buffer_level;
 
diff --git a/vp8/encoder/firstpass.h b/vp8/encoder/firstpass.h
index c7f3e0e45..95e1e5463 100644
--- a/vp8/encoder/firstpass.h
+++ b/vp8/encoder/firstpass.h
@@ -20,4 +20,5 @@ extern void vp8_init_second_pass(VP8_COMP *cpi);
 extern void vp8_second_pass(VP8_COMP *cpi);
 extern void vp8_end_second_pass(VP8_COMP *cpi);
 
+extern size_t vp8_firstpass_stats_sz(unsigned int mb_count);
 #endif
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 382a9de58..27964f97b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -330,6 +330,8 @@ void vp8_dealloc_compressor_data(VP8_COMP *cpi)
 
     cpi->mb.pip = 0;
 
+    vpx_free(cpi->total_stats);
+    vpx_free(cpi->this_frame_stats);
 }
 
 static void enable_segmentation(VP8_PTR ptr)
@@ -1392,6 +1394,12 @@ void vp8_alloc_compressor_data(VP8_COMP *cpi)
     CHECK_MEM_ERROR(cpi->gf_active_flags, vpx_calloc(1, cm->mb_rows * cm->mb_cols));
 
     cpi->gf_active_count = cm->mb_rows * cm->mb_cols;
+
+    cpi->total_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+    cpi->this_frame_stats = vpx_calloc(1, vp8_firstpass_stats_sz(cpi->common.MBs));
+    if(!cpi->total_stats || !cpi->this_frame_stats)
+        vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR,
+                           "Failed to allocate firstpass stats");
 }
 
 
@@ -2290,10 +2298,12 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     }
     else if (cpi->pass == 2)
     {
+        size_t packet_sz = vp8_firstpass_stats_sz(cpi->common.MBs);
+        int packets = oxcf->two_pass_stats_in.sz / packet_sz;
+
         cpi->stats_in = oxcf->two_pass_stats_in.buf;
-        cpi->stats_in_end = cpi->stats_in
-                            + oxcf->two_pass_stats_in.sz / sizeof(FIRSTPASS_STATS)
-                            - 1;
+        cpi->stats_in_end = (void*)((char *)cpi->stats_in
+                            + (packets - 1) * packet_sz);
         vp8_init_second_pass(cpi);
     }
 
@@ -3481,7 +3491,7 @@ static void apply_temporal_filter
             modifier = 16 - modifier;
 #endif
             modifier *= filter_weight;
-            
+
             count[k] += modifier;
             accumulator[k] += modifier * pixel_value;
 
@@ -3656,7 +3666,7 @@ static void vp8cx_temp_blur1_c
     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
     unsigned char *dst1, *dst2;
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-    
+
     // Save input state
     unsigned char *y_buffer = mbd->pre.y_buffer;
     unsigned char *u_buffer = mbd->pre.u_buffer;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 8a34a1f3a..51b62c6eb 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -461,14 +461,14 @@ typedef struct
 
     int target_bandwidth;
     long long bits_left;
-    FIRSTPASS_STATS total_stats;
-    FIRSTPASS_STATS this_frame_stats;
+    FIRSTPASS_STATS *total_stats;
+    FIRSTPASS_STATS *this_frame_stats;
     FIRSTPASS_STATS *stats_in, *stats_in_end;
     struct vpx_codec_pkt_list  *output_pkt_list;
     int                          first_pass_done;
     unsigned char *fp_motion_map;
-    FILE *fp_motion_mapfile;
-    int fpmm_pos;
+
+    unsigned char *fp_motion_map_stats, *fp_motion_map_stats_save;
 
 #if 0
     // Experimental code for lagged and one pass
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 499bc503a..b4bb679b5 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -14,6 +14,7 @@
 #include "vpx_version.h"
 #include "onyx_int.h"
 #include "vpx/vp8e.h"
+#include "vp8/encoder/firstpass.h"
 #include "onyx.h"
 #include <stdlib.h>
 #include <string.h>
@@ -189,22 +190,25 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 
     if (cfg->g_pass == VPX_RC_LAST_PASS)
     {
-        int n_doubles = cfg->rc_twopass_stats_in.sz / sizeof(double);
-        int n_packets = cfg->rc_twopass_stats_in.sz / sizeof(FIRSTPASS_STATS);
-        double frames;
+        int              mb_r = (cfg->g_h + 15) / 16;
+        int              mb_c = (cfg->g_w + 15) / 16;
+        size_t           packet_sz = vp8_firstpass_stats_sz(mb_r * mb_c);
+        int              n_packets = cfg->rc_twopass_stats_in.sz / packet_sz;
+        FIRSTPASS_STATS *stats;
 
         if (!cfg->rc_twopass_stats_in.buf)
             ERROR("rc_twopass_stats_in.buf not set.");
 
-        if (cfg->rc_twopass_stats_in.sz % sizeof(FIRSTPASS_STATS))
+        if (cfg->rc_twopass_stats_in.sz % packet_sz)
             ERROR("rc_twopass_stats_in.sz indicates truncated packet.");
 
-        if (cfg->rc_twopass_stats_in.sz < 2 * sizeof(FIRSTPASS_STATS))
+        if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz)
             ERROR("rc_twopass_stats_in requires at least two packets.");
 
-        frames = ((double *)cfg->rc_twopass_stats_in.buf)[n_doubles - 1];
+        stats = (void*)((char *)cfg->rc_twopass_stats_in.buf
+                + (n_packets - 1) * packet_sz);
 
-        if ((int)(frames + 0.5) != n_packets - 1)
+        if ((int)(stats->count + 0.5) != n_packets - 1)
             ERROR("rc_twopass_stats_in missing EOS stats packet");
     }
 

From 45e64941778058312d72711dbfcf039bb4ba5171 Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Tue, 5 Oct 2010 17:46:37 -0400
Subject: [PATCH 233/307] Change altref times to preceding pts+1.

Change the pts of the altref frame to be as close as possible to the
pts of the preceding frame and still be strictly increasing.

Change-Id: Iae3033a4c89ae5a9d0e5c4198e9196e5f3ee57c7
---
 vp8/vp8_cx_iface.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index b4bb679b5..8e50b7f1b 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -763,12 +763,13 @@ static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t  *ctx,
                 {
                     pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE;
 
-                    // TODO: ideally this timestamp should be as close as
-                    // possible to the prior PTS so that if a decoder uses
-                    // pts to schedule when to do this, we start right after
-                    // last frame was decoded.  Maybe should be set to
-                    // last time stamp. Invisible frames have no duration..
-                    pkt.data.frame.pts --;
+                    // This timestamp should be as close as possible to the
+                    // prior PTS so that if a decoder uses pts to schedule when
+                    // to do this, we start right after last frame was decoded.
+                    // Invisible frames have no duration.
+                    pkt.data.frame.pts = ((cpi->last_time_stamp_seen
+                        * ctx->cfg.g_timebase.den + round)
+                        / ctx->cfg.g_timebase.num / 10000000) + 1;
                     pkt.data.frame.duration = 0;
                 }
 

From 8f75ea6b5c0a72c233d1ff9a75e16e6fe43fda77 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Thu, 21 Oct 2010 17:04:30 -0700
Subject: [PATCH 234/307] Convert [4][4] matrices to [16] arrays.

Most of the code that actually uses these matrices indexes them as
 if they were a single contiguous array, and coverity produces
 reports about the resulting accesses that overflow the static
 bounds of the first row.
This is perfectly legal in C, but converting them to actual [16]
 arrays should eliminate the report, and removes a good deal of
 extraneous indexing and address operators from the code.

Change-Id: Ibda479e2232b3e51f9edf3b355b8640520fdbf23
---
 vp8/common/blockd.h                    |   2 +-
 vp8/common/onyxc_int.h                 |   6 +-
 vp8/decoder/arm/dequantize_arm.c       |   4 +-
 vp8/decoder/decodframe.c               |  25 +++---
 vp8/decoder/dequantize.c               |   2 +-
 vp8/decoder/threading.c                |  10 +--
 vp8/encoder/arm/quantize_arm.c         |   2 +-
 vp8/encoder/block.h                    |  10 +--
 vp8/encoder/encodeframe.c              | 120 ++++++++++++-------------
 vp8/encoder/encodemb.c                 |   4 +-
 vp8/encoder/onyx_int.h                 |  24 ++---
 vp8/encoder/quantize.c                 |  76 ++++++++--------
 vp8/encoder/x86/x86_csystemdependent.c |  44 ++++-----
 13 files changed, 160 insertions(+), 169 deletions(-)

diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index db8400ae4..a81bc8b95 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -195,7 +195,7 @@ typedef struct
     short *diff;
     short *reference;
 
-    short(*dequant)[4];
+    short *dequant;
 
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
     unsigned char **base_pre;
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 132765d18..4966002f5 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -83,9 +83,9 @@ typedef struct VP8Common
 {
     struct vpx_internal_error_info  error;
 
-    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);
 
     int Width;
     int Height;
diff --git a/vp8/decoder/arm/dequantize_arm.c b/vp8/decoder/arm/dequantize_arm.c
index 39265879b..b3e14b793 100644
--- a/vp8/decoder/arm/dequantize_arm.c
+++ b/vp8/decoder/arm/dequantize_arm.c
@@ -30,7 +30,7 @@ void vp8_dequantize_b_neon(BLOCKD *d)
     int i;
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_neon(Q, DQC, DQ);
 }
@@ -42,7 +42,7 @@ void vp8_dequantize_b_v6(BLOCKD *d)
     int i;
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
 
     vp8_dequantize_b_loop_v6(Q, DQC, DQ);
 }
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index efe0ad8e2..ebfda663c 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -40,27 +40,24 @@
 
 void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
 {
-    int r, c;
     int i;
     int Q;
     VP8_COMMON *const pc = & pbi->common;
 
     for (Q = 0; Q < QINDEX_RANGE; Q++)
     {
-        pc->Y1dequant[Q][0][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
-        pc->Y2dequant[Q][0][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
-        pc->UVdequant[Q][0][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
+        pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q);
+        pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
+        pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
 
         // all the ac values = ;
         for (i = 1; i < 16; i++)
         {
             int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
 
-            pc->Y1dequant[Q][r][c] = (short)vp8_ac_yquant(Q);
-            pc->Y2dequant[Q][r][c] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
-            pc->UVdequant[Q][r][c] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
+            pc->Y1dequant[Q][rc] = (short)vp8_ac_yquant(Q);
+            pc->Y2dequant[Q][rc] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q);
+            pc->UVdequant[Q][rc] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q);
         }
     }
 }
@@ -253,7 +250,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         }
 
         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                          xd->predictor, xd->dst.y_buffer,
                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
     }
@@ -268,13 +265,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
             if (xd->eobs[i] > 1)
             {
                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
-                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    (b->qcoeff, b->dequant,  b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
             }
             else
             {
                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
                 ((int *)b->qcoeff)[0] = 0;
             }
@@ -284,13 +281,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     else
     {
         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                          xd->predictor, xd->dst.y_buffer,
                          xd->dst.y_stride, xd->eobs);
     }
 
     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                    (xd->qcoeff+16*16, xd->block[16].dequant,
                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                      xd->dst.uv_stride, xd->eobs+16);
 }
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index 8cfa2a32e..f5d576ac7 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -24,7 +24,7 @@ void vp8_dequantize_b_c(BLOCKD *d)
     int i;
     short *DQ  = d->dqcoeff;
     short *Q   = d->qcoeff;
-    short *DQC = &d->dequant[0][0];
+    short *DQC = d->dequant;
 
     for (i = 0; i < 16; i++)
     {
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 56dd5ef8e..2d7f7b9b8 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -184,7 +184,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
         }
 
         DEQUANT_INVOKE (&pbi->dequant, dc_idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                          xd->predictor, xd->dst.y_buffer,
                          xd->dst.y_stride, xd->eobs, xd->block[24].diff);
     }
@@ -198,13 +198,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
             if (xd->eobs[i] > 1)
             {
                 DEQUANT_INVOKE(&pbi->dequant, idct_add)
-                    (b->qcoeff, &b->dequant[0][0],  b->predictor,
+                    (b->qcoeff, b->dequant,  b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
             }
             else
             {
                 IDCT_INVOKE(RTCD_VTABLE(idct), idct1_scalar_add)
-                    (b->qcoeff[0] * b->dequant[0][0], b->predictor,
+                    (b->qcoeff[0] * b->dequant[0], b->predictor,
                     *(b->base_dst) + b->dst, 16, b->dst_stride);
                 ((int *)b->qcoeff)[0] = 0;
             }
@@ -213,13 +213,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
     else
     {
         DEQUANT_INVOKE (&pbi->dequant, idct_add_y_block)
-                        (xd->qcoeff, &xd->block[0].dequant[0][0],
+                        (xd->qcoeff, xd->block[0].dequant,
                          xd->predictor, xd->dst.y_buffer,
                          xd->dst.y_stride, xd->eobs);
     }
 
     DEQUANT_INVOKE (&pbi->dequant, idct_add_uv_block)
-                    (xd->qcoeff+16*16, &xd->block[16].dequant[0][0],
+                    (xd->qcoeff+16*16, xd->block[16].dequant,
                      xd->predictor+16*16, xd->dst.u_buffer, xd->dst.v_buffer,
                      xd->dst.uv_stride, xd->eobs+16);
 #else
diff --git a/vp8/encoder/arm/quantize_arm.c b/vp8/encoder/arm/quantize_arm.c
index 50f58bf08..65c616614 100644
--- a/vp8/encoder/arm/quantize_arm.c
+++ b/vp8/encoder/arm/quantize_arm.c
@@ -29,7 +29,7 @@ extern int vp8_fast_quantize_b_neon_func(short *coeff_ptr, short *zbin_ptr, shor
 
 void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d)
 {
-    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, &b->zbin[0][0], d->qcoeff, d->dqcoeff, d->dequant[0], vp8_rvsplus1_default_zig_zag1d, &b->round[0][0], &b->quant[0][0]);
+    d->eob = vp8_fast_quantize_b_neon_func(b->coeff, b->zbin, d->qcoeff, d->dqcoeff, d->dequant, vp8_rvsplus1_default_zig_zag1d, b->round, b->quant);
 }
 
 /*
diff --git a/vp8/encoder/block.h b/vp8/encoder/block.h
index ffb88904e..e94e54976 100644
--- a/vp8/encoder/block.h
+++ b/vp8/encoder/block.h
@@ -32,11 +32,11 @@ typedef struct
     short *coeff;
 
     // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
-    short(*quant)[4];
-    short(*quant_shift)[4];
-    short(*zbin)[4];
-    short(*zrun_zbin_boost);
-    short(*round)[4];
+    short *quant;
+    short *quant_shift;
+    short *zbin;
+    short *zrun_zbin_boost;
+    short *round;
 
     // Zbin Over Quant value
     short zbin_extra;
diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 91384c73f..85e121be3 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -160,7 +160,6 @@ static void vp8cx_invert_quant(short *quant, short *shift, short d)
 
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
-    int r, c;
     int i;
     int quant_val;
     int Q;
@@ -171,58 +170,56 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        vp8cx_invert_quant(cpi->Y1quant[Q][0] + 0,
-                           cpi->Y1quant_shift[Q][0] + 0, quant_val);
-        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+                           cpi->Y1quant_shift[Q] + 0, quant_val);
+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        vp8cx_invert_quant(cpi->Y2quant[Q][0] + 0,
-                           cpi->Y2quant_shift[Q][0] + 0, quant_val);
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+                           cpi->Y2quant_shift[Q] + 0, quant_val);
+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        vp8cx_invert_quant(cpi->UVquant[Q][0] + 0,
-                           cpi->UVquant_shift[Q][0] + 0, quant_val);
-        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0][0] = quant_val;
+        vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+                           cpi->UVquant_shift[Q] + 0, quant_val);
+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         // all the ac values = ;
         for (i = 1; i < 16; i++)
         {
             int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
 
             quant_val = vp8_ac_yquant(Q);
-            vp8cx_invert_quant(cpi->Y1quant[Q][r] + c,
-                               cpi->Y1quant_shift[Q][r] + c, quant_val);
-            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+                               cpi->Y1quant_shift[Q] + rc, quant_val);
+            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            vp8cx_invert_quant(cpi->Y2quant[Q][r] + c,
-                               cpi->Y2quant_shift[Q][r] + c, quant_val);
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+                               cpi->Y2quant_shift[Q] + rc, quant_val);
+            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            vp8cx_invert_quant(cpi->UVquant[Q][r] + c,
-                               cpi->UVquant_shift[Q][r] + c, quant_val);
-            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][r][c] = quant_val;
+            vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+                               cpi->UVquant_shift[Q] + rc, quant_val);
+            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
         }
     }
@@ -230,7 +227,6 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
 #else
 void vp8cx_init_quantizer(VP8_COMP *cpi)
 {
-    int r, c;
     int i;
     int quant_val;
     int Q;
@@ -241,52 +237,50 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        cpi->Y1quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y1zbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-        cpi->Y1round[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.Y1dequant[Q][0][0] = quant_val;
+        cpi->Y1quant[Q][0] = (1 << 16) / quant_val;
+        cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+        cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.Y1dequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        cpi->Y2quant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->Y2zbin[Q][0][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-        cpi->Y2round[Q][0][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-        cpi->common.Y2dequant[Q][0][0] = quant_val;
+        cpi->Y2quant[Q][0] = (1 << 16) / quant_val;
+        cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+        cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+        cpi->common.Y2dequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        cpi->UVquant[Q][0][0] = (1 << 16) / quant_val;
-        cpi->UVzbin[Q][0][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
-        cpi->UVround[Q][0][0] = (qrounding_factors[Q] * quant_val) >> 7;
-        cpi->common.UVdequant[Q][0][0] = quant_val;
+        cpi->UVquant[Q][0] = (1 << 16) / quant_val;
+        cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
+        cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
+        cpi->common.UVdequant[Q][0] = quant_val;
         cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         // all the ac values = ;
         for (i = 1; i < 16; i++)
         {
             int rc = vp8_default_zig_zag1d[i];
-            r = (rc >> 2);
-            c = (rc & 3);
 
             quant_val = vp8_ac_yquant(Q);
-            cpi->Y1quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y1zbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->Y1round[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.Y1dequant[Q][r][c] = quant_val;
+            cpi->Y1quant[Q][rc] = (1 << 16) / quant_val;
+            cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.Y1dequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            cpi->Y2quant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->Y2zbin[Q][r][c] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
-            cpi->Y2round[Q][r][c] = (qrounding_factors_y2[Q] * quant_val) >> 7;
-            cpi->common.Y2dequant[Q][r][c] = quant_val;
+            cpi->Y2quant[Q][rc] = (1 << 16) / quant_val;
+            cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
+            cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
+            cpi->common.Y2dequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            cpi->UVquant[Q][r][c] = (1 << 16) / quant_val;
-            cpi->UVzbin[Q][r][c] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
-            cpi->UVround[Q][r][c] = (qrounding_factors[Q] * quant_val) >> 7;
-            cpi->common.UVdequant[Q][r][c] = quant_val;
+            cpi->UVquant[Q][rc] = (1 << 16) / quant_val;
+            cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
+            cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
+            cpi->common.UVdequant[Q][rc] = quant_val;
             cpi->zrun_zbin_boost_uv[Q][i] = (quant_val * zbin_boost[i]) >> 7;
         }
     }
@@ -317,7 +311,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
         QIndex = cpi->common.base_qindex;
 
     // Y
-    zbin_extra = (cpi->common.Y1dequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.Y1dequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 
     for (i = 0; i < 16; i++)
     {
@@ -331,7 +325,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     }
 
     // UV
-    zbin_extra = (cpi->common.UVdequant[QIndex][0][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.UVdequant[QIndex][1] * (cpi->zbin_over_quant + cpi->zbin_mode_boost)) >> 7;
 
     for (i = 16; i < 24; i++)
     {
@@ -345,7 +339,7 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
     }
 
     // Y2
-    zbin_extra = (cpi->common.Y2dequant[QIndex][0][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
+    zbin_extra = (cpi->common.Y2dequant[QIndex][1] * ((cpi->zbin_over_quant / 2) + cpi->zbin_mode_boost)) >> 7;
     x->block[24].quant = cpi->Y2quant[QIndex];
     x->block[24].quant_shift = cpi->Y2quant_shift[QIndex];
     x->block[24].zbin = cpi->Y2zbin[QIndex];
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 782b6fd2d..99ee2b806 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -301,8 +301,8 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
     vp8_strict_quantize_b(b, d);
 #endif
 
-    dequant_ptr = &d->dequant[0][0];
-    coeff_ptr = &b->coeff[0];
+    dequant_ptr = d->dequant;
+    coeff_ptr = b->coeff;
     qcoeff_ptr = d->qcoeff;
     dqcoeff_ptr = d->dqcoeff;
     i0 = !type;
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 51b62c6eb..eb516e927 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -232,20 +232,20 @@ typedef struct VP8_ENCODER_RTCD
 typedef struct
 {
 
-    DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]);
 
-    DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]);
 
-    DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]);
 
     DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]);
     DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]);
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 6cc224494..5e65fadb3 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -23,14 +23,14 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     int i, rc, eob;
     int zbin;
     int x, y, z, sz;
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *quant_shift_ptr = &b->quant_shift[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *coeff_ptr       = b->coeff;
+    short *zbin_ptr        = b->zbin;
+    short *round_ptr       = b->round;
+    short *quant_ptr       = b->quant;
+    short *quant_shift_ptr = b->quant_shift;
+    short *qcoeff_ptr      = d->qcoeff;
+    short *dqcoeff_ptr     = d->dqcoeff;
+    short *dequant_ptr     = d->dequant;
 
     vpx_memset(qcoeff_ptr, 0, 32);
     vpx_memset(dqcoeff_ptr, 0, 32);
@@ -69,16 +69,16 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     int i, rc, eob;
     int zbin;
     int x, y, z, sz;
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *quant_shift_ptr = &b->quant_shift[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr  = b->zrun_zbin_boost;
+    short *coeff_ptr       = b->coeff;
+    short *zbin_ptr        = b->zbin;
+    short *round_ptr       = b->round;
+    short *quant_ptr       = b->quant;
+    short *quant_shift_ptr = b->quant_shift;
+    short *qcoeff_ptr      = d->qcoeff;
+    short *dqcoeff_ptr     = d->dqcoeff;
+    short *dequant_ptr     = d->dequant;
+    short zbin_oq_value    = b->zbin_extra;
 
     vpx_memset(qcoeff_ptr, 0, 32);
     vpx_memset(dqcoeff_ptr, 0, 32);
@@ -136,12 +136,12 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
     short *dqcoeff_ptr;
     short *dequant_ptr;
 
-    coeff_ptr = &b->coeff[0];
-    quant_ptr = &b->quant[0][0];
-    quant_shift_ptr = &b->quant_shift[0][0];
-    qcoeff_ptr = d->qcoeff;
-    dqcoeff_ptr = d->dqcoeff;
-    dequant_ptr = &d->dequant[0][0];
+    coeff_ptr       = b->coeff;
+    quant_ptr       = b->quant;
+    quant_shift_ptr = b->quant_shift;
+    qcoeff_ptr      = d->qcoeff;
+    dqcoeff_ptr     = d->dqcoeff;
+    dequant_ptr     = d->dequant;
     eob = - 1;
     vpx_memset(qcoeff_ptr, 0, 32);
     vpx_memset(dqcoeff_ptr, 0, 32);
@@ -183,12 +183,12 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     int i, rc, eob;
     int zbin;
     int x, y, z, sz;
-    short *coeff_ptr  = &b->coeff[0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
 
     eob = -1;
     for (i = 0; i < 16; i++)
@@ -217,15 +217,15 @@ void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
     int i, rc, eob;
     int zbin;
     int x, y, z, sz;
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr = b->zrun_zbin_boost;
+    short *coeff_ptr      = b->coeff;
+    short *zbin_ptr       = b->zbin;
+    short *round_ptr      = b->round;
+    short *quant_ptr      = b->quant;
+    short *qcoeff_ptr     = d->qcoeff;
+    short *dqcoeff_ptr    = d->dqcoeff;
+    short *dequant_ptr    = d->dequant;
+    short zbin_oq_value   = b->zbin_extra;
 
     vpx_memset(qcoeff_ptr, 0, 32);
     vpx_memset(dqcoeff_ptr, 0, 32);
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index b4035ff53..3e5a8abf9 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -29,14 +29,14 @@ int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                  short *quant_ptr, short *dqcoeff_ptr);
 void vp8_fast_quantize_b_mmx(BLOCK *b, BLOCKD *d)
 {
-    short *scan_mask    = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+    short *coeff_ptr   = b->coeff;
+    short *zbin_ptr    = b->zbin;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
 
     d->eob = vp8_fast_quantize_b_impl_mmx(
                  coeff_ptr,
@@ -94,13 +94,13 @@ int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
                                  short *quant_ptr, short *dqcoeff_ptr);
 void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d)
 {
-    short *scan_mask    = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
-    short *coeff_ptr  = &b->coeff[0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
+    short *scan_mask   = vp8_default_zig_zag_mask;//d->scan_order_mask_ptr;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
     short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
+    short *dequant_ptr = d->dequant;
 
     d->eob = vp8_fast_quantize_b_impl_sse2(
                  coeff_ptr,
@@ -124,15 +124,15 @@ int vp8_regular_quantize_b_impl_sse2(short *coeff_ptr, short *zbin_ptr,
 
 void vp8_regular_quantize_b_sse2(BLOCK *b,BLOCKD *d)
 {
-    short *zbin_boost_ptr = &b->zrun_zbin_boost[0];
-    short *coeff_ptr  = &b->coeff[0];
-    short *zbin_ptr   = &b->zbin[0][0];
-    short *round_ptr  = &b->round[0][0];
-    short *quant_ptr  = &b->quant[0][0];
-    short *qcoeff_ptr = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = &d->dequant[0][0];
-    short zbin_oq_value = b->zbin_extra;
+    short *zbin_boost_ptr = b->zrun_zbin_boost;
+    short *coeff_ptr      = b->coeff;
+    short *zbin_ptr       = b->zbin;
+    short *round_ptr      = b->round;
+    short *quant_ptr      = b->quant;
+    short *qcoeff_ptr     = d->qcoeff;
+    short *dqcoeff_ptr    = d->dqcoeff;
+    short *dequant_ptr    = d->dequant;
+    short zbin_oq_value   = b->zbin_extra;
 
     d->eob = vp8_regular_quantize_b_impl_sse2(
         coeff_ptr,

From 09bcc1f710ea65dc158639479288fb1908ff0c53 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Tue, 19 Oct 2010 15:40:46 -0700
Subject: [PATCH 235/307] Improve handling of invalid frames.

The code was not checking for frame sizes smaller than 3 bytes, and the
 partition size checks might have failed if the input buffer was within
 16MB of the top of the heap.
In addition, the reference count on the current frame buffer was not
 being decremented on error, so after a small number of errors, no new
 frame buffer could be found and it would run off the list of them.

Change-Id: I0c60dba6adb1e2a29df39754f72a56ab6c776b46
---
 vp8/decoder/decodframe.c | 9 ++++++---
 vp8/decoder/onyxd_if.c   | 8 ++++++--
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index efe0ad8e2..2d81d61d5 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -462,7 +462,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
             partition_size = user_data_end - partition;
         }
 
-        if (partition + partition_size > user_data_end)
+        if (user_data_end - partition < partition_size)
             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                "Truncated packet or corrupt partition "
                                "%d length", i + 1);
@@ -564,12 +564,15 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     MACROBLOCKD *const xd  = & pbi->mb;
     const unsigned char *data = (const unsigned char *)pbi->Source;
     const unsigned char *const data_end = data + pbi->source_sz;
-    int first_partition_length_in_bytes;
+    unsigned int first_partition_length_in_bytes;
 
     int mb_row;
     int i, j, k, l;
     const int *const mb_feature_data_bits = vp8_mb_feature_data_bits;
 
+    if (data_end - data < 3)
+        vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
+                           "Truncated packet");
     pc->frame_type = (FRAME_TYPE)(data[0] & 1);
     pc->version = (data[0] >> 1) & 7;
     pc->show_frame = (data[0] >> 4) & 1;
@@ -577,7 +580,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
     data += 3;
 
-    if (data + first_partition_length_in_bytes > data_end)
+    if (data_end - data < first_partition_length_in_bytes)
         vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                            "Truncated packet or corrupt partition 0 length");
     vp8_setup_version(pc);
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 7d716b0a0..884c38da0 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -327,9 +327,13 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.error_code = VPX_CODEC_OK;
 
+    cm->new_fb_idx = get_free_fb (cm);
+
     if (setjmp(pbi->common.error.jmp))
     {
         pbi->common.error.setjmp = 0;
+        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
         return -1;
     }
 
@@ -345,8 +349,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     pbi->Source = source;
     pbi->source_sz = size;
 
-    cm->new_fb_idx = get_free_fb (cm);
-
     retcode = vp8_decode_frame(pbi);
 
     if (retcode < 0)
@@ -356,6 +358,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 #endif
         pbi->common.error.error_code = VPX_CODEC_ERROR;
         pbi->common.error.setjmp = 0;
+        if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
+          cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
         return retcode;
     }
 

From e81e30c25de3a75e8b02b38d55c65134874f83aa Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Wed, 20 Oct 2010 16:27:33 -0400
Subject: [PATCH 236/307] isolate new temporal filtering code

onyx_if is getting pretty big. split out the temporal code to make it
easier to look at.

Change-Id: I207c3a94c90e91b32e3ea5e1836a53b7a990fabd
---
 vp8/encoder/onyx_if.c         | 617 +------------------------------
 vp8/encoder/temporal_filter.c | 662 ++++++++++++++++++++++++++++++++++
 vp8/encoder/temporal_filter.h |  19 +
 vp8/vp8cx.mk                  |   2 +
 4 files changed, 684 insertions(+), 616 deletions(-)
 create mode 100644 vp8/encoder/temporal_filter.c
 create mode 100644 vp8/encoder/temporal_filter.h

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 27964f97b..53d68be52 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -30,6 +30,7 @@
 #include "threading.h"
 #include "vpx_ports/vpx_timer.h"
 #include "vpxerrors.h"
+#include "temporal_filter.h"
 
 #include <math.h>
 #include <stdio.h>
@@ -43,9 +44,6 @@
 #define RTCD(x) NULL
 #endif
 
-#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
-#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
-
 extern void vp8cx_init_mv_bits_sadcost();
 extern void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi);
 extern void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val);
@@ -3371,619 +3369,6 @@ void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame)
 }
 #endif
 // return of 0 means drop frame
-#define USE_FILTER_LUT 1
-#if VP8_TEMPORAL_ALT_REF
-
-#if USE_FILTER_LUT
-static int modifier_lut[7][19] =
-{
-    // Strength=0
-    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=1
-    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=2
-    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=3
-    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=4
-    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
-    // Strength=5
-    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
-    // Strength=6
-    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
-};
-#endif
-static void build_predictors_mb
-(
-    MACROBLOCKD *x,
-    unsigned char *y_mb_ptr,
-    unsigned char *u_mb_ptr,
-    unsigned char *v_mb_ptr,
-    int stride,
-    int mv_row,
-    int mv_col,
-    unsigned char *pred
-)
-{
-    int offset;
-    unsigned char *yptr, *uptr, *vptr;
-
-    // Y
-    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
-
-    if ((mv_row | mv_col) & 7)
-    {
-//        vp8_sixtap_predict16x16_c(yptr, stride,
-//                                    mv_col & 7, mv_row & 7, &pred[0], 16);
-        x->subpixel_predict16x16(yptr, stride,
-                                    mv_col & 7, mv_row & 7, &pred[0], 16);
-    }
-    else
-    {
-        //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
-        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
-    }
-
-    // U & V
-    mv_row >>= 1;
-    mv_col >>= 1;
-    stride >>= 1;
-    offset = (mv_row >> 3) * stride + (mv_col >> 3);
-    uptr = u_mb_ptr + offset;
-    vptr = v_mb_ptr + offset;
-
-    if ((mv_row | mv_col) & 7)
-    {
-        x->subpixel_predict8x8(uptr, stride,
-                            mv_col & 7, mv_row & 7, &pred[256], 8);
-        x->subpixel_predict8x8(vptr, stride,
-                            mv_col & 7, mv_row & 7, &pred[320], 8);
-    }
-    else
-    {
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
-        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
-    }
-}
-static void apply_temporal_filter
-(
-    unsigned char *frame1,
-    unsigned int stride,
-    unsigned char *frame2,
-    unsigned int block_size,
-    int strength,
-    int filter_weight,
-    int *accumulator,
-    int *count
-)
-{
-    int i, j, k;
-    int modifier;
-    int byte = 0;
-
-#if USE_FILTER_LUT
-    int *lut = modifier_lut[strength];
-#endif
-
-    for (i = 0,k = 0; i < block_size; i++)
-    {
-        for (j = 0; j < block_size; j++, k++)
-        {
-
-            int src_byte = frame1[byte];
-            int pixel_value = *frame2++;
-
-#if USE_FILTER_LUT
-            // LUT implementation --
-            // improves precision of filter
-            modifier = abs(src_byte-pixel_value);
-            modifier = modifier>18 ? 0 : lut[modifier];
-#else
-            modifier   = src_byte;
-            modifier  -= pixel_value;
-            modifier  *= modifier;
-            modifier >>= strength;
-            modifier  *= 3;
-
-            if (modifier > 16)
-                modifier = 16;
-
-            modifier = 16 - modifier;
-#endif
-            modifier *= filter_weight;
-
-            count[k] += modifier;
-            accumulator[k] += modifier * pixel_value;
-
-            byte++;
-        }
-
-        byte += stride - block_size;
-    }
-}
-
-#if ALT_REF_MC_ENABLED
-static int dummy_cost[2*mv_max+1];
-
-static int find_matching_mb
-(
-    VP8_COMP *cpi,
-    YV12_BUFFER_CONFIG *arf_frame,
-    YV12_BUFFER_CONFIG *frame_ptr,
-    int mb_offset,
-    int error_thresh
-)
-{
-    MACROBLOCK *x = &cpi->mb;
-    int thissme;
-    int step_param;
-    int further_steps;
-    int n = 0;
-    int sadpb = x->sadperbit16;
-    int bestsme = INT_MAX;
-    int num00 = 0;
-
-    BLOCK *b = &x->block[0];
-    BLOCKD *d = &x->e_mbd.block[0];
-    MV best_ref_mv1 = {0,0};
-
-    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
-    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
-
-    // Save input state
-    unsigned char **base_src = b->base_src;
-    int src = b->src;
-    int src_stride = b->src_stride;
-    unsigned char **base_pre = d->base_pre;
-    int pre = d->pre;
-    int pre_stride = d->pre_stride;
-
-    // Setup frame pointers
-    b->base_src = &arf_frame->y_buffer;
-    b->src_stride = arf_frame->y_stride;
-    b->src = mb_offset;
-
-    d->base_pre = &frame_ptr->y_buffer;
-    d->pre_stride = frame_ptr->y_stride;
-    d->pre = mb_offset;
-
-    // Further step/diamond searches as necessary
-    if (cpi->Speed < 8)
-    {
-        step_param = cpi->sf.first_step +
-                    ((cpi->Speed > 5) ? 1 : 0);
-        further_steps =
-            (cpi->sf.max_step_search_steps - 1)-step_param;
-    }
-    else
-    {
-        step_param = cpi->sf.first_step + 2;
-        further_steps = 0;
-    }
-
-    if (1/*cpi->sf.search_method == HEX*/)
-    {
-        // TODO Check that the 16x16 vf & sdf are selected here
-        bestsme = vp8_hex_search(x, b, d,
-            &best_ref_mv1, &d->bmi.mv.as_mv,
-            step_param,
-            sadpb/*x->errorperbit*/,
-            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
-            mvsadcost, mvcost);
-    }
-    else
-    {
-        int mv_x, mv_y;
-
-        bestsme = cpi->diamond_search_sad(x, b, d,
-            &best_ref_mv1, &d->bmi.mv.as_mv,
-            step_param,
-            sadpb / 2/*x->errorperbit*/,
-            &num00, &cpi->fn_ptr,
-            mvsadcost, mvcost); //sadpb < 9
-
-        // Further step/diamond searches as necessary
-        n = 0;
-        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
-
-        n = num00;
-        num00 = 0;
-
-        while (n < further_steps)
-        {
-            n++;
-
-            if (num00)
-                num00--;
-            else
-            {
-                thissme = cpi->diamond_search_sad(x, b, d,
-                    &best_ref_mv1, &d->bmi.mv.as_mv,
-                    step_param + n,
-                    sadpb / 4/*x->errorperbit*/,
-                    &num00, &cpi->fn_ptr,
-                    mvsadcost, mvcost); //sadpb = 9
-
-                if (thissme < bestsme)
-                {
-                    bestsme = thissme;
-                    mv_y = d->bmi.mv.as_mv.row;
-                    mv_x = d->bmi.mv.as_mv.col;
-                }
-                else
-                {
-                    d->bmi.mv.as_mv.row = mv_y;
-                    d->bmi.mv.as_mv.col = mv_x;
-                }
-            }
-        }
-    }
-
-#if ALT_REF_SUBPEL_ENABLED
-    // Try sub-pixel MC?
-    //if (bestsme > error_thresh && bestsme < INT_MAX)
-    {
-        bestsme = cpi->find_fractional_mv_step(x, b, d,
-                    &d->bmi.mv.as_mv, &best_ref_mv1,
-                    x->errorperbit, cpi->fn_ptr.svf,
-                    cpi->fn_ptr.vf, cpi->mb.mvcost);
-    }
-#endif
-
-    // Save input state
-    b->base_src = base_src;
-    b->src = src;
-    b->src_stride = src_stride;
-    d->base_pre = base_pre;
-    d->pre = pre;
-    d->pre_stride = pre_stride;
-
-    return bestsme;
-}
-#endif
-
-static void vp8cx_temp_blur1_c
-(
-    VP8_COMP *cpi,
-    int frame_count,
-    int alt_ref_index,
-    int strength
-)
-{
-    int byte;
-    int frame;
-    int mb_col, mb_row;
-    unsigned int filter_weight[MAX_LAG_BUFFERS];
-    unsigned char *mm_ptr = cpi->fp_motion_map;
-    int cols = cpi->common.mb_cols;
-    int rows = cpi->common.mb_rows;
-    int MBs  = cpi->common.MBs;
-    int mb_y_offset = 0;
-    int mb_uv_offset = 0;
-    unsigned int accumulator[384];
-    unsigned int count[384];
-    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
-    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
-    unsigned char *dst1, *dst2;
-    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-
-    // Save input state
-    unsigned char *y_buffer = mbd->pre.y_buffer;
-    unsigned char *u_buffer = mbd->pre.u_buffer;
-    unsigned char *v_buffer = mbd->pre.v_buffer;
-
-    if (!cpi->use_weighted_temporal_filter)
-    {
-        // Temporal filtering is unweighted
-        for (frame = 0; frame < frame_count; frame++)
-            filter_weight[frame] = 1;
-    }
-
-    for (mb_row = 0; mb_row < rows; mb_row++)
-    {
-#if ALT_REF_MC_ENABLED
-        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
-        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
-        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
-                                + (VP8BORDERINPIXELS - 19);
-#endif
-
-        for (mb_col = 0; mb_col < cols; mb_col++)
-        {
-            int i, j, k, w;
-            int weight_cap;
-            int stride;
-
-            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
-            vpx_memset(count, 0, 384*sizeof(unsigned int));
-
-#if ALT_REF_MC_ENABLED
-            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
-            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
-            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
-                                    + (VP8BORDERINPIXELS - 19);
-#endif
-
-            // Read & process macroblock weights from motion map
-            if (cpi->use_weighted_temporal_filter)
-            {
-                weight_cap = 2;
-
-                for (frame = alt_ref_index-1; frame >= 0; frame--)
-                {
-                    w = *(mm_ptr + (frame+1)*MBs);
-                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
-                    weight_cap = w;
-                }
-
-                filter_weight[alt_ref_index] = 2;
-
-                weight_cap = 2;
-
-                for (frame = alt_ref_index+1; frame < frame_count; frame++)
-                {
-                    w = *(mm_ptr + frame*MBs);
-                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
-                    weight_cap = w;
-                }
-
-            }
-
-            for (frame = 0; frame < frame_count; frame++)
-            {
-                int err;
-
-                if (cpi->frames[frame] == NULL)
-                    continue;
-
-                mbd->block[0].bmi.mv.as_mv.row = 0;
-                mbd->block[0].bmi.mv.as_mv.col = 0;
-
-#if ALT_REF_MC_ENABLED
-                //if (filter_weight[frame] == 0)
-                {
-#define THRESH_LOW   10000
-#define THRESH_HIGH  20000
-
-                    // Correlation has been lost try MC
-                    err = find_matching_mb ( cpi,
-                                             cpi->frames[alt_ref_index],
-                                             cpi->frames[frame],
-                                             mb_y_offset,
-                                             THRESH_LOW );
-
-                    if (filter_weight[frame] < 2)
-                    {
-                        // Set weight depending on error
-                        filter_weight[frame] = err<THRESH_LOW
-                                                ? 2 : err<THRESH_HIGH ? 1 : 0;
-                    }
-                }
-#endif
-                if (filter_weight[frame] != 0)
-                {
-                    // Construct the predictors
-                    build_predictors_mb (
-                              mbd,
-                              cpi->frames[frame]->y_buffer + mb_y_offset,
-                              cpi->frames[frame]->u_buffer + mb_uv_offset,
-                              cpi->frames[frame]->v_buffer + mb_uv_offset,
-                              cpi->frames[frame]->y_stride,
-                              mbd->block[0].bmi.mv.as_mv.row,
-                              mbd->block[0].bmi.mv.as_mv.col,
-                              predictor );
-
-                    // Apply the filter (YUV)
-                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
-                                            f->y_stride,
-                                            predictor,
-                                            16,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator,
-                                            count );
-
-                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
-                                            f->uv_stride,
-                                            predictor + 256,
-                                            8,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator + 256,
-                                            count + 256 );
-
-                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
-                                            f->uv_stride,
-                                            predictor + 320,
-                                            8,
-                                            strength,
-                                            filter_weight[frame],
-                                            accumulator + 320,
-                                            count + 320 );
-                }
-            }
-
-            // Normalize filter output to produce AltRef frame
-            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
-            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
-            byte = mb_y_offset;
-            for (i = 0,k = 0; i < 16; i++)
-            {
-                for (j = 0; j < 16; j++, k++)
-                {
-                    unsigned int pval = accumulator[k] + (count[k] >> 1);
-                    pval *= cpi->fixed_divide[count[k]];
-                    pval >>= 19;
-
-                    dst1[byte] = (unsigned char)pval;
-
-                    // move to next pixel
-                    byte++;
-                }
-
-                byte += stride - 16;
-            }
-
-            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
-            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
-            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
-            byte = mb_uv_offset;
-            for (i = 0,k = 256; i < 8; i++)
-            {
-                for (j = 0; j < 8; j++, k++)
-                {
-                    int m=k+64;
-
-                    // U
-                    unsigned int pval = accumulator[k] + (count[k] >> 1);
-                    pval *= cpi->fixed_divide[count[k]];
-                    pval >>= 19;
-                    dst1[byte] = (unsigned char)pval;
-
-                    // V
-                    pval = accumulator[m] + (count[m] >> 1);
-                    pval *= cpi->fixed_divide[count[m]];
-                    pval >>= 19;
-                    dst2[byte] = (unsigned char)pval;
-
-                    // move to next pixel
-                    byte++;
-                }
-
-                byte += stride - 8;
-            }
-
-            mm_ptr++;
-            mb_y_offset += 16;
-            mb_uv_offset += 8;
-        }
-
-        mb_y_offset += 16*f->y_stride-f->y_width;
-        mb_uv_offset += 8*f->uv_stride-f->uv_width;
-    }
-
-    // Restore input state
-    mbd->pre.y_buffer = y_buffer;
-    mbd->pre.u_buffer = u_buffer;
-    mbd->pre.v_buffer = v_buffer;
-}
-
-static void vp8cx_temp_filter_c
-(
-    VP8_COMP *cpi
-)
-{
-    int frame = 0;
-
-    int num_frames_backward = 0;
-    int num_frames_forward = 0;
-    int frames_to_blur_backward = 0;
-    int frames_to_blur_forward = 0;
-    int frames_to_blur = 0;
-    int start_frame = 0;
-    unsigned int filtered = 0;
-
-    int strength = cpi->oxcf.arnr_strength;
-
-    int blur_type = cpi->oxcf.arnr_type;
-
-    int max_frames = cpi->active_arnr_frames;
-
-    num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
-
-    if (num_frames_backward < 0)
-        num_frames_backward += cpi->oxcf.lag_in_frames;
-
-    num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
-
-    switch (blur_type)
-    {
-    case 1:
-        /////////////////////////////////////////
-        // Backward Blur
-
-        frames_to_blur_backward = num_frames_backward;
-
-        if (frames_to_blur_backward >= max_frames)
-            frames_to_blur_backward = max_frames - 1;
-
-        frames_to_blur = frames_to_blur_backward + 1;
-        break;
-
-    case 2:
-        /////////////////////////////////////////
-        // Forward Blur
-
-        frames_to_blur_forward = num_frames_forward;
-
-        if (frames_to_blur_forward >= max_frames)
-            frames_to_blur_forward = max_frames - 1;
-
-        frames_to_blur = frames_to_blur_forward + 1;
-        break;
-
-    case 3:
-    default:
-        /////////////////////////////////////////
-        // Center Blur
-        frames_to_blur_forward = num_frames_forward;
-        frames_to_blur_backward = num_frames_backward;
-
-        if (frames_to_blur_forward > frames_to_blur_backward)
-            frames_to_blur_forward = frames_to_blur_backward;
-
-        if (frames_to_blur_backward > frames_to_blur_forward)
-            frames_to_blur_backward = frames_to_blur_forward;
-
-        // When max_frames is even we have 1 more frame backward than forward
-        if (frames_to_blur_forward > (max_frames - 1) / 2)
-            frames_to_blur_forward = ((max_frames - 1) / 2);
-
-        if (frames_to_blur_backward > (max_frames / 2))
-            frames_to_blur_backward = (max_frames / 2);
-
-        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
-        break;
-    }
-
-    start_frame = (cpi->last_alt_ref_sei
-                    + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
-
-#ifdef DEBUGFWG
-    // DEBUG FWG
-    printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
-           , max_frames
-           , num_frames_backward
-           , num_frames_forward
-           , frames_to_blur
-           , frames_to_blur_backward
-           , frames_to_blur_forward
-           , cpi->source_encode_index
-           , cpi->last_alt_ref_sei
-           , start_frame);
-#endif
-
-    // Setup frame pointers, NULL indicates frame not included in filter
-    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
-    for (frame = 0; frame < frames_to_blur; frame++)
-    {
-        int which_buffer =  start_frame - frame;
-
-        if (which_buffer < 0)
-            which_buffer += cpi->oxcf.lag_in_frames;
-
-        cpi->frames[frames_to_blur-1-frame]
-                = &cpi->src_buffer[which_buffer].source_buffer;
-    }
-
-    vp8cx_temp_blur1_c (
-        cpi,
-        frames_to_blur,
-        frames_to_blur_backward,
-        strength );
-}
-#endif
-
 
 static void encode_frame_to_data_rate
 (
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
new file mode 100644
index 000000000..28938657f
--- /dev/null
+++ b/vp8/encoder/temporal_filter.c
@@ -0,0 +1,662 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "onyxc_int.h"
+#include "onyx_int.h"
+#include "systemdependent.h"
+#include "quantize.h"
+#include "alloccommon.h"
+#include "mcomp.h"
+#include "firstpass.h"
+#include "psnr.h"
+#include "vpx_scale/vpxscale.h"
+#include "extend.h"
+#include "ratectrl.h"
+#include "quant_common.h"
+#include "segmentation.h"
+#include "g_common.h"
+#include "vpx_scale/yv12extend.h"
+#include "postproc.h"
+#include "vpx_mem/vpx_mem.h"
+#include "swapyv12buffer.h"
+#include "threading.h"
+#include "vpx_ports/vpx_timer.h"
+#include "vpxerrors.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <limits.h>
+
+/*
+#if CONFIG_RUNTIME_CPU_DETECT
+#define IF_RTCD(x) (x)
+#define RTCD(x) &cpi->common.rtcd.x
+#else
+#define IF_RTCD(x) NULL
+#define RTCD(x) NULL
+#endif
+*/
+
+#define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
+#define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
+
+#define USE_FILTER_LUT 1
+#if VP8_TEMPORAL_ALT_REF
+
+#if USE_FILTER_LUT
+static int modifier_lut[7][19] =
+{
+    // Strength=0
+    {16, 13, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=1
+    {16, 15, 10, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=2
+    {16, 15, 13, 9, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=3
+    {16, 16, 15, 13, 10, 7, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=4
+    {16, 16, 15, 14, 13, 11, 9, 7, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
+    // Strength=5
+    {16, 16, 16, 15, 15, 14, 13, 11, 10, 8, 7, 5, 3, 0, 0, 0, 0, 0, 0},
+    // Strength=6
+    {16, 16, 16, 16, 15, 15, 14, 14, 13, 12, 11, 10, 9, 8, 7, 5, 4, 2, 1}
+};
+#endif
+static void build_predictors_mb
+(
+    MACROBLOCKD *x,
+    unsigned char *y_mb_ptr,
+    unsigned char *u_mb_ptr,
+    unsigned char *v_mb_ptr,
+    int stride,
+    int mv_row,
+    int mv_col,
+    unsigned char *pred
+)
+{
+    int offset;
+    unsigned char *yptr, *uptr, *vptr;
+
+    // Y
+    yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3);
+
+    if ((mv_row | mv_col) & 7)
+    {
+//        vp8_sixtap_predict16x16_c(yptr, stride,
+//                                    mv_col & 7, mv_row & 7, &pred[0], 16);
+        x->subpixel_predict16x16(yptr, stride,
+                                    mv_col & 7, mv_row & 7, &pred[0], 16);
+    }
+    else
+    {
+        //vp8_copy_mem16x16_c (yptr, stride, &pred[0], 16);
+        RECON_INVOKE(&x->rtcd->recon, copy16x16)(yptr, stride, &pred[0], 16);
+    }
+
+    // U & V
+    mv_row >>= 1;
+    mv_col >>= 1;
+    stride >>= 1;
+    offset = (mv_row >> 3) * stride + (mv_col >> 3);
+    uptr = u_mb_ptr + offset;
+    vptr = v_mb_ptr + offset;
+
+    if ((mv_row | mv_col) & 7)
+    {
+        x->subpixel_predict8x8(uptr, stride,
+                            mv_col & 7, mv_row & 7, &pred[256], 8);
+        x->subpixel_predict8x8(vptr, stride,
+                            mv_col & 7, mv_row & 7, &pred[320], 8);
+    }
+    else
+    {
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(uptr, stride, &pred[256], 8);
+        RECON_INVOKE(&x->rtcd->recon, copy8x8)(vptr, stride, &pred[320], 8);
+    }
+}
+static void apply_temporal_filter
+(
+    unsigned char *frame1,
+    unsigned int stride,
+    unsigned char *frame2,
+    unsigned int block_size,
+    int strength,
+    int filter_weight,
+    int *accumulator,
+    int *count
+)
+{
+    int i, j, k;
+    int modifier;
+    int byte = 0;
+
+#if USE_FILTER_LUT
+    int *lut = modifier_lut[strength];
+#endif
+
+    for (i = 0,k = 0; i < block_size; i++)
+    {
+        for (j = 0; j < block_size; j++, k++)
+        {
+
+            int src_byte = frame1[byte];
+            int pixel_value = *frame2++;
+
+#if USE_FILTER_LUT
+            // LUT implementation --
+            // improves precision of filter
+            modifier = abs(src_byte-pixel_value);
+            modifier = modifier>18 ? 0 : lut[modifier];
+#else
+            modifier   = src_byte;
+            modifier  -= pixel_value;
+            modifier  *= modifier;
+            modifier >>= strength;
+            modifier  *= 3;
+
+            if (modifier > 16)
+                modifier = 16;
+
+            modifier = 16 - modifier;
+#endif
+            modifier *= filter_weight;
+            
+            count[k] += modifier;
+            accumulator[k] += modifier * pixel_value;
+
+            byte++;
+        }
+
+        byte += stride - block_size;
+    }
+}
+
+#if ALT_REF_MC_ENABLED
+static int dummy_cost[2*mv_max+1];
+
+static int find_matching_mb
+(
+    VP8_COMP *cpi,
+    YV12_BUFFER_CONFIG *arf_frame,
+    YV12_BUFFER_CONFIG *frame_ptr,
+    int mb_offset,
+    int error_thresh
+)
+{
+    MACROBLOCK *x = &cpi->mb;
+    int thissme;
+    int step_param;
+    int further_steps;
+    int n = 0;
+    int sadpb = x->sadperbit16;
+    int bestsme = INT_MAX;
+    int num00 = 0;
+
+    BLOCK *b = &x->block[0];
+    BLOCKD *d = &x->e_mbd.block[0];
+    MV best_ref_mv1 = {0,0};
+
+    int *mvcost[2]    = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+    int *mvsadcost[2] = { &dummy_cost[mv_max+1], &dummy_cost[mv_max+1] };
+
+    // Save input state
+    unsigned char **base_src = b->base_src;
+    int src = b->src;
+    int src_stride = b->src_stride;
+    unsigned char **base_pre = d->base_pre;
+    int pre = d->pre;
+    int pre_stride = d->pre_stride;
+
+    // Setup frame pointers
+    b->base_src = &arf_frame->y_buffer;
+    b->src_stride = arf_frame->y_stride;
+    b->src = mb_offset;
+
+    d->base_pre = &frame_ptr->y_buffer;
+    d->pre_stride = frame_ptr->y_stride;
+    d->pre = mb_offset;
+
+    // Further step/diamond searches as necessary
+    if (cpi->Speed < 8)
+    {
+        step_param = cpi->sf.first_step +
+                    ((cpi->Speed > 5) ? 1 : 0);
+        further_steps =
+            (cpi->sf.max_step_search_steps - 1)-step_param;
+    }
+    else
+    {
+        step_param = cpi->sf.first_step + 2;
+        further_steps = 0;
+    }
+
+    if (1/*cpi->sf.search_method == HEX*/)
+    {
+        // TODO Check that the 16x16 vf & sdf are selected here
+        bestsme = vp8_hex_search(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb/*x->errorperbit*/,
+            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+            mvsadcost, mvcost);
+    }
+    else
+    {
+        int mv_x, mv_y;
+
+        bestsme = cpi->diamond_search_sad(x, b, d,
+            &best_ref_mv1, &d->bmi.mv.as_mv,
+            step_param,
+            sadpb / 2/*x->errorperbit*/,
+            &num00, &cpi->fn_ptr,
+            mvsadcost, mvcost); //sadpb < 9
+
+        // Further step/diamond searches as necessary
+        n = 0;
+        //further_steps = (cpi->sf.max_step_search_steps - 1) - step_param;
+
+        n = num00;
+        num00 = 0;
+
+        while (n < further_steps)
+        {
+            n++;
+
+            if (num00)
+                num00--;
+            else
+            {
+                thissme = cpi->diamond_search_sad(x, b, d,
+                    &best_ref_mv1, &d->bmi.mv.as_mv,
+                    step_param + n,
+                    sadpb / 4/*x->errorperbit*/,
+                    &num00, &cpi->fn_ptr,
+                    mvsadcost, mvcost); //sadpb = 9
+
+                if (thissme < bestsme)
+                {
+                    bestsme = thissme;
+                    mv_y = d->bmi.mv.as_mv.row;
+                    mv_x = d->bmi.mv.as_mv.col;
+                }
+                else
+                {
+                    d->bmi.mv.as_mv.row = mv_y;
+                    d->bmi.mv.as_mv.col = mv_x;
+                }
+            }
+        }
+    }
+
+#if ALT_REF_SUBPEL_ENABLED
+    // Try sub-pixel MC?
+    //if (bestsme > error_thresh && bestsme < INT_MAX)
+    {
+        bestsme = cpi->find_fractional_mv_step(x, b, d,
+                    &d->bmi.mv.as_mv, &best_ref_mv1,
+                    x->errorperbit, cpi->fn_ptr.svf,
+                    cpi->fn_ptr.vf, cpi->mb.mvcost);
+    }
+#endif
+
+    // Save input state
+    b->base_src = base_src;
+    b->src = src;
+    b->src_stride = src_stride;
+    d->base_pre = base_pre;
+    d->pre = pre;
+    d->pre_stride = pre_stride;
+
+    return bestsme;
+}
+#endif
+
+static void vp8cx_temp_blur1_c
+(
+    VP8_COMP *cpi,
+    int frame_count,
+    int alt_ref_index,
+    int strength
+)
+{
+    int byte;
+    int frame;
+    int mb_col, mb_row;
+    unsigned int filter_weight[MAX_LAG_BUFFERS];
+    unsigned char *mm_ptr = cpi->fp_motion_map;
+    int cols = cpi->common.mb_cols;
+    int rows = cpi->common.mb_rows;
+    int MBs  = cpi->common.MBs;
+    int mb_y_offset = 0;
+    int mb_uv_offset = 0;
+    unsigned int accumulator[384];
+    unsigned int count[384];
+    MACROBLOCKD *mbd = &cpi->mb.e_mbd;
+    YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
+    unsigned char *dst1, *dst2;
+    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
+    
+    // Save input state
+    unsigned char *y_buffer = mbd->pre.y_buffer;
+    unsigned char *u_buffer = mbd->pre.u_buffer;
+    unsigned char *v_buffer = mbd->pre.v_buffer;
+
+    if (!cpi->use_weighted_temporal_filter)
+    {
+        // Temporal filtering is unweighted
+        for (frame = 0; frame < frame_count; frame++)
+            filter_weight[frame] = 1;
+    }
+
+    for (mb_row = 0; mb_row < rows; mb_row++)
+    {
+#if ALT_REF_MC_ENABLED
+        // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+        cpi->mb.mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 19));
+        cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16)
+                                + (VP8BORDERINPIXELS - 19);
+#endif
+
+        for (mb_col = 0; mb_col < cols; mb_col++)
+        {
+            int i, j, k, w;
+            int weight_cap;
+            int stride;
+
+            vpx_memset(accumulator, 0, 384*sizeof(unsigned int));
+            vpx_memset(count, 0, 384*sizeof(unsigned int));
+
+#if ALT_REF_MC_ENABLED
+            // Reduced search extent by 3 for 6-tap filter & smaller UMV border
+            cpi->mb.mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 19));
+            cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16)
+                                    + (VP8BORDERINPIXELS - 19);
+#endif
+
+            // Read & process macroblock weights from motion map
+            if (cpi->use_weighted_temporal_filter)
+            {
+                weight_cap = 2;
+
+                for (frame = alt_ref_index-1; frame >= 0; frame--)
+                {
+                    w = *(mm_ptr + (frame+1)*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+                filter_weight[alt_ref_index] = 2;
+
+                weight_cap = 2;
+
+                for (frame = alt_ref_index+1; frame < frame_count; frame++)
+                {
+                    w = *(mm_ptr + frame*MBs);
+                    filter_weight[frame] = w < weight_cap ? w : weight_cap;
+                    weight_cap = w;
+                }
+
+            }
+
+            for (frame = 0; frame < frame_count; frame++)
+            {
+                int err;
+
+                if (cpi->frames[frame] == NULL)
+                    continue;
+
+                mbd->block[0].bmi.mv.as_mv.row = 0;
+                mbd->block[0].bmi.mv.as_mv.col = 0;
+
+#if ALT_REF_MC_ENABLED
+                //if (filter_weight[frame] == 0)
+                {
+#define THRESH_LOW   10000
+#define THRESH_HIGH  20000
+
+                    // Correlation has been lost try MC
+                    err = find_matching_mb ( cpi,
+                                             cpi->frames[alt_ref_index],
+                                             cpi->frames[frame],
+                                             mb_y_offset,
+                                             THRESH_LOW );
+
+                    if (filter_weight[frame] < 2)
+                    {
+                        // Set weight depending on error
+                        filter_weight[frame] = err<THRESH_LOW
+                                                ? 2 : err<THRESH_HIGH ? 1 : 0;
+                    }
+                }
+#endif
+                if (filter_weight[frame] != 0)
+                {
+                    // Construct the predictors
+                    build_predictors_mb (
+                              mbd,
+                              cpi->frames[frame]->y_buffer + mb_y_offset,
+                              cpi->frames[frame]->u_buffer + mb_uv_offset,
+                              cpi->frames[frame]->v_buffer + mb_uv_offset,
+                              cpi->frames[frame]->y_stride,
+                              mbd->block[0].bmi.mv.as_mv.row,
+                              mbd->block[0].bmi.mv.as_mv.col,
+                              predictor );
+
+                    // Apply the filter (YUV)
+                    apply_temporal_filter ( f->y_buffer + mb_y_offset,
+                                            f->y_stride,
+                                            predictor,
+                                            16,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator,
+                                            count );
+
+                    apply_temporal_filter ( f->u_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 256,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 256,
+                                            count + 256 );
+
+                    apply_temporal_filter ( f->v_buffer + mb_uv_offset,
+                                            f->uv_stride,
+                                            predictor + 320,
+                                            8,
+                                            strength,
+                                            filter_weight[frame],
+                                            accumulator + 320,
+                                            count + 320 );
+                }
+            }
+
+            // Normalize filter output to produce AltRef frame
+            dst1 = cpi->alt_ref_buffer.source_buffer.y_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.y_stride;
+            byte = mb_y_offset;
+            for (i = 0,k = 0; i < 16; i++)
+            {
+                for (j = 0; j < 16; j++, k++)
+                {
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+
+                    dst1[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 16;
+            }
+
+            dst1 = cpi->alt_ref_buffer.source_buffer.u_buffer;
+            dst2 = cpi->alt_ref_buffer.source_buffer.v_buffer;
+            stride = cpi->alt_ref_buffer.source_buffer.uv_stride;
+            byte = mb_uv_offset;
+            for (i = 0,k = 256; i < 8; i++)
+            {
+                for (j = 0; j < 8; j++, k++)
+                {
+                    int m=k+64;
+
+                    // U
+                    unsigned int pval = accumulator[k] + (count[k] >> 1);
+                    pval *= cpi->fixed_divide[count[k]];
+                    pval >>= 19;
+                    dst1[byte] = (unsigned char)pval;
+
+                    // V
+                    pval = accumulator[m] + (count[m] >> 1);
+                    pval *= cpi->fixed_divide[count[m]];
+                    pval >>= 19;
+                    dst2[byte] = (unsigned char)pval;
+
+                    // move to next pixel
+                    byte++;
+                }
+
+                byte += stride - 8;
+            }
+
+            mm_ptr++;
+            mb_y_offset += 16;
+            mb_uv_offset += 8;
+        }
+
+        mb_y_offset += 16*f->y_stride-f->y_width;
+        mb_uv_offset += 8*f->uv_stride-f->uv_width;
+    }
+
+    // Restore input state
+    mbd->pre.y_buffer = y_buffer;
+    mbd->pre.u_buffer = u_buffer;
+    mbd->pre.v_buffer = v_buffer;
+}
+
+void vp8cx_temp_filter_c
+(
+    VP8_COMP *cpi
+)
+{
+    int frame = 0;
+
+    int num_frames_backward = 0;
+    int num_frames_forward = 0;
+    int frames_to_blur_backward = 0;
+    int frames_to_blur_forward = 0;
+    int frames_to_blur = 0;
+    int start_frame = 0;
+    unsigned int filtered = 0;
+
+    int strength = cpi->oxcf.arnr_strength;
+
+    int blur_type = cpi->oxcf.arnr_type;
+
+    int max_frames = cpi->active_arnr_frames;
+
+    num_frames_backward = cpi->last_alt_ref_sei - cpi->source_encode_index;
+
+    if (num_frames_backward < 0)
+        num_frames_backward += cpi->oxcf.lag_in_frames;
+
+    num_frames_forward = cpi->oxcf.lag_in_frames - (num_frames_backward + 1);
+
+    switch (blur_type)
+    {
+    case 1:
+        /////////////////////////////////////////
+        // Backward Blur
+
+        frames_to_blur_backward = num_frames_backward;
+
+        if (frames_to_blur_backward >= max_frames)
+            frames_to_blur_backward = max_frames - 1;
+
+        frames_to_blur = frames_to_blur_backward + 1;
+        break;
+
+    case 2:
+        /////////////////////////////////////////
+        // Forward Blur
+
+        frames_to_blur_forward = num_frames_forward;
+
+        if (frames_to_blur_forward >= max_frames)
+            frames_to_blur_forward = max_frames - 1;
+
+        frames_to_blur = frames_to_blur_forward + 1;
+        break;
+
+    case 3:
+    default:
+        /////////////////////////////////////////
+        // Center Blur
+        frames_to_blur_forward = num_frames_forward;
+        frames_to_blur_backward = num_frames_backward;
+
+        if (frames_to_blur_forward > frames_to_blur_backward)
+            frames_to_blur_forward = frames_to_blur_backward;
+
+        if (frames_to_blur_backward > frames_to_blur_forward)
+            frames_to_blur_backward = frames_to_blur_forward;
+
+        // When max_frames is even we have 1 more frame backward than forward
+        if (frames_to_blur_forward > (max_frames - 1) / 2)
+            frames_to_blur_forward = ((max_frames - 1) / 2);
+
+        if (frames_to_blur_backward > (max_frames / 2))
+            frames_to_blur_backward = (max_frames / 2);
+
+        frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
+        break;
+    }
+
+    start_frame = (cpi->last_alt_ref_sei
+                    + frames_to_blur_forward) % cpi->oxcf.lag_in_frames;
+
+#ifdef DEBUGFWG
+    // DEBUG FWG
+    printf("max:%d FBCK:%d FFWD:%d ftb:%d ftbbck:%d ftbfwd:%d sei:%d lasei:%d start:%d"
+           , max_frames
+           , num_frames_backward
+           , num_frames_forward
+           , frames_to_blur
+           , frames_to_blur_backward
+           , frames_to_blur_forward
+           , cpi->source_encode_index
+           , cpi->last_alt_ref_sei
+           , start_frame);
+#endif
+
+    // Setup frame pointers, NULL indicates frame not included in filter
+    vpx_memset(cpi->frames, 0, max_frames*sizeof(YV12_BUFFER_CONFIG *));
+    for (frame = 0; frame < frames_to_blur; frame++)
+    {
+        int which_buffer =  start_frame - frame;
+
+        if (which_buffer < 0)
+            which_buffer += cpi->oxcf.lag_in_frames;
+
+        cpi->frames[frames_to_blur-1-frame]
+                = &cpi->src_buffer[which_buffer].source_buffer;
+    }
+
+    vp8cx_temp_blur1_c (
+        cpi,
+        frames_to_blur,
+        frames_to_blur_backward,
+        strength );
+}
+#endif
diff --git a/vp8/encoder/temporal_filter.h b/vp8/encoder/temporal_filter.h
new file mode 100644
index 000000000..f70e8c01e
--- /dev/null
+++ b/vp8/encoder/temporal_filter.h
@@ -0,0 +1,19 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef __INC_VP8_TEMPORAL_FILTER_H
+#define __INC_VP8_TEMPORAL_FILTER_H
+
+#include "onyx_int.h"
+
+void vp8cx_temp_filter_c(VP8_COMP *cpi);
+
+#endif // __INC_VP8_TEMPORAL_FILTER_H
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 2a8440265..98288f2b6 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -82,6 +82,8 @@ VP8_CX_SRCS-yes += encoder/treewriter.c
 VP8_CX_SRCS-yes += encoder/variance_c.c
 VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.h
 VP8_CX_SRCS-$(CONFIG_PSNR) += common/postproc.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.c
+VP8_CX_SRCS-yes += encoder/temporal_filter.h
 
 ifeq ($(CONFIG_REALTIME_ONLY),yes)
 VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c

From b71962fdc98c3fb9fdf95d74191452cd51cbc2b5 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 20 Oct 2010 15:39:11 -0700
Subject: [PATCH 237/307] Add runtime CPU detection support for ARM.

The primary goal is to allow a binary to be built which supports
 NEON, but can fall back to non-NEON routines, since some Android
 devices do not have NEON, even if they are otherwise ARMv7 (e.g.,
 Tegra).
The configure-generated flags HAVE_ARMV7, etc., are used to decide
 which versions of each function to build, and when
 CONFIG_RUNTIME_CPU_DETECT is enabled, the correct version is chosen
 at run time.
In order for this to work, the CFLAGS must be set to something
 appropriate (e.g., without -mfpu=neon for ARMv7, and with
 appropriate -march and -mcpu for even earlier configurations), or
 the native C code will not be able to run.
The ASFLAGS must remain set for the most advanced instruction set
 required at build time, since the ARM assembler will refuse to emit
 them otherwise.
I have not attempted to make any changes to configure to do this
 automatically.
Doing so will probably require the addition of new configure options.

Many of the hooks for RTCD on ARM were already there, but a lot of
 the code had bit-rotted, and a good deal of the ARM-specific code
 is not integrated into the RTCD structs at all.
I did not try to resolve the latter, merely to add the minimal amount
 of protection around them to allow RTCD to work.
Those functions that were called based on an ifdef at the calling
 site were expanded to check the RTCD flags at that site, but they
 should be added to an RTCD struct somewhere in the future.
The functions invoked with global function pointers still are, but
 these should be moved into an RTCD struct for thread safety (I
 believe every platform currently supported has atomic pointer
 stores, but this is not guaranteed).

The encoder's boolhuff functions did not even have _c and armv7
 suffixes, and the correct version was resolved at link time.
The token packing functions did have appropriate suffixes, but the
 version was selected with a define, with no associated RTCD struct.
However, for both of these, the only armv7 instruction they actually
 used was rbit, and this was completely superfluous, so I reworked
 them to avoid it.
The only non-ARMv4 instruction remaining in them is clz, which is
 ARMv5 (not even ARMv5TE is required).
Considering that there are no ARM-specific configs which are not at
 least ARMv5TE, I did not try to detect these at runtime, and simply
 enable them for ARMv5 and above.

Finally, the NEON register saving code was completely non-reentrant,
 since it saved the registers to a global, static variable.
I moved the storage for this onto the stack.
A single binary built with this code was tested on an ARM11 (ARMv6)
 and a Cortex A8 (ARMv7 w/NEON), for both the encoder and decoder,
 and produced identical output, while using the correct accelerated
 functions on each.
I did not test on any earlier processors.

Change-Id: I45cbd63a614f4554c3b325c45d46c0806f009eaa
---
 libs.mk                                       |   1 +
 vp8/common/arm/arm_systemdependent.c          | 134 ++++++++++++
 vp8/common/arm/idct_arm.h                     |   4 +
 vp8/common/arm/loopfilter_arm.h               |   4 +
 vp8/common/arm/recon_arm.h                    |   4 +
 vp8/common/arm/subpixel_arm.h                 |   4 +
 vp8/common/arm/systemdependent.c              | 149 --------------
 vp8/common/generic/systemdependent.c          |   5 +
 vp8/common/onyxc_int.h                        |   1 +
 vp8/decoder/arm/arm_dsystemdependent.c        |  66 ++++++
 vp8/decoder/arm/dequantize_arm.h              |   4 +
 vp8/decoder/arm/dsystemdependent.c            |  39 ----
 vp8/decoder/generic/dsystemdependent.c        |   5 +
 vp8/decoder/onyxd_if.c                        |  58 +++++-
 vp8/encoder/arm/arm_csystemdependent.c        | 136 +++++++++++++
 .../boolhuff_armv5te.asm}                     |  11 +-
 .../vp8_packtokens_armv5.asm}                 |  20 +-
 .../vp8_packtokens_mbrow_armv5.asm}           |  20 +-
 .../vp8_packtokens_partitions_armv5.asm}      |  20 +-
 vp8/encoder/arm/csystemdependent.c            | 164 ---------------
 vp8/encoder/arm/dct_arm.h                     |   4 +
 vp8/encoder/arm/encodemb_arm.h                |   2 +
 vp8/encoder/arm/variance_arm.h                |   2 +
 vp8/encoder/bitstream.h                       |  14 +-
 vp8/encoder/generic/csystemdependent.c        |   5 +
 vp8/encoder/onyx_if.c                         |  92 +++++++--
 vp8/encoder/picklpf.c                         |  71 +++++--
 vp8/vp8_common.mk                             |   9 +-
 vp8/vp8cx_arm.mk                              |  19 +-
 vp8/vp8dx_arm.mk                              |   6 +-
 vpx_ports/arm.h                               |  27 +++
 vpx_ports/arm_cpudetect.c                     | 190 ++++++++++++++++++
 vpx_scale/arm/scalesystemdependant.c          |  22 +-
 33 files changed, 841 insertions(+), 471 deletions(-)
 create mode 100644 vp8/common/arm/arm_systemdependent.c
 delete mode 100644 vp8/common/arm/systemdependent.c
 create mode 100644 vp8/decoder/arm/arm_dsystemdependent.c
 delete mode 100644 vp8/decoder/arm/dsystemdependent.c
 create mode 100644 vp8/encoder/arm/arm_csystemdependent.c
 rename vp8/encoder/arm/{neon/boolhuff_armv7.asm => armv5te/boolhuff_armv5te.asm} (95%)
 rename vp8/encoder/arm/{neon/vp8_packtokens_armv7.asm => armv5te/vp8_packtokens_armv5.asm} (93%)
 rename vp8/encoder/arm/{neon/vp8_packtokens_mbrow_armv7.asm => armv5te/vp8_packtokens_mbrow_armv5.asm} (94%)
 rename vp8/encoder/arm/{neon/vp8_packtokens_partitions_armv7.asm => armv5te/vp8_packtokens_partitions_armv5.asm} (95%)
 delete mode 100644 vp8/encoder/arm/csystemdependent.c
 create mode 100644 vpx_ports/arm.h
 create mode 100644 vpx_ports/arm_cpudetect.c

diff --git a/libs.mk b/libs.mk
index 4beaa50cb..9ded3945a 100644
--- a/libs.mk
+++ b/libs.mk
@@ -93,6 +93,7 @@ CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
 endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
new file mode 100644
index 000000000..fe62fae13
--- /dev/null
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,134 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+    int flags = arm_cpu_caps();
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+    rtcd->flags = flags;
+
+    /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+        rtcd->recon.recon       = vp8_recon_b_armv6;
+        rtcd->recon.recon2      = vp8_recon2b_armv6;
+        rtcd->recon.recon4      = vp8_recon4b_armv6;
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
+        rtcd->recon.recon       = vp8_recon_b_neon;
+        rtcd->recon.recon2      = vp8_recon2b_neon;
+        rtcd->recon.recon4      = vp8_recon4b_neon;
+    }
+#endif
+
+#endif
+
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_media)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+    }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr =
+         vp8_build_intra_predictors_mby_neon;
+        vp8_build_intra_predictors_mby_s_ptr =
+         vp8_build_intra_predictors_mby_s_neon;
+    }
+#endif
+}
diff --git a/vp8/common/arm/idct_arm.h b/vp8/common/arm/idct_arm.h
index f28d7f649..8b8d17917 100644
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6
 
@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon
 
@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/loopfilter_arm.h b/vp8/common/arm/loopfilter_arm.h
index 6c3628ae9..cd62207d7 100644
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6
 
@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon
 
@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index 18855a3c0..c30f6dc2d 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
 extern prototype_copy_block(vp8_copy_mem8x4_v6);
 extern prototype_copy_block(vp8_copy_mem16x16_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_armv6
 
@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_neon
 
@@ -67,5 +70,6 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/subpixel_arm.h b/vp8/common/arm/subpixel_arm.h
index 53600e547..6288538d0 100644
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6
 
@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon
 
@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/common/arm/systemdependent.c b/vp8/common/arm/systemdependent.c
deleted file mode 100644
index 1eed97e02..000000000
--- a/vp8/common/arm/systemdependent.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-    rtcd->recon.recon       = vp8_recon_b_neon;
-    rtcd->recon.recon2      = vp8_recon2b_neon;
-    rtcd->recon.recon4      = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-    rtcd->recon.recon       = vp8_recon_b_armv6;
-    rtcd->recon.recon2      = vp8_recon2b_armv6;
-    rtcd->recon.recon4      = vp8_recon4b_armv6;
-#else
-//pure c
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
-    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c04e31ffe..0ef375e33 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
 #include "onyxc_int.h"
 
 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);
 
 void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
 extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -77,4 +78,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     vp8_arch_x86_common_init(ctx);
 #endif
 
+#if ARCH_ARM
+    vp8_arch_arm_common_init(ctx);
+#endif
+
 }
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index 4966002f5..d12143d4d 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
     vp8_subpix_rtcd_vtable_t      subpix;
     vp8_loopfilter_rtcd_vtable_t  loopfilter;
     vp8_postproc_rtcd_vtable_t    postproc;
+    int                           flags;
 #else
     int unused;
 #endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
new file mode 100644
index 000000000..77cff47db
--- /dev/null
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "blockd.h"
+#include "pragmas.h"
+#include "postproc.h"
+#include "dboolhuff.h"
+#include "dequantize.h"
+#include "onyxd_int.h"
+
+void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = pbi->common.rtcd.flags;
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_v6;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_v6;
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_v6;
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        pbi->dequant.block               = vp8_dequantize_b_neon;
+        pbi->dequant.idct_add            = vp8_dequant_idct_add_neon;
+        /*This is not used: NEON always dequants two blocks at once.
+        pbi->dequant.dc_idct_add         = vp8_dequant_dc_idct_add_neon;*/
+        pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
+        pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
+        pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
+#if 0 //For use with RTCD, when implemented
+        pbi->dboolhuff.start             = vp8dx_start_decode_c;
+        pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
+        pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
+        pbi->dboolhuff.devalue           = vp8dx_decode_value_c;
+#endif
+    }
+#endif
+#endif
+}
diff --git a/vp8/decoder/arm/dequantize_arm.h b/vp8/decoder/arm/dequantize_arm.h
index 40151e01a..b7d800d26 100644
--- a/vp8/decoder/arm/dequantize_arm.h
+++ b/vp8/decoder/arm/dequantize_arm.h
@@ -20,6 +20,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_v6)
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_v6);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_v6
 
@@ -38,6 +39,7 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_v6);
 #undef vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_v6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_dequant_block(vp8_dequantize_b_neon);
@@ -47,6 +49,7 @@ extern prototype_dequant_dc_idct_add_y_block(vp8_dequant_dc_idct_add_y_block_neo
 extern prototype_dequant_idct_add_y_block(vp8_dequant_idct_add_y_block_neon);
 extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_dequant_block
 #define vp8_dequant_block vp8_dequantize_b_neon
 
@@ -65,5 +68,6 @@ extern prototype_dequant_idct_add_uv_block(vp8_dequant_idct_add_uv_block_neon);
 #undef vp8_dequant_idct_add_uv_block
 #define vp8_dequant_idct_add_uv_block vp8_dequant_idct_add_uv_block_neon
 #endif
+#endif
 
 #endif
diff --git a/vp8/decoder/arm/dsystemdependent.c b/vp8/decoder/arm/dsystemdependent.c
deleted file mode 100644
index 9dcf7b657..000000000
--- a/vp8/decoder/arm/dsystemdependent.c
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "blockd.h"
-#include "pragmas.h"
-#include "postproc.h"
-#include "dboolhuff.h"
-#include "dequantize.h"
-#include "onyxd_int.h"
-
-void vp8_dmachine_specific_config(VP8D_COMP *pbi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    pbi->mb.rtcd         = &pbi->common.rtcd;
-#if HAVE_ARMV7
-    pbi->dequant.block   = vp8_dequantize_b_neon;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-
-#elif HAVE_ARMV6
-    pbi->dequant.block   = vp8_dequantize_b_v6;
-    pbi->dboolhuff.start = vp8dx_start_decode_c;
-    pbi->dboolhuff.fill  = vp8dx_bool_decoder_fill_c;
-    pbi->dboolhuff.debool = vp8dx_decode_bool_c;
-    pbi->dboolhuff.devalue = vp8dx_decode_value_c;
-#endif
-#endif
-}
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 60f2af5b8..84de7af43 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -14,6 +14,7 @@
 #include "onyxd_int.h"
 
 extern void vp8_arch_x86_decode_init(VP8D_COMP *pbi);
+extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
 
 void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 {
@@ -37,4 +38,8 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 #if ARCH_X86 || ARCH_X86_64
     vp8_arch_x86_decode_init(pbi);
 #endif
+
+#if ARCH_ARM
+    vp8_arch_arm_decode_init(pbi);
+#endif
 }
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 884c38da0..b5a6e3e85 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -30,6 +30,9 @@
 #include "systemdependent.h"
 #include "vpx_ports/vpx_timer.h"
 #include "detokenize.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 extern void vp8_init_loop_filter(VP8_COMMON *cm);
 extern void vp8cx_init_de_quantizer(VP8D_COMP *pbi);
@@ -224,7 +227,6 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
-static INT64 dx_store_reg[8];
 #endif
 
 static int get_free_fb (VP8_COMMON *cm)
@@ -312,6 +314,9 @@ static int swap_frame_buffers (VP8_COMMON *cm)
 
 int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsigned char *source, INT64 time_stamp)
 {
+#if HAVE_ARMV7
+    INT64 dx_store_reg[8];
+#endif
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
     VP8_COMMON *cm = &pbi->common;
     int retcode = 0;
@@ -327,10 +332,27 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.error_code = VPX_CODEC_OK;
 
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(dx_store_reg);
+    }
+#endif
+
     cm->new_fb_idx = get_free_fb (cm);
 
     if (setjmp(pbi->common.error.jmp))
     {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
+#endif
         pbi->common.error.setjmp = 0;
         if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0)
           cm->fb_idx_ref_cnt[cm->new_fb_idx]--;
@@ -339,10 +361,6 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->common.error.setjmp = 1;
 
-#if HAVE_ARMV7
-    vp8_push_neon(dx_store_reg);
-#endif
-
     vpx_usec_timer_start(&timer);
 
     //cm->current_video_frame++;
@@ -354,7 +372,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     if (retcode < 0)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(dx_store_reg);
+        }
 #endif
         pbi->common.error.error_code = VPX_CODEC_ERROR;
         pbi->common.error.setjmp = 0;
@@ -367,6 +390,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     {
         if (swap_frame_buffers (cm))
         {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
             pbi->common.error.error_code = VPX_CODEC_ERROR;
             pbi->common.error.setjmp = 0;
             return -1;
@@ -375,6 +406,14 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     {
         if (swap_frame_buffers (cm))
         {
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_pop_neon(dx_store_reg);
+            }
+#endif
             pbi->common.error.error_code = VPX_CODEC_ERROR;
             pbi->common.error.setjmp = 0;
             return -1;
@@ -455,7 +494,12 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 #endif
 
 #if HAVE_ARMV7
-    vp8_pop_neon(dx_store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(dx_store_reg);
+    }
 #endif
     pbi->common.error.setjmp = 0;
     return retcode;
diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
new file mode 100644
index 000000000..8736fcf1d
--- /dev/null
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "variance.h"
+#include "onyx_int.h"
+
+extern void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
+
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    int flags = cpi->common.rtcd.flags;
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        /*cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;*/
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
+        cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;*/
+
+        /*cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
+        cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        /*cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
+        cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;*/
+
+        /*cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;*/
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;*/
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+    }
+#endif
+
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
+        cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
+        cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
+        cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
+        cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
+
+        /*cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;*/
+        cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
+        cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
+        cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
+        cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
+
+        /*cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;*/
+        cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
+        /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
+        cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
+        cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+
+        cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
+        /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
+
+        cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
+        /*cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
+        cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;*/
+        cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
+
+        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
+        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
+        cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
+        cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
+        cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
+
+        /*cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
+        cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
+        cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;*/
+        cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
+        cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
+        cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
+
+        /*cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;*/
+        /* The neon quantizer has not been updated to match the new exact
+         * quantizer introduced in commit e04e2935
+         */
+        /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
+    }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
+    }
+#endif
+#endif
+}
diff --git a/vp8/encoder/arm/neon/boolhuff_armv7.asm b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
similarity index 95%
rename from vp8/encoder/arm/neon/boolhuff_armv7.asm
rename to vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
index 9c4823c51..e78dc3322 100644
--- a/vp8/encoder/arm/neon/boolhuff_armv7.asm
+++ b/vp8/encoder/arm/armv5te/boolhuff_armv5te.asm
@@ -205,17 +205,10 @@ token_count_lt_zero_se
     ldr     r5, [r0, #vp8_writer_range]
     ldr     r3, [r0, #vp8_writer_count]
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r11, r1
     rsb     r4, r10, #32                 ; 32-n
 
     ; v is kept in r1 during the token pack loop
-    lsr     r1, r11, r4                 ; v >>= 32 - n
+    lsl     r1, r1, r4                  ; r1 = v << 32 - n
 
 encode_value_loop
     sub     r7, r5, #1                  ; range-1
@@ -223,7 +216,7 @@ encode_value_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r1, r1, #1                  ; bit = v >> n
+    lsls    r1, r1, #1                  ; bit = v >> n
     mov     r4, r7, lsl #7              ; ((range-1) * 128)
 
     mov     r7, #1
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
similarity index 93%
rename from vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
index c19ac8250..3233d2a96 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_tokens_armv7|
+    EXPORT |vp8cx_pack_tokens_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -25,7 +25,7 @@
 ; r3 vp8_coef_encodings
 ; s0 vp8_extra_bits
 ; s1 vp8_coef_tree
-|vp8cx_pack_tokens_armv7| PROC
+|vp8cx_pack_tokens_armv5| PROC
     push    {r4-r11, lr}
 
     ; Add size of xcount * sizeof (TOKENEXTRA) to get stop
@@ -57,18 +57,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #52]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -78,7 +71,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -172,16 +165,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
similarity index 94%
rename from vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
index 075645586..a9b552ae1 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_mbrow_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_mbrow_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_mb_row_tokens_armv7|
+    EXPORT |vp8cx_pack_mb_row_tokens_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -25,7 +25,7 @@
 ; r3 vp8_extra_bits
 ; s0 vp8_coef_tree
 
-|vp8cx_pack_mb_row_tokens_armv7| PROC
+|vp8cx_pack_mb_row_tokens_armv5| PROC
     push    {r4-r11, lr}
     sub     sp, sp, #24
 
@@ -78,18 +78,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #60]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                 ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -99,7 +92,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -193,16 +186,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]            ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
similarity index 95%
rename from vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
rename to vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
index 10a3d9851..0835164e5 100644
--- a/vp8/encoder/arm/neon/vp8_packtokens_partitions_armv7.asm
+++ b/vp8/encoder/arm/armv5te/vp8_packtokens_partitions_armv5.asm
@@ -9,7 +9,7 @@
 ;
 
 
-    EXPORT |vp8cx_pack_tokens_into_partitions_armv7|
+    EXPORT |vp8cx_pack_tokens_into_partitions_armv5|
 
     INCLUDE vpx_vp8_enc_asm_offsets.asm
 
@@ -27,7 +27,7 @@
 ; s1 vp8_extra_bits,
 ; s2 const vp8_tree_index *,
 
-|vp8cx_pack_tokens_into_partitions_armv7| PROC
+|vp8cx_pack_tokens_into_partitions_armv5| PROC
     push    {r4-r11, lr}
     sub     sp, sp, #44
 
@@ -106,18 +106,11 @@ while_p_lt_stop
     movne   lr, #2                      ; i = 2
     subne   r8, r8, #1                  ; --n
 
-    ; reverse the stream of bits to be packed.  Normally
-    ; the most significant bit is peeled off and compared
-    ; in the form of (v >> --n) & 1.  ARM architecture has
-    ; the ability to set a flag based on the value of the
-    ; bit shifted off the bottom of the register.  To make
-    ; that happen the bitstream is reversed.
-    rbit    r12, r6
     rsb     r4, r8, #32                 ; 32-n
     ldr     r10, [sp, #88]              ; vp8_coef_tree
 
     ; v is kept in r12 during the token pack loop
-    lsr     r12, r12, r4                ; v >>= 32 - n
+    lsl     r12, r6, r4                ; r12 = v << 32 - n
 
 ; loop start
 token_loop
@@ -127,7 +120,7 @@ token_loop
     ; Decisions are made based on the bit value shifted
     ; off of v, so set a flag here based on this.
     ; This value is refered to as "bb"
-    lsrs    r12, r12, #1                ; bb = v >> n
+    lsls    r12, r12, #1                ; bb = v >> n
     mul     r4, r4, r7                  ; ((range-1) * pp[i>>1]))
 
     ; bb can only be 0 or 1.  So only execute this statement
@@ -221,16 +214,15 @@ token_count_lt_zero
     ldr     r10, [r12, #vp8_extra_bit_struct_tree]
     str     r10, [sp, #4]               ; b->tree
 
-    rbit    r12, r7                     ; reverse v
     rsb     r4, r8, #32
-    lsr     r12, r12, r4
+    lsl     r12, r7, r4
 
     mov     lr, #0                      ; i = 0
 
 extra_bits_loop
     ldrb    r4, [r9, lr, asr #1]        ; pp[i>>1]
     sub     r7, r5, #1                  ; range-1
-    lsrs    r12, r12, #1                ; v >> n
+    lsls    r12, r12, #1                ; v >> n
     mul     r4, r4, r7                  ; (range-1) * pp[i>>1]
     addcs   lr, lr, #1                  ; i + bb
 
diff --git a/vp8/encoder/arm/csystemdependent.c b/vp8/encoder/arm/csystemdependent.c
deleted file mode 100644
index 8d70d635a..000000000
--- a/vp8/encoder/arm/csystemdependent.c
+++ /dev/null
@@ -1,164 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "variance.h"
-#include "onyx_int.h"
-
-void (*vp8_yv12_copy_partial_frame_ptr)(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vp8_yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-extern void vpxyv12_copy_partial_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc, int Fraction);
-
-void vp8_cmachine_specific_config(VP8_COMP *cpi)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    cpi->rtcd.common                         = &cpi->common.rtcd;
-
-#if HAVE_ARMV7
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_neon;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_neon;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_neon;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_neon;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_neon;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_neon;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_neon;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_neon;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_neon;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_neon;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_neon;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_neon;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_neon;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_neon;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_neon;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_neon;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_neon;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_neon;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_neon;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_neon;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-    /* The neon quantizer has not been updated to match the new exact
-     * quantizer introduced in commit e04e2935
-     */
-    /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_neon;*/
-#elif HAVE_ARMV6
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_armv6;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#else
-    //pure c
-    cpi->rtcd.variance.sad16x16              = vp8_sad16x16_c;
-    cpi->rtcd.variance.sad16x8               = vp8_sad16x8_c;
-    cpi->rtcd.variance.sad8x16               = vp8_sad8x16_c;
-    cpi->rtcd.variance.sad8x8                = vp8_sad8x8_c;
-    cpi->rtcd.variance.sad4x4                = vp8_sad4x4_c;
-
-    cpi->rtcd.variance.var4x4                = vp8_variance4x4_c;
-    cpi->rtcd.variance.var8x8                = vp8_variance8x8_c;
-    cpi->rtcd.variance.var8x16               = vp8_variance8x16_c;
-    cpi->rtcd.variance.var16x8               = vp8_variance16x8_c;
-    cpi->rtcd.variance.var16x16              = vp8_variance16x16_c;
-
-    cpi->rtcd.variance.subpixvar4x4          = vp8_sub_pixel_variance4x4_c;
-    cpi->rtcd.variance.subpixvar8x8          = vp8_sub_pixel_variance8x8_c;
-    cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
-    cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
-    cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
-
-    cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
-    cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;
-
-    cpi->rtcd.variance.get16x16prederror     = vp8_get16x16pred_error_c;
-    cpi->rtcd.variance.get8x8var             = vp8_get8x8var_c;
-    cpi->rtcd.variance.get16x16var           = vp8_get16x16var_c;;
-    cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_c;
-
-    cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-    cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-    cpi->rtcd.fdct.fast4x4                   = vp8_fast_fdct4x4_c;
-    cpi->rtcd.fdct.fast8x4                   = vp8_fast_fdct8x4_c;
-    cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
-
-    cpi->rtcd.encodemb.berr                  = vp8_block_error_c;
-    cpi->rtcd.encodemb.mberr                 = vp8_mbblock_error_c;
-    cpi->rtcd.encodemb.mbuverr               = vp8_mbuverror_c;
-    cpi->rtcd.encodemb.subb                  = vp8_subtract_b_c;
-    cpi->rtcd.encodemb.submby                = vp8_subtract_mby_c;
-    cpi->rtcd.encodemb.submbuv               = vp8_subtract_mbuv_c;
-
-    cpi->rtcd.quantize.quantb                = vp8_regular_quantize_b;
-    cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_yv12_copy_partial_frame_ptr = vpxyv12_copy_partial_frame_neon;
-#else
-    vp8_yv12_copy_partial_frame_ptr = vp8_yv12_copy_partial_frame;
-#endif
-}
diff --git a/vp8/encoder/arm/dct_arm.h b/vp8/encoder/arm/dct_arm.h
index 774599bf0..41fa5d192 100644
--- a/vp8/encoder/arm/dct_arm.h
+++ b/vp8/encoder/arm/dct_arm.h
@@ -15,9 +15,11 @@
 #if HAVE_ARMV6
 extern prototype_fdct(vp8_short_walsh4x4_armv6);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_armv6
 #endif
+#endif
 
 #if HAVE_ARMV7
 extern prototype_fdct(vp8_short_fdct4x4_neon);
@@ -26,6 +28,7 @@ extern prototype_fdct(vp8_fast_fdct4x4_neon);
 extern prototype_fdct(vp8_fast_fdct8x4_neon);
 extern prototype_fdct(vp8_short_walsh4x4_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_neon
 
@@ -40,6 +43,7 @@ extern prototype_fdct(vp8_short_walsh4x4_neon);
 
 #undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4 vp8_short_walsh4x4_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/arm/encodemb_arm.h b/vp8/encoder/arm/encodemb_arm.h
index eb699433f..8fe453735 100644
--- a/vp8/encoder/arm/encodemb_arm.h
+++ b/vp8/encoder/arm/encodemb_arm.h
@@ -30,6 +30,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
 //#undef  vp8_encodemb_mbuverr
 //#define vp8_encodemb_mbuverr vp8_mbuverror_c
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_encodemb_subb
 #define vp8_encodemb_subb vp8_subtract_b_neon
 
@@ -38,6 +39,7 @@ extern prototype_submbuv(vp8_subtract_mbuv_neon);
 
 #undef  vp8_encodemb_submbuv
 #define vp8_encodemb_submbuv vp8_subtract_mbuv_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index 859e43f51..fb9dd5a5b 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -38,6 +38,7 @@ extern prototype_sad(vp8_get16x16pred_error_neon);
 //extern prototype_variance2(vp8_get16x16var_c);
 extern prototype_sad(vp8_get4x4sse_cs_neon);
 
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_variance_sad4x4
 #define vp8_variance_sad4x4 vp8_sad4x4_neon
 
@@ -100,6 +101,7 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
 
 #undef  vp8_variance_get4x4sse_cs
 #define vp8_variance_get4x4sse_cs vp8_get4x4sse_cs_neon
+#endif
 
 #endif
 
diff --git a/vp8/encoder/bitstream.h b/vp8/encoder/bitstream.h
index 559631338..f5d148ea4 100644
--- a/vp8/encoder/bitstream.h
+++ b/vp8/encoder/bitstream.h
@@ -12,25 +12,25 @@
 #ifndef __INC_BITSTREAM_H
 #define __INC_BITSTREAM_H
 
-#if HAVE_ARMV7
-void vp8cx_pack_tokens_armv7(vp8_writer *w, const TOKENEXTRA *p, int xcount,
+#if HAVE_ARMV5TE
+void vp8cx_pack_tokens_armv5(vp8_writer *w, const TOKENEXTRA *p, int xcount,
                              vp8_token *,
                              vp8_extra_bit_struct *,
                              const vp8_tree_index *);
-void vp8cx_pack_tokens_into_partitions_armv7(VP8_COMP *, unsigned char *, int , int *,
+void vp8cx_pack_tokens_into_partitions_armv5(VP8_COMP *, unsigned char *, int , int *,
         vp8_token *,
         vp8_extra_bit_struct *,
         const vp8_tree_index *);
-void vp8cx_pack_mb_row_tokens_armv7(VP8_COMP *cpi, vp8_writer *w,
+void vp8cx_pack_mb_row_tokens_armv5(VP8_COMP *cpi, vp8_writer *w,
                                     vp8_token *,
                                     vp8_extra_bit_struct *,
                                     const vp8_tree_index *);
 # define pack_tokens(a,b,c)                  \
-    vp8cx_pack_tokens_armv7(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_armv5(a,b,c,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_tokens_into_partitions(a,b,c,d)  \
-    vp8cx_pack_tokens_into_partitions_armv7(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_tokens_into_partitions_armv5(a,b,c,d,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 # define pack_mb_row_tokens(a,b)               \
-    vp8cx_pack_mb_row_tokens_armv7(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
+    vp8cx_pack_mb_row_tokens_armv5(a,b,vp8_coef_encodings,vp8_extra_bits,vp8_coef_tree)
 #else
 # define pack_tokens(a,b,c)                  pack_tokens_c(a,b,c)
 # define pack_tokens_into_partitions(a,b,c,d)  pack_tokens_into_partitions_c(a,b,c,d)
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 1acb73d9c..520b08f51 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -15,6 +15,7 @@
 
 
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi);
+void vp8_arch_arm_encoder_init(VP8_COMP *cpi);
 
 
 void (*vp8_fast_quantize_b)(BLOCK *b, BLOCKD *d);
@@ -94,4 +95,8 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     vp8_arch_x86_encoder_init(cpi);
 #endif
 
+#if ARCH_ARM
+    vp8_arch_arm_encoder_init(cpi);
+#endif
+
 }
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 53d68be52..7e1583dd9 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -31,6 +31,9 @@
 #include "vpx_ports/vpx_timer.h"
 #include "vpxerrors.h"
 #include "temporal_filter.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 #include <math.h>
 #include <stdio.h>
@@ -2106,8 +2109,8 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     CHECK_MEM_ERROR(cpi->rdtok, vpx_calloc(256 * 3 / 2, sizeof(TOKENEXTRA)));
     CHECK_MEM_ERROR(cpi->mb.ss, vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1));
 
-    vp8_cmachine_specific_config(cpi);
     vp8_create_common(&cpi->common);
+    vp8_cmachine_specific_config(cpi);
 
     vp8_init_config((VP8_PTR)cpi, oxcf);
 
@@ -2852,9 +2855,20 @@ static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
     {
         //vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
 #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &cpi->scaled_source);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &cpi->scaled_source);
+        }
 #endif
 
         cpi->Source = &cpi->scaled_source;
@@ -4624,10 +4638,10 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
-static INT64 store_reg[8];
 #endif
 int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time)
 {
+    INT64 store_reg[8];
     VP8_COMP *cpi = (VP8_COMP *) ptr;
     VP8_COMMON *cm = &cpi->common;
     struct vpx_usec_timer  timer;
@@ -4636,7 +4650,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
         return -1;
 
 #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
 #endif
 
     vpx_usec_timer_start(&timer);
@@ -4645,7 +4664,12 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
     if (cpi->source_buffer_count != 0 && cpi->source_buffer_count >= cpi->oxcf.lag_in_frames)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return -1;
     }
@@ -4686,9 +4710,20 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
         s->source_time_stamp = time_stamp;
         s->source_frame_flags = frame_flags;
 #if HAVE_ARMV7
-        vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
-#else
-        vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_yv12_copy_src_frame_func_neon(sd, &s->source_buffer);
+        }
+#if CONFIG_RUNTIME_CPU_DETECT
+        else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+        {
+            vp8_yv12_copy_frame_ptr(sd, &s->source_buffer);
+        }
 #endif
         cpi->source_buffer_count = 1;
     }
@@ -4697,14 +4732,19 @@ int vp8_receive_raw_frame(VP8_PTR ptr, unsigned int frame_flags, YV12_BUFFER_CON
     cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);
 
 #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
 #endif
 
     return 0;
 }
 int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush)
 {
-
+    INT64 store_reg[8];
     VP8_COMP *cpi = (VP8_COMP *) ptr;
     VP8_COMMON *cm = &cpi->common;
     struct vpx_usec_timer  tsctimer;
@@ -4715,7 +4755,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
         return -1;
 
 #if HAVE_ARMV7
-    vp8_push_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_push_neon(store_reg);
+    }
 #endif
 
     vpx_usec_timer_start(&cmptimer);
@@ -4867,7 +4912,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
 #endif
 
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return -1;
     }
@@ -4910,7 +4960,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
     if (!cpi)
     {
 #if HAVE_ARMV7
-        vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+        if (cm->rtcd.flags & HAS_NEON)
+#endif
+        {
+            vp8_pop_neon(store_reg);
+        }
 #endif
         return 0;
     }
@@ -5099,7 +5154,12 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
 #endif
 
 #if HAVE_ARMV7
-    vp8_pop_neon(store_reg);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_pop_neon(store_reg);
+    }
 #endif
 
     return 0;
diff --git a/vp8/encoder/picklpf.c b/vp8/encoder/picklpf.c
index 79e07dbc0..09e8b5412 100644
--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -16,6 +16,9 @@
 #include "vpx_scale/yv12extend.h"
 #include "vpx_scale/vpxscale.h"
 #include "alloccommon.h"
+#if ARCH_ARM
+#include "vpx_ports/arm.h"
+#endif
 
 extern void vp8_loop_filter_frame(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val);
 extern void vp8_loop_filter_frame_yonly(VP8_COMMON *cm,    MACROBLOCKD *mbd,  int filt_val, int sharpness_lvl);
@@ -306,9 +309,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     //  Make a copy of the unfiltered / processed recon buffer
 #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
-#else
-    vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(cm->frame_to_show, &cpi->last_frame_uf);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_ptr(cm->frame_to_show, &cpi->last_frame_uf);
+    }
 #endif
 
     if (cm->frame_type == KEY_FRAME)
@@ -343,9 +357,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
     //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-    vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-    vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (cm->rtcd.flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+    }
+#if CONFIG_RUNTIME_CPU_DETECT
+    else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+    {
+        vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+    }
 #endif
 
     while (filter_step > 0)
@@ -372,9 +397,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
             //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
 #endif
 
             // If value is close to the best so far then bias towards a lower loop filter value.
@@ -401,9 +437,20 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
 
             //  Re-instate the unfiltered frame
 #if HAVE_ARMV7
-            vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
-#else
-            vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+#if CONFIG_RUNTIME_CPU_DETECT
+            if (cm->rtcd.flags & HAS_NEON)
+#endif
+            {
+                vp8_yv12_copy_frame_yonly_no_extend_frame_borders_neon(&cpi->last_frame_uf, cm->frame_to_show);
+            }
+#if CONFIG_RUNTIME_CPU_DETECT
+            else
+#endif
+#endif
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
+            {
+                vp8_yv12_copy_frame_yonly_ptr(&cpi->last_frame_uf, cm->frame_to_show);
+            }
 #endif
 
             // Was it better than the previous best?
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index ecca18a0a..3b5aaa548 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -112,6 +112,8 @@ VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/postproc_mmx.asm
 VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/postproc_sse2.asm
 endif
 
+VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
+
 # common (c)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
@@ -119,15 +121,8 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra4x4_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/systemdependent.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/filter_c.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/idctllm.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/recon.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/reconintra4x4.c
-VP8_COMMON_SRCS_REMOVE-$(HAVE_ARMV6)  += common/generic/systemdependent.c
-
 # common (armv6)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/bilinearfilter_v6$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/armv6/copymem8x4_v6$(ASM)
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index 1424bd15a..d126faf32 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -13,17 +13,22 @@
 
 #File list for arm
 # encoder
-VP8_CX_SRCS-$(HAVE_ARMV6)  += encoder/arm/csystemdependent.c
+VP8_CX_SRCS-$(ARCH_ARM)  += encoder/arm/arm_csystemdependent.c
 
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/boolhuff_arm.c
+VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
 
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV6)  += encoder/generic/csystemdependent.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/boolhuff.c
-VP8_CX_SRCS_REMOVE-$(HAVE_ARMV7)  += encoder/mcomp.c
+VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
+
+#File list for armv5te
+# encoder
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/boolhuff_armv5te$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_mbrow_armv5$(ASM)
+VP8_CX_SRCS-$(HAVE_ARMV5TE)  += encoder/arm/armv5te/vp8_packtokens_partitions_armv5$(ASM)
 
 #File list for armv6
 # encoder
@@ -44,10 +49,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance8x8_neon$(ASM
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_memcpy_neon$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_mbrow_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_packtokens_partitions_armv7$(ASM)
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/boolhuff_armv7$(ASM)
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/neon/vp8_shortwalsh4x4_neon$(ASM)
 
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/vpx_vp8_enc_asm_offsets.c
diff --git a/vp8/vp8dx_arm.mk b/vp8/vp8dx_arm.mk
index ae0610cda..0803a9cb0 100644
--- a/vp8/vp8dx_arm.mk
+++ b/vp8/vp8dx_arm.mk
@@ -11,11 +11,9 @@
 
 #VP8_DX_SRCS list is modified according to different platforms.
 
+VP8_DX_SRCS-$(ARCH_ARM)  += decoder/arm/arm_dsystemdependent.c
+
 VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dequantize_arm.c
-VP8_DX_SRCS-$(HAVE_ARMV6)  += decoder/arm/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/generic/dsystemdependent.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/dequantize.c
-VP8_DX_SRCS_REMOVE-$(HAVE_ARMV6)  += decoder/idct_blk.c
 VP8_DX_SRCS-$(CONFIG_ARM_ASM_DETOK)  += decoder/arm/detokenize$(ASM)
 
 #File list for armv6
diff --git a/vpx_ports/arm.h b/vpx_ports/arm.h
new file mode 100644
index 000000000..81af1f11f
--- /dev/null
+++ b/vpx_ports/arm.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VPX_PORTS_ARM_H
+#define VPX_PORTS_ARM_H
+#include <stdlib.h>
+#include "config.h"
+
+/*ARMv5TE "Enhanced DSP" instructions.*/
+#define HAS_EDSP  0x01
+/*ARMv6 "Parallel" or "Media" instructions.*/
+#define HAS_MEDIA 0x02
+/*ARMv7 optional NEON instructions.*/
+#define HAS_NEON  0x04
+
+int arm_cpu_caps(void);
+
+#endif
+
diff --git a/vpx_ports/arm_cpudetect.c b/vpx_ports/arm_cpudetect.c
new file mode 100644
index 000000000..4109924cf
--- /dev/null
+++ b/vpx_ports/arm_cpudetect.c
@@ -0,0 +1,190 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include "arm.h"
+
+static int arm_cpu_env_flags(int *flags)
+{
+    char *env;
+    env = getenv("VPX_SIMD_CAPS");
+    if (env && *env)
+    {
+        *flags = (int)strtol(env, NULL, 0);
+        return 0;
+    }
+    *flags = 0;
+    return -1;
+}
+
+static int arm_cpu_env_mask(void)
+{
+    char *env;
+    env = getenv("VPX_SIMD_CAPS_MASK");
+    return env && *env ? (int)strtol(env, NULL, 0) : ~0;
+}
+
+
+#if defined(_MSC_VER)
+/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
+#define WIN32_LEAN_AND_MEAN
+#define WIN32_EXTRA_LEAN
+#include <windows.h>
+
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* MSVC has no inline __asm support for ARM, but it does let you __emit
+     *  instructions via their assembled hex code.
+     * All of these instructions should be essentially nops.
+     */
+#if defined(HAVE_ARMV5TE)
+    if (mask & HAS_EDSP)
+    {
+        __try
+        {
+            /*PLD [r13]*/
+            __emit(0xF5DDF000);
+            flags |= HAS_EDSP;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#if defined(HAVE_ARMV6)
+    if (mask & HAS_MEDIA)
+        __try
+        {
+            /*SHADD8 r3,r3,r3*/
+            __emit(0xE6333F93);
+            flags |= HAS_MEDIA;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#if defined(HAVE_ARMV7)
+    if (mask & HAS_NEON)
+    {
+        __try
+        {
+            /*VORR q0,q0,q0*/
+            __emit(0xF2200150);
+            flags |= HAS_NEON;
+        }
+        __except(GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION)
+        {
+            /*Ignore exception.*/
+        }
+    }
+#endif
+#endif
+#endif
+    return flags & mask;
+}
+
+#elif defined(__linux__)
+#include <stdio.h>
+
+int arm_cpu_caps(void)
+{
+    FILE *fin;
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+    /* Reading /proc/self/auxv would be easier, but that doesn't work reliably
+     *  on Android.
+     * This also means that detection will fail in Scratchbox.
+     */
+    fin = fopen("/proc/cpuinfo","r");
+    if(fin != NULL)
+    {
+        /* 512 should be enough for anybody (it's even enough for all the flags
+         * that x86 has accumulated... so far).
+         */
+        char buf[512];
+        while (fgets(buf, 511, fin) != NULL)
+        {
+#if defined(HAVE_ARMV5TE) || defined(HAVE_ARMV7)
+            if (memcmp(buf, "Features", 8) == 0)
+            {
+                char *p;
+#if defined(HAVE_ARMV5TE)
+                p=strstr(buf, " edsp");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_EDSP;
+                }
+#if defined(HAVE_ARMV7)
+                p = strstr(buf, " neon");
+                if (p != NULL && (p[5] == ' ' || p[5] == '\n'))
+                {
+                    flags |= HAS_NEON;
+                }
+#endif
+#endif
+            }
+#endif
+#if defined(HAVE_ARMV6)
+            if (memcmp(buf, "CPU architecture:",17) == 0){
+                int version;
+                version = atoi(buf+17);
+                if (version >= 6)
+                {
+                    flags |= HAS_MEDIA;
+                }
+            }
+#endif
+        }
+        fclose(fin);
+    }
+    return flags & mask;
+}
+
+#elif !CONFIG_RUNTIME_CPU_DETECT
+
+int arm_cpu_caps(void)
+{
+    int flags;
+    int mask;
+    if (!arm_cpu_env_flags(&flags))
+    {
+        return flags;
+    }
+    mask = arm_cpu_env_mask();
+#if defined(HAVE_ARMV5TE)
+    flags |= HAS_EDSP;
+#endif
+#if defined(HAVE_ARMV6)
+    flags |= HAS_MEDIA;
+#endif
+#if defined(HAVE_ARMV7)
+    flags |= HAS_NEON;
+#endif
+    return flags & mask;
+}
+
+#else
+#error "--enable-runtime-cpu-detect selected, but no CPU detection method " \
+ "available for your platform. Reconfigure without --enable-runtime-cpu-detect."
+#endif
diff --git a/vpx_scale/arm/scalesystemdependant.c b/vpx_scale/arm/scalesystemdependant.c
index 1e8bcb89d..fee76fff7 100644
--- a/vpx_scale/arm/scalesystemdependant.c
+++ b/vpx_scale/arm/scalesystemdependant.c
@@ -10,6 +10,7 @@
 
 
 #include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
 #include "vpx_scale/vpxscale.h"
 
 
@@ -47,6 +48,9 @@ extern void vp8_yv12_copy_frame_neon(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CO
  ****************************************************************************/
 void vp8_scale_machine_specific_config()
 {
+#if HAVE_ARMV7 && CONFIG_RUNTIME_CPU_DETECT
+    int flags;
+#endif
     /*
     vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_armv4;
     vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_armv4;
@@ -73,14 +77,20 @@ void vp8_scale_machine_specific_config()
     vp8_horizontal_line_5_4_scale         = vp8cx_horizontal_line_5_4_scale_c;
     */
 
-#if HAVE_ARMV7
-    vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders_neon;
-    vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly_neon;
-    vp8_yv12_copy_frame_ptr               = vp8_yv12_copy_frame_neon;
-#else
+#if !HAVE_ARMV7 || CONFIG_RUNTIME_CPU_DETECT
     vp8_yv12_extend_frame_borders_ptr      = vp8_yv12_extend_frame_borders;
     vp8_yv12_copy_frame_yonly_ptr          = vp8_yv12_copy_frame_yonly;
     vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame;
 #endif
-
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    flags = arm_cpu_caps();
+    if (flags & HAS_NEON)
+#endif
+    {
+        vp8_yv12_extend_frame_borders_ptr = vp8_yv12_extend_frame_borders_neon;
+        vp8_yv12_copy_frame_yonly_ptr     = vp8_yv12_copy_frame_yonly_neon;
+        vp8_yv12_copy_frame_ptr           = vp8_yv12_copy_frame_neon;
+    }
+#endif
 }

From 1376f061dae199af3378339367a1bc9f95fd4187 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Mon, 18 Oct 2010 14:57:40 -0400
Subject: [PATCH 238/307] reuse common loopfilter code

there were four versions for the regular and
macroblock loopfilters:
horizontal [y|uv]
vertical [y|uv]

this moves all the common code into 2 functions:
vp8_loop_filter_neon
vp8_mbloop_filter_neon

this provides no gain in performance. there's a bit
of jitter, but it trends down ~0.25-0.5%. however,
this is a huge gain maintenance. also, there is the
potential to drop some stack usage in the macroblock
loopfilter.

Change-Id: I91506f07d2f449631ff67ad6f1b3f3be63b81a92
---
 vp8/common/arm/neon/loopfilter_neon.asm       | 409 ++++++++++++++
 .../neon/loopfilterhorizontaledge_uv_neon.asm | 178 ------
 .../neon/loopfilterhorizontaledge_y_neon.asm  | 161 ------
 .../neon/loopfilterverticaledge_uv_neon.asm   | 203 -------
 .../neon/loopfilterverticaledge_y_neon.asm    | 207 -------
 vp8/common/arm/neon/mbloopfilter_neon.asm     | 519 ++++++++++++++++++
 .../mbloopfilterhorizontaledge_uv_neon.asm    | 220 --------
 .../mbloopfilterhorizontaledge_y_neon.asm     | 201 -------
 .../neon/mbloopfilterverticaledge_uv_neon.asm | 261 ---------
 .../neon/mbloopfilterverticaledge_y_neon.asm  | 267 ---------
 vp8/vp8_common.mk                             |  10 +-
 11 files changed, 930 insertions(+), 1706 deletions(-)
 create mode 100644 vp8/common/arm/neon/loopfilter_neon.asm
 delete mode 100644 vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
 delete mode 100644 vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
 delete mode 100644 vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
 delete mode 100644 vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
 create mode 100644 vp8/common/arm/neon/mbloopfilter_neon.asm
 delete mode 100644 vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
 delete mode 100644 vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
 delete mode 100644 vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
 delete mode 100644 vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm

diff --git a/vp8/common/arm/neon/loopfilter_neon.asm b/vp8/common/arm/neon/loopfilter_neon.asm
new file mode 100644
index 000000000..bf0c35721
--- /dev/null
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -0,0 +1,409 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src
+; r1    int pitch
+; r2    const signed char *flimit
+; r3    const signed char *limit
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_loop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    sub         r2, r0, r1, lsl #2          ; move src pointer down by 4 lines
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    vld1.u8     {q3}, [r2], r1              ; p3
+    vld1.u8     {q4}, [r2], r1              ; p2
+    vld1.u8     {q5}, [r2], r1              ; p1
+    vld1.u8     {q6}, [r2], r1              ; p0
+    vld1.u8     {q7}, [r2], r1              ; q0
+    vld1.u8     {q8}, [r2], r1              ; q1
+    vld1.u8     {q9}, [r2], r1              ; q2
+    vld1.u8     {q10}, [r2]                 ; q3
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    sub         r0, r0, r1, lsl #1
+
+    bl          vp8_loop_filter_neon
+
+    vst1.u8     {q5}, [r0], r1              ; store op1
+    vst1.u8     {q6}, [r0], r1              ; store op0
+    vst1.u8     {q7}, [r0], r1              ; store oq0
+    vst1.u8     {q8}, [r0], r1              ; store oq1
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
+
+; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_loop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    ldr         r2, [sp, #8]                ; load v ptr
+
+    sub         r3, r0, r1, lsl #2          ; move u pointer down by 4 lines
+    vld1.u8     {d6}, [r3], r1              ; p3
+    vld1.u8     {d8}, [r3], r1              ; p2
+    vld1.u8     {d10}, [r3], r1             ; p1
+    vld1.u8     {d12}, [r3], r1             ; p0
+    vld1.u8     {d14}, [r3], r1             ; q0
+    vld1.u8     {d16}, [r3], r1             ; q1
+    vld1.u8     {d18}, [r3], r1             ; q2
+    vld1.u8     {d20}, [r3]                 ; q3
+
+    ldr         r3, [sp, #4]                ; load thresh pointer
+
+    sub         r12, r2, r1, lsl #2         ; move v pointer down by 4 lines
+    vld1.u8     {d7}, [r12], r1             ; p3
+    vld1.u8     {d9}, [r12], r1             ; p2
+    vld1.u8     {d11}, [r12], r1            ; p1
+    vld1.u8     {d13}, [r12], r1            ; p0
+    vld1.u8     {d15}, [r12], r1            ; q0
+    vld1.u8     {d17}, [r12], r1            ; q1
+    vld1.u8     {d19}, [r12], r1            ; q2
+    vld1.u8     {d21}, [r12]                ; q3
+
+    vld1.s8     {d4[], d5[]}, [r3]          ; thresh
+
+    bl          vp8_loop_filter_neon
+
+    sub         r0, r0, r1, lsl #1
+    sub         r2, r2, r1, lsl #1
+
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r2], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r2], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r2], r1             ; store v oq0
+    vst1.u8     {d16}, [r0]                 ; store u oq1
+    vst1.u8     {d17}, [r2]                 ; store v oq1
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
+
+; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                           const signed char *flimit,
+;                                           const signed char *limit,
+;                                           const signed char *thresh,
+;                                           int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_loop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    sub         r2, r0, #4                  ; src ptr down by 4 columns
+    sub         r0, r0, #2                  ; dst ptr
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    vld1.u8     {d6}, [r2], r1              ; load first 8-line src data
+    vld1.u8     {d8}, [r2], r1
+    vld1.u8     {d10}, [r2], r1
+    vld1.u8     {d12}, [r2], r1
+    vld1.u8     {d14}, [r2], r1
+    vld1.u8     {d16}, [r2], r1
+    vld1.u8     {d18}, [r2], r1
+    vld1.u8     {d20}, [r2], r1
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    vld1.u8     {d7}, [r2], r1              ; load second 8-line src data
+    vld1.u8     {d9}, [r2], r1
+    vld1.u8     {d11}, [r2], r1
+    vld1.u8     {d13}, [r2], r1
+    vld1.u8     {d15}, [r2], r1
+    vld1.u8     {d17}, [r2], r1
+    vld1.u8     {d19}, [r2], r1
+    vld1.u8     {d21}, [r2]
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    bl          vp8_loop_filter_neon
+
+    vswp        d12, d11
+    vswp        d16, d13
+    vswp        d14, d12
+    vswp        d16, d15
+
+    ;store op1, op0, oq0, oq1
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r0]
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
+
+; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch
+;                                            const signed char *flimit,
+;                                            const signed char *limit,
+;                                            const signed char *thresh,
+;                                            unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_loop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r12, r0, #4                  ; move u pointer down by 4 columns
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+
+    ldr         r2, [sp, #8]                ; load v ptr
+
+    vld1.u8     {d6}, [r12], r1              ;load u data
+    vld1.u8     {d8}, [r12], r1
+    vld1.u8     {d10}, [r12], r1
+    vld1.u8     {d12}, [r12], r1
+    vld1.u8     {d14}, [r12], r1
+    vld1.u8     {d16}, [r12], r1
+    vld1.u8     {d18}, [r12], r1
+    vld1.u8     {d20}, [r12]
+
+    sub         r3, r2, #4                  ; move v pointer down by 4 columns
+    vld1.u8     {d7}, [r3], r1              ;load v data
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d21}, [r3]
+
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    bl          vp8_loop_filter_neon
+
+    sub         r0, r0, #2
+    sub         r2, r2, #2
+
+    vswp        d12, d11
+    vswp        d16, d13
+    vswp        d14, d12
+    vswp        d16, d15
+
+    ;store op1, op0, oq0, oq1
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
+
+; void vp8_loop_filter_neon();
+; This is a helper function for the loopfilters. The invidual functions do the
+; necessary load, transpose (if necessary) and store.
+
+; r0-r3 PRESERVE
+; q0    flimit
+; q1    limit
+; q2    thresh
+; q3    p3
+; q4    p2
+; q5    p1
+; q6    p0
+; q7    q0
+; q8    q1
+; q9    q2
+; q10   q3
+|vp8_loop_filter_neon| PROC
+    ldr         r12, _lf_coeff_
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
+    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
+    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
+
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
+
+    vadd.u8     q0, q0, q0                  ; flimit * 2
+    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15
+
+    vabd.u8     q2, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q9, q9, q9                  ; b = abs(p0 - q0) * 2
+    vshr.u8     q2, q2, #1                  ; a = a / 2
+    vqadd.u8    q9, q9, q2                  ; a = b + a
+    vcge.u8     q9, q0, q9                  ; (a > flimit * 2 + limit) * -1
+
+    vld1.u8     {q0}, [r12]!
+
+    ; vp8_filter() function
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+
+    vld1.u8     {q10}, [r12]!
+
+    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
+    vsubl.s8    q11, d15, d13
+
+    vmovl.u8    q4, d20
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
+    vmul.i16    q11, q11, q4
+
+    vand        q1, q1, q14                 ; vp8_filter &= hev
+    vand        q15, q15, q9                ; vp8_filter_mask
+
+    vaddw.s8    q2, q2, d2
+    vaddw.s8    q11, q11, d3
+
+    vld1.u8     {q9}, [r12]!
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q11
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vqadd.s8    q2, q1, q10                 ; Filter2 = clamp(vp8_filter+3)
+    vqadd.s8    q1, q1, q9                  ; Filter1 = clamp(vp8_filter+4)
+    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
+    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
+
+    vqadd.s8    q11, q6, q2                 ; u = clamp(ps0 + Filter2)
+    vqsub.s8    q10, q7, q1                 ; u = clamp(qs0 - Filter1)
+
+    ; outer tap adjustments: ++vp8_filter >> 1
+    vrshr.s8    q1, q1, #1
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
+
+    vqadd.s8    q13, q5, q1                 ; u = clamp(ps1 + vp8_filter)
+    vqsub.s8    q12, q8, q1                 ; u = clamp(qs1 - vp8_filter)
+
+    veor        q5, q13, q0                 ; *op1 = u^0x80
+    veor        q6, q11, q0                 ; *op0 = u^0x80
+    veor        q7, q10, q0                 ; *oq0 = u^0x80
+    veor        q8, q12, q0                 ; *oq1 = u^0x80
+
+    bx          lr
+    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
+
+    AREA    loopfilter_dat, DATA, READONLY
+_lf_coeff_
+    DCD     lf_coeff
+lf_coeff
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
+    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
+
+    END
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
deleted file mode 100644
index 23ace0f95..000000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,178 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r2], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r2], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r2], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r2], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r2], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r2], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r2], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r2], r1             ; q3
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _lfhuv_coeff_
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (max  > limit) * -1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2
-    sub         r0, r0, r1, lsl #1
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #2
-    sub         r2, r2, r1, lsl #1
-    ;;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-    ;
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    ;
-
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r2], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r2], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r2], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r2], r1             ; store v oq1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    hloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhuv_coeff_
-    DCD     lfhuv_coeff
-lfhuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
deleted file mode 100644
index e1896e4d8..000000000
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,161 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-
-|vp8_loop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q5}, [r0], r1              ; p1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q6}, [r0], r1              ; p0
-    ldr         r12, _lfhy_coeff_
-    vld1.u8     {q7}, [r0], r1              ; q0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2
-    sub         r0, r0, r1, lsl #1
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    ;
-    add         r2, r1, r0
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    add         r3, r2, r1
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    add         r12, r3, r1
-
-    vst1.u8     {q5}, [r0]                  ; store op1
-    vst1.u8     {q6}, [r2]                  ; store op0
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12]                 ; store oq1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    hloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhy_coeff_
-    DCD     lfhy_coeff
-lfhy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
deleted file mode 100644
index a9c2d12f0..000000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,203 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4          ; move u pointer down by 4 columns
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, #4          ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r2], r1              ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r2], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r2], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r2], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r2], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r2], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r2], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r2], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _vlfuv_coeff_
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #3
-    add         r0, r0, #2
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #3
-    add         r2, r2, #2
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    vswp        d12, d11
-    vswp        d16, d13
-    vswp        d14, d12
-    vswp        d16, d15
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2], r1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    vloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_vlfuv_coeff_
-    DCD     vlfuv_coeff
-vlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
deleted file mode 100644
index 64a49bb41..000000000
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,207 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-
-|vp8_loop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {d8}, [r0], r1
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {d10}, [r0], r1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {d12}, [r0], r1
-    ldr         r12, _vlfy_coeff_
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #4
-    add         r0, r0, #2
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    add         r2, r0, r1
-    ;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    add         r3, r2, r1
-    ;
-    vswp        d12, d11
-    vswp        d16, d13
-    add         r12, r3, r1
-    vswp        d14, d12
-    vswp        d16, d15
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0]
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r2]
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r3]
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
-    add         r0, r12, r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r12]
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    add         r2, r0, r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0]
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r2], r1
-    add         r3, r2, r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2]
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r3], r1
-    add         r12, r3, r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r3]
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
-    add         r0, r12, r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r12]
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
-    add         r2, r0, r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    vloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_vlfy_coeff_
-    DCD     vlfy_coeff
-vlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilter_neon.asm b/vp8/common/arm/neon/mbloopfilter_neon.asm
new file mode 100644
index 000000000..255dd5619
--- /dev/null
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -0,0 +1,519 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                               const signed char *flimit,
+;                                               const signed char *limit,
+;                                               const signed char *thresh,
+;                                               int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    vld1.u8     {q3}, [r0], r1              ; p3
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    vld1.u8     {q4}, [r0], r1              ; p2
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    vld1.u8     {q5}, [r0], r1              ; p1
+    vld1.u8     {q6}, [r0], r1              ; p0
+    vld1.u8     {q7}, [r0], r1              ; q0
+    vld1.u8     {q8}, [r0], r1              ; q1
+    vld1.u8     {q9}, [r0], r1              ; q2
+    vld1.u8     {q10}, [r0], r1             ; q3
+
+    bl          vp8_mbloop_filter_neon
+
+    sub         r0, r0, r1, lsl #3
+    add         r0, r0, r1
+    add         r2, r0, r1
+    add         r3, r2, r1
+
+    vst1.u8     {q4}, [r0]                  ; store op2
+    vst1.u8     {q5}, [r2]                  ; store op1
+    vst1.u8     {q6}, [r3], r1              ; store op0
+    add         r12, r3, r1
+    vst1.u8     {q7}, [r3]                  ; store oq0
+    vst1.u8     {q8}, [r12], r1             ; store oq1
+    vst1.u8     {q9}, [r12]             ; store oq2
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
+
+; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+;                                                const signed char *flimit,
+;                                                const signed char *limit,
+;                                                const signed char *thresh,
+;                                                unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    ldr         r3, [sp, #8]                ; load v ptr
+    ldr         r12, [sp, #4]               ; load thresh pointer
+    sub         r3, r3, r1, lsl #2          ; move v pointer down by 4 lines
+
+    vld1.u8     {d6}, [r0], r1              ; p3
+    vld1.u8     {d7}, [r3], r1              ; p3
+    vld1.u8     {d8}, [r0], r1              ; p2
+    vld1.u8     {d9}, [r3], r1              ; p2
+    vld1.u8     {d10}, [r0], r1             ; p1
+    vld1.u8     {d11}, [r3], r1             ; p1
+    vld1.u8     {d12}, [r0], r1             ; p0
+    vld1.u8     {d13}, [r3], r1             ; p0
+    vld1.u8     {d14}, [r0], r1             ; q0
+    vld1.u8     {d15}, [r3], r1             ; q0
+    vld1.u8     {d16}, [r0], r1             ; q1
+    vld1.u8     {d17}, [r3], r1             ; q1
+    vld1.u8     {d18}, [r0], r1             ; q2
+    vld1.u8     {d19}, [r3], r1             ; q2
+    vld1.u8     {d20}, [r0], r1             ; q3
+    vld1.u8     {d21}, [r3], r1             ; q3
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+
+    bl          vp8_mbloop_filter_neon
+
+    sub         r0, r0, r1, lsl #3
+    sub         r3, r3, r1, lsl #3
+
+    add         r0, r0, r1
+    add         r3, r3, r1
+
+    vst1.u8     {d8}, [r0], r1              ; store u op2
+    vst1.u8     {d9}, [r3], r1              ; store v op2
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r3], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r3], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r3], r1             ; store v oq0
+    vst1.u8     {d16}, [r0], r1             ; store u oq1
+    vst1.u8     {d17}, [r3], r1             ; store v oq1
+    vst1.u8     {d18}, [r0], r1             ; store u oq2
+    vst1.u8     {d19}, [r3], r1             ; store v oq2
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
+
+; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_mbloop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r0, r0, #4                  ; move src pointer down by 4 columns
+
+    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
+    ldr         r12, [sp, #4]               ; load thresh pointer
+    vld1.u8     {d8}, [r0], r1
+    sub         sp, sp, #32
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d20}, [r0], r1
+
+    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
+    vld1.u8     {d9}, [r0], r1
+    vld1.u8     {d11}, [r0], r1
+    vld1.u8     {d13}, [r0], r1
+    vld1.u8     {d15}, [r0], r1
+    vld1.u8     {d17}, [r0], r1
+    vld1.u8     {d19}, [r0], r1
+    vld1.u8     {d21}, [r0], r1
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    mov         r12, sp
+    vst1.u8     {q3}, [r12]!
+    vst1.u8     {q10}, [r12]!
+
+    bl          vp8_mbloop_filter_neon
+
+    sub         r0, r0, r1, lsl #4
+
+    add         r2, r0, r1
+
+    add         r3, r2, r1
+
+    vld1.u8     {q3}, [sp]!
+    vld1.u8     {q10}, [sp]!
+
+    ;transpose to 16x8 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+    add         r12, r3, r1
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store op2, op1, op0, oq0, oq1, oq2
+    vst1.8      {d6}, [r0]
+    vst1.8      {d8}, [r2]
+    vst1.8      {d10}, [r3]
+    vst1.8      {d12}, [r12], r1
+    add         r0, r12, r1
+    vst1.8      {d14}, [r12]
+    vst1.8      {d16}, [r0], r1
+    add         r2, r0, r1
+    vst1.8      {d18}, [r0]
+    vst1.8      {d20}, [r2], r1
+    add         r3, r2, r1
+    vst1.8      {d7}, [r2]
+    vst1.8      {d9}, [r3], r1
+    add         r12, r3, r1
+    vst1.8      {d11}, [r3]
+    vst1.8      {d13}, [r12], r1
+    add         r0, r12, r1
+    vst1.8      {d15}, [r12]
+    vst1.8      {d17}, [r0], r1
+    add         r2, r0, r1
+    vst1.8      {d19}, [r0]
+    vst1.8      {d21}, [r2]
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
+
+; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}
+    sub         r0, r0, #4                  ; move src pointer down by 4 columns
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit
+    ldr         r3, [sp, #8]                ; load v ptr
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    sub         r3, r3, #4                  ; move v pointer down by 4 columns
+
+    vld1.u8     {d6}, [r0], r1              ;load u data
+    vld1.u8     {d7}, [r3], r1              ;load v data
+    vld1.u8     {d8}, [r0], r1
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d20}, [r0], r1
+    vld1.u8     {d21}, [r3], r1
+
+    ;transpose to 8x16 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    sub         sp, sp, #32
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
+    mov         r12, sp
+    vst1.u8     {q3}, [r12]!
+    vst1.u8     {q10}, [r12]!
+
+    bl          vp8_mbloop_filter_neon
+
+    sub         r0, r0, r1, lsl #3
+    sub         r3, r3, r1, lsl #3
+
+    vld1.u8     {q3}, [sp]!
+    vld1.u8     {q10}, [sp]!
+
+    ;transpose to 16x8 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store op2, op1, op0, oq0, oq1, oq2
+    vst1.8      {d6}, [r0], r1
+    vst1.8      {d7}, [r3], r1
+    vst1.8      {d8}, [r0], r1
+    vst1.8      {d9}, [r3], r1
+    vst1.8      {d10}, [r0], r1
+    vst1.8      {d11}, [r3], r1
+    vst1.8      {d12}, [r0], r1
+    vst1.8      {d13}, [r3], r1
+    vst1.8      {d14}, [r0], r1
+    vst1.8      {d15}, [r3], r1
+    vst1.8      {d16}, [r0], r1
+    vst1.8      {d17}, [r3], r1
+    vst1.8      {d18}, [r0], r1
+    vst1.8      {d19}, [r3], r1
+    vst1.8      {d20}, [r0], r1
+    vst1.8      {d21}, [r3], r1
+
+    ldmia       sp!, {pc}
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
+
+; void vp8_mbloop_filter_neon()
+; This is a helper function for the macroblock loopfilters. The individual
+; functions do the necessary load, transpose (if necessary), preserve (if
+; necessary) and store.
+
+; TODO:
+; The vertical filter writes p3/q3 back out because two 4 element writes are
+; much simpler than ordering and writing two 3 element sets (or three 2 elements
+; sets, or whichever other combinations are possible).
+; If we can preserve q3 and q10, the vertical filter will be able to avoid
+; storing those values on the stack and reading them back after the filter.
+
+; r0,r1 PRESERVE
+; r2    flimit
+; r3    PRESERVE
+; q1    limit
+; q2    thresh
+; q3    p3
+; q4    p2
+; q5    p1
+; q6    p0
+; q7    q0
+; q8    q1
+; q9    q2
+; q10   q3
+
+|vp8_mbloop_filter_neon| PROC
+    ldr         r12, _mblf_coeff_
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
+    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
+
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q0
+    vmax.u8     q15, q11, q12
+
+    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh) * -1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh) * -1
+    vmax.u8     q15, q15, q3
+
+    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
+
+    vld1.u8     {q0}, [r12]!
+
+    vadd.u8     q2, q2, q2                  ; flimit * 2
+    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
+    vcge.u8     q15, q1, q15
+
+    vabd.u8     q1, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q12, q12, q12               ; b = abs(p0 - q0) * 2
+    vshr.u8     q1, q1, #1                  ; a = a / 2
+    vqadd.u8    q12, q12, q1                ; a = b + a
+    vcge.u8     q12, q2, q12                ; (a > flimit * 2 + limit) * -1
+
+    ; vp8_filter
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+    veor        q4, q4, q0                  ; ps2
+    veor        q9, q9, q0                  ; qs2
+
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vsubl.s8    q2, d14, d12                ; qs0 - ps0
+    vsubl.s8    q13, d15, d13
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+
+    vadd.s16    q10, q2, q2                 ; 3 * (qs0 - ps0)
+    vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
+
+    vadd.s16    q2, q2, q10
+    vadd.s16    q13, q13, q11
+
+    vld1.u8     {q12}, [r12]!               ; #3
+
+    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
+    vaddw.s8    q13, q13, d3
+
+    vld1.u8     {q11}, [r12]!               ; #4
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q13
+
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vld1.u8     {q15}, [r12]!               ; #63
+    ;
+    vand        q13, q1, q14                ; Filter2 &= hev
+
+    vld1.u8     {d7}, [r12]!                ; #9
+
+    vqadd.s8    q2, q13, q11                ; Filter1 = clamp(Filter2+4)
+    vqadd.s8    q13, q13, q12               ; Filter2 = clamp(Filter2+3)
+
+    vld1.u8     {d6}, [r12]!                ; #18
+
+    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
+    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
+
+    vmov        q10, q15
+    vmov        q12, q15
+
+    vqsub.s8    q7, q7, q2                  ; qs0 = clamp(qs0 - Filter1)
+
+    vld1.u8     {d5}, [r12]!                ; #27
+
+    vqadd.s8    q6, q6, q13                 ; ps0 = clamp(ps0 + Filter2)
+
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
+
+    ; roughly 1/7th difference across boundary
+    ; roughly 2/7th difference across boundary
+    ; roughly 3/7th difference across boundary
+    vmov        q11, q15
+    vmov        q13, q15
+    vmov        q14, q15
+
+    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
+    vmlal.s8    q11, d3, d7
+    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
+    vmlal.s8    q13, d3, d6
+    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
+    vmlal.s8    q15, d3, d5
+    vqshrn.s16  d20, q10, #7                ; u = clamp((63 + Filter2 * 9)>>7)
+    vqshrn.s16  d21, q11, #7
+    vqshrn.s16  d24, q12, #7                ; u = clamp((63 + Filter2 * 18)>>7)
+    vqshrn.s16  d25, q13, #7
+    vqshrn.s16  d28, q14, #7                ; u = clamp((63 + Filter2 * 27)>>7)
+    vqshrn.s16  d29, q15, #7
+
+    vqsub.s8    q11, q9, q10                ; s = clamp(qs2 - u)
+    vqadd.s8    q10, q4, q10                ; s = clamp(ps2 + u)
+    vqsub.s8    q13, q8, q12                ; s = clamp(qs1 - u)
+    vqadd.s8    q12, q5, q12                ; s = clamp(ps1 + u)
+    vqsub.s8    q15, q7, q14                ; s = clamp(qs0 - u)
+    vqadd.s8    q14, q6, q14                ; s = clamp(ps0 + u)
+    veor        q9, q11, q0                 ; *oq2 = s^0x80
+    veor        q4, q10, q0                 ; *op2 = s^0x80
+    veor        q8, q13, q0                 ; *oq1 = s^0x80
+    veor        q5, q12, q0                 ; *op2 = s^0x80
+    veor        q7, q15, q0                 ; *oq0 = s^0x80
+    veor        q6, q14, q0                 ; *op0 = s^0x80
+
+    bx          lr
+    ENDP        ; |vp8_mbloop_filter_neon|
+
+    AREA    mbloopfilter_dat, DATA, READONLY
+_mblf_coeff_
+    DCD     mblf_coeff
+mblf_coeff
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
+    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
+    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
+    DCD     0x1b1b1b1b, 0x1b1b1b1b
+
+    END
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
deleted file mode 100644
index 52ab059db..000000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ /dev/null
@@ -1,220 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    sub         r3, r3, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r3], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r3], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r3], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r3], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r3], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r3], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r3], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r3], r1             ; q3
-
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _mbhlfuv_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    add         r0, r0, r1
-    add         r3, r3, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-
-    vst1.u8     {d8}, [r0], r1              ; store u op2
-    vst1.u8     {d9}, [r3], r1              ; store v op2
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r3], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r3], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r3], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r3], r1             ; store v oq1
-    vst1.u8     {d18}, [r0], r1             ; store u oq2
-    vst1.u8     {d19}, [r3], r1             ; store v oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    mbhloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfuv_coeff_
-    DCD     mbhlfuv_coeff
-mbhlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
deleted file mode 100644
index b0755b05f..000000000
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ /dev/null
@@ -1,201 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q5}, [r0], r1              ; p1
-    ldr         r12, _mbhlfy_coeff_
-    vld1.u8     {q6}, [r0], r1              ; p0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q7}, [r0], r1              ; q0
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    sub         r0, r0, r1, lsl #3
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    add         r0, r0, r1
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-
-    vst1.u8     {q4}, [r0]                  ; store op2
-    vst1.u8     {q5}, [r2]                  ; store op1
-    vst1.u8     {q6}, [r3], r1              ; store op0
-    add         r12, r3, r1
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12], r1             ; store oq1
-    vst1.u8     {q9}, [r12]             ; store oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    mbhloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfy_coeff_
-    DCD     mbhlfy_coeff
-mbhlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
deleted file mode 100644
index 044b3a3a8..000000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ /dev/null
@@ -1,261 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r3, r3, #4                  ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r3], r1              ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r3], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r3], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r3], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r3], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r3], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r3], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r3], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    sub         sp, sp, #32
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    mov         r12, sp
-    vst1.u8     {q3}, [r12]!
-    vst1.u8     {q10}, [r12]!
-    ldr         r12, _mbvlfuv_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0], r1
-    vst1.8      {d7}, [r3], r1
-    vst1.8      {d8}, [r0], r1
-    vst1.8      {d9}, [r3], r1
-    vst1.8      {d10}, [r0], r1
-    vst1.8      {d11}, [r3], r1
-    vst1.8      {d12}, [r0], r1
-    vst1.8      {d13}, [r3], r1
-    vst1.8      {d14}, [r0], r1
-    vst1.8      {d15}, [r3], r1
-    vst1.8      {d16}, [r0], r1
-    vst1.8      {d17}, [r3], r1
-    vst1.8      {d18}, [r0], r1
-    vst1.8      {d19}, [r3], r1
-    vst1.8      {d20}, [r0], r1
-    vst1.8      {d21}, [r3], r1
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    mbvloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbvlfuv_coeff_
-    DCD     mbvlfuv_coeff
-mbvlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
deleted file mode 100644
index e0716625c..000000000
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ /dev/null
@@ -1,267 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    vld1.u8     {d8}, [r0], r1
-    sub         sp, sp, #32
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    mov         r12, sp
-    vst1.u8     {q3}, [r12]!
-    vst1.u8     {q10}, [r12]!
-    ldr         r12, _mbvlfy_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    sub         r0, r0, r1, lsl #4
-
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-    add         r12, r3, r1
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0]
-    vst1.8      {d8}, [r2]
-    vst1.8      {d10}, [r3]
-    vst1.8      {d12}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d14}, [r12]
-    vst1.8      {d16}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d18}, [r0]
-    vst1.8      {d20}, [r2], r1
-    add         r3, r2, r1
-    vst1.8      {d7}, [r2]
-    vst1.8      {d9}, [r3], r1
-    add         r12, r3, r1
-    vst1.8      {d11}, [r3]
-    vst1.8      {d13}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d15}, [r12]
-    vst1.8      {d17}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d19}, [r0]
-    vst1.8      {d21}, [r2]
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    mbvloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbvlfy_coeff_
-    DCD     mbvlfy_coeff
-mbvlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 3b5aaa548..9ab92b334 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -147,16 +147,10 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem8x8_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/copymem16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/dc_only_idct_add_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/iwalsh_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilter_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimplehorizontaledge_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfiltersimpleverticaledge_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/loopfilterverticaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterhorizontaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterhorizontaledge_y_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterverticaledge_uv_neon$(ASM)
-VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilterverticaledge_y_neon$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/mbloopfilter_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon2b_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon4b_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/reconb_neon$(ASM)

From 385865f8202a8f273d6c9920134ddfbeb2b323da Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Mon, 25 Oct 2010 10:07:35 -0400
Subject: [PATCH 239/307] quiet compiler

clean up compiler warnings, man in the yellow hat warnings, and start to
remove unused #includes

Change-Id: I6267e98d9b3024b6fb1ef2732b29067a33cb96f6
---
 vp8/encoder/temporal_filter.c | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 28938657f..630afdbf1 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -32,19 +32,8 @@
 #include "vpxerrors.h"
 
 #include <math.h>
-#include <stdio.h>
 #include <limits.h>
 
-/*
-#if CONFIG_RUNTIME_CPU_DETECT
-#define IF_RTCD(x) (x)
-#define RTCD(x) &cpi->common.rtcd.x
-#else
-#define IF_RTCD(x) NULL
-#define RTCD(x) NULL
-#endif
-*/
-
 #define ALT_REF_MC_ENABLED 1    // dis/enable MC in AltRef filtering
 #define ALT_REF_SUBPEL_ENABLED 1 // dis/enable subpel in MC AltRef filtering
 
@@ -130,8 +119,8 @@ static void apply_temporal_filter
     unsigned int block_size,
     int strength,
     int filter_weight,
-    int *accumulator,
-    int *count
+    unsigned int *accumulator,
+    unsigned int *count
 )
 {
     int i, j, k;
@@ -168,7 +157,7 @@ static void apply_temporal_filter
             modifier = 16 - modifier;
 #endif
             modifier *= filter_weight;
-            
+
             count[k] += modifier;
             accumulator[k] += modifier * pixel_value;
 
@@ -343,7 +332,7 @@ static void vp8cx_temp_blur1_c
     YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
     unsigned char *dst1, *dst2;
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-    
+
     // Save input state
     unsigned char *y_buffer = mbd->pre.y_buffer;
     unsigned char *u_buffer = mbd->pre.u_buffer;

From 2ad4810a215bec3c944ee9e4a71f7aec58dc0f15 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 25 Oct 2010 10:28:45 -0400
Subject: [PATCH 240/307] NASM: trailing slash for ASFLAGS includes

Fix out-of-tree builds using NASM. NASM expects its include paths to
have a trailing slash. These aren't used used when doing in-tree builds
(./configure)

Change-Id: I38d469d15acb1b7e65733a2e5ca8c9d86fa4ad86
---
 build/make/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build/make/Makefile b/build/make/Makefile
index 1ca747a26..40fa6d50c 100755
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -65,7 +65,7 @@ endif
 BUILD_ROOT?=.
 VPATH=$(SRC_PATH_BARE)
 CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
-ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
+ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/
 DIST_DIR?=dist
 HOSTCC?=gcc
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))

From c3fd2c4ea7432cf568165f7da6d99140c1f0957f Mon Sep 17 00:00:00 2001
From: Martin Ettl <ettl.martin78@googlemail.com>
Date: Mon, 25 Oct 2010 13:14:11 -0400
Subject: [PATCH 241/307] Fix leaked file descriptor with ENTROPY_STATS

cppcheck found a leaked file descriptor in the debugging code
enabled by defining ENTROPY_STATS. Fixes issue #60.

Change-Id: I0c1d0669cb94d44fed77860f97b82763be06b7cb
---
 vp8/encoder/onyx_if.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7e1583dd9..b4332b321 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2529,6 +2529,7 @@ void vp8_remove_compressor(VP8_PTR *ptr)
             }
 
             fprintf(fmode, "};\n");
+            fclose(fmode);
         }
 #endif
 

From 53f61ce226ed6c84ca7036839d1c9efa6832cb48 Mon Sep 17 00:00:00 2001
From: Aaron Watry <awatry@gmail.com>
Date: Thu, 30 Sep 2010 15:36:00 -0400
Subject: [PATCH 242/307] Add sparc-solaris-gcc as a build target.

Solaris 10 requires -lposix4 to build successfully on gcc. I only have a
Sparc machine to test with on Solaris 10, but this change leaves
OpenSolaris x86 in a usable state w/ gnu-generic.

I am of the belief that this change should fix Solaris 10 on Sparc, but
will leave other Solaris architectures as is. If someone has an x86
Solaris 10 machine to test on, they may add x86-solaris-gcc to
libvpx/configure and give it a go.

Change-Id: I17a282028bb4d3e9fd8764159f95665160f7b62a
---
 build/make/configure.sh | 13 +++++++++++++
 configure               |  1 +
 2 files changed, 14 insertions(+)

diff --git a/build/make/configure.sh b/build/make/configure.sh
index e20f0d133..cdd55ebd8 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -532,6 +532,9 @@ process_common_toolchain() {
             *powerpc*)
                 tgt_isa=ppc32
                 ;;
+            *sparc*)
+                tgt_isa=sparc
+                ;;
         esac
 
         # detect tgt_os
@@ -551,6 +554,9 @@ process_common_toolchain() {
             *linux*|*bsd*)
                 tgt_os=linux
                 ;;
+            *solaris2.10)
+                tgt_os=solaris
+                ;;
         esac
 
         if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then
@@ -602,6 +608,13 @@ process_common_toolchain() {
             ;;
     esac
 
+    # Handle Solaris variants. Solaris 10 needs -lposix4
+    case ${toolchain} in
+        *-solaris-*)
+            add_extralibs -lposix4
+            ;;
+    esac
+
     # Process ARM architecture variants
     case ${toolchain} in
     arm*|iwmmxt*)
diff --git a/configure b/configure
index 0321e1abf..39ef83ffa 100755
--- a/configure
+++ b/configure
@@ -101,6 +101,7 @@ all_platforms="${all_platforms} ppc32-linux-gcc"
 all_platforms="${all_platforms} ppc64-darwin8-gcc"
 all_platforms="${all_platforms} ppc64-darwin9-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
+all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
 all_platforms="${all_platforms} x86-darwin8-icc"
 all_platforms="${all_platforms} x86-darwin9-gcc"

From d1a4cce8092f0a71d4be34b1c2ea74d06dbac9d0 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Fri, 22 Oct 2010 15:41:06 -0700
Subject: [PATCH 243/307] Debug option for drawing motion vectors.

Postproc level that uses Bresenham's line algorithm
to draw motion vectors onto the postproc buffer.

Change-Id: I34c7daa324f2bdfee71e84fcb1c50b90fa06f6fb
---
 vp8/common/postproc.c | 70 +++++++++++++++++++++++++++++++++++++-
 vp8/common/ppflags.h  |  1 +
 vp8/common/textblit.c | 79 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 148 insertions(+), 2 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 0c8cf13bf..df18c7c75 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -76,7 +76,7 @@ const short vp8_rv[] =
 
 
 extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
-
+extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
 /***********************************************************************************************************
  */
 void vp8_post_proc_down_and_across_c
@@ -450,6 +450,45 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
 #define RTCD_VTABLE(oci) NULL
 #endif
 
+static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
+{
+    int dx = *x1 - x0;
+    int dy = *y1 - y0;
+
+    if (*x1 > width)
+    {
+        *x1 = width;
+        if (dy)
+            *y1 = ((width-x0)*dy)/dx + y0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*x1 < 0)
+    {
+        *x1 = 0;
+        if (dy)
+            *y1 = ((0-x0)*dy)/dx + y0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*y1 > height)
+    {
+        *y1 = height;
+        if (dx)
+            *x1 = ((height-y0)*dx)/dy + x0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+    if (*y1 < 0)
+    {
+        *y1 = 0;
+        if (dx)
+            *x1 = ((0-y0)*dx)/dy + x0;
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+    }
+}
+
 int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
 {
     char message[512];
@@ -622,8 +661,37 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 #endif
 
     }
+    else if (flags & VP8D_DEBUG_LEVEL5)
+    {
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        int mb_cols = width  >> 4;
+        unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+        int x0, y0;
 
+        for (y0 = 8; y0 < (height + 8); y0 += 16)
+        {
+            for (x0 = 8; x0 < (width + 8); x0 += 16)
+            {
+               int x1, y1;
+               if (mi->mbmi.mode >= NEARESTMV)
+                {
+                    MV *mv = &mi->mbmi.mv.as_mv;
 
+                    x1 = x0 + (mv->col >> 3);
+                    y1 = y0 + (mv->row >> 3);
+
+                    constrain_line (x0, &x1, y0, &y1, width, height);
+                    vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
+                }
+                mi++;
+            }
+            mi++;
+        }
+    }
 
     *dest = oci->post_proc_buffer;
 
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index b1f925c44..a1e2330bb 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -21,6 +21,7 @@ enum
     VP8D_DEBUG_LEVEL2   = 16,
     VP8D_DEBUG_LEVEL3   = 32,
     VP8D_DEBUG_LEVEL4   = 64,
+    VP8D_DEBUG_LEVEL5   = 128,
 };
 
 #endif
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index da40f9352..b7922d385 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -8,7 +8,7 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-
+#include <stdlib.h>
 
 
 void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
@@ -51,3 +51,80 @@ void vp8_blit_text(const char *msg, unsigned char *address, const int pitch)
         colpos++;
     }
 }
+
+static void plot (const int x, const int y, unsigned char *image, const int pitch)
+{
+    image [x+y*pitch] ^= 255;
+}
+
+// Bresenham line algorithm
+void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
+{
+    int steep = abs(y1 - y0) > abs(x1 - x0);
+    int deltax, deltay;
+    int error, ystep, y, x;
+
+    if (steep)
+    {
+        int t;
+        t = x0;
+        x0 = y0;
+        y0 = t;
+
+        t = x1;
+        x1 = y1;
+        y1 = t;
+    }
+
+    if (x0 > x1)
+    {
+        int t;
+        t = x0;
+        x0 = x1;
+        x1 = t;
+
+        t = y0;
+        y0 = y1;
+        y1 = t;
+    }
+
+    deltax = x1 - x0;
+    deltay = abs(y1 - y0);
+    error  = deltax / 2;
+
+    y = y0;
+
+    if (y0 < y1)
+        ystep = 1;
+    else
+        ystep = -1;
+
+    if (steep)
+    {
+        for (x = x0; x <= x1; x++)
+        {
+            plot(y,x, image, pitch);
+
+            error = error - deltay;
+            if (error < 0)
+            {
+                y = y + ystep;
+                error = error + deltax;
+            }
+        }
+    }
+    else
+    {
+        for (x = x0; x <= x1; x++)
+        {
+            plot(x,y, image, pitch);
+
+            error = error - deltay;
+            if (error < 0)
+            {
+                y = y + ystep;
+                error = error + deltax;
+            }
+        }
+    }
+}

From ad252daf65bcc989f5265b7bef7b04430cafcef4 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 19 Oct 2010 17:20:17 -0400
Subject: [PATCH 244/307] ivfdec: webm reader support

This patch enables ivfdec to decode WebM files. WebM demuxing is
provided by the Matthew Gregan's Nestegg library.

This patch also makes minor changes to the timebase->framerate
handling when doing Y4M output. For WebM files, the framerate is
guessed by looking at the first second of video. For IVF files,
the timebase=1/(2*fps) hack is still in place, but is only used
if the timebase denominator is less than 1000. This is in anticipation
of change I8d25b5b, which introduces the distinction between
framerate and timebase to ivfenc. In the case of high resolution
timebases, like 100ns, we would have to guess the framerate
like we do for WebM, but since WebM support in ivfenc will
deprecate IVF output, we just assume 30fps rather than writing the
lookahead code.

Change-Id: I1dd8600f13bf6071533d2816f005da9ede4f60a2
---
 examples.mk                       |   7 +
 ivfdec.c                          | 288 ++++++++++++++++++++++++++----
 nestegg/halloc/src/halloc.c       |   2 +-
 nestegg/include/nestegg/nestegg.h |   2 +-
 nestegg/src/nestegg.c             |   4 +-
 5 files changed, 268 insertions(+), 35 deletions(-)

diff --git a/examples.mk b/examples.mk
index 00ffc7037..56da642ae 100644
--- a/examples.mk
+++ b/examples.mk
@@ -17,6 +17,13 @@ ivfdec.SRCS                 += md5_utils.c md5_utils.h
 ivfdec.SRCS                 += vpx_ports/vpx_timer.h
 ivfdec.SRCS                 += vpx/vpx_integer.h
 ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
+ivfdec.SRCS                 += nestegg/halloc/halloc.h
+ivfdec.SRCS                 += nestegg/halloc/src/align.h
+ivfdec.SRCS                 += nestegg/halloc/src/halloc.c
+ivfdec.SRCS                 += nestegg/halloc/src/hlist.h
+ivfdec.SRCS                 += nestegg/halloc/src/macros.h
+ivfdec.SRCS                 += nestegg/include/nestegg/nestegg.h
+ivfdec.SRCS                 += nestegg/src/nestegg.c
 ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
 ivfdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
diff --git a/ivfdec.c b/ivfdec.c
index 8065e70be..d7ab4171b 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -26,9 +26,11 @@
 #if CONFIG_MD5
 #include "md5_utils.h"
 #endif
+#include "nestegg/include/nestegg/nestegg.h"
 
 static const char *exec_name;
 
+#define VP8_FOURCC (0x00385056)
 static const struct
 {
     char const *name;
@@ -38,7 +40,7 @@ static const struct
 } ifaces[] =
 {
 #if CONFIG_VP8_DECODER
-    {"vp8",  &vpx_codec_vp8_dx_algo,   0x00385056, 0x00FFFFFF},
+    {"vp8",  &vpx_codec_vp8_dx_algo,   VP8_FOURCC, 0x00FFFFFF},
 #endif
 };
 
@@ -157,23 +159,70 @@ static unsigned int mem_get_le32(const void *vmem)
     return val;
 }
 
+enum file_kind
+{
+    RAW_FILE,
+    IVF_FILE,
+    WEBM_FILE
+};
+
+struct input_ctx
+{
+    enum file_kind  kind;
+    FILE           *infile;
+    nestegg        *nestegg_ctx;
+    nestegg_packet *pkt;
+    unsigned int    chunk;
+    unsigned int    chunks;
+    unsigned int    video_track;
+};
+
 #define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
 #define RAW_FRAME_HDR_SZ (sizeof(uint32_t))
-static int read_frame(FILE                  *infile,
+static int read_frame(struct input_ctx      *input,
                       uint8_t               **buf,
-                      uint32_t              *buf_sz,
-                      uint32_t              *buf_alloc_sz,
-                      int                    is_ivf)
+                      size_t                *buf_sz,
+                      size_t                *buf_alloc_sz)
 {
-    char     raw_hdr[IVF_FRAME_HDR_SZ];
-    uint32_t new_buf_sz;
+    char            raw_hdr[IVF_FRAME_HDR_SZ];
+    size_t          new_buf_sz;
+    FILE           *infile = input->infile;
+    enum file_kind  kind = input->kind;
+    if(kind == WEBM_FILE)
+    {
+        if(input->chunk >= input->chunks)
+        {
+            unsigned int track;
 
+            do
+            {
+                /* End of this packet, get another. */
+                if(input->pkt)
+                    nestegg_free_packet(input->pkt);
+
+                if(nestegg_read_packet(input->nestegg_ctx, &input->pkt) <= 0
+                   || nestegg_packet_track(input->pkt, &track))
+                    return 1;
+
+            } while(track != input->video_track);
+
+            if(nestegg_packet_count(input->pkt, &input->chunks))
+                return 1;
+            input->chunk = 0;
+        }
+
+        if(nestegg_packet_data(input->pkt, input->chunk, buf, buf_sz))
+            return 1;
+        input->chunk++;
+
+        return 0;
+    }
     /* For both the raw and ivf formats, the frame size is the first 4 bytes
      * of the frame header. We just need to special case on the header
      * size.
      */
-    if (fread(raw_hdr, is_ivf ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1,
-              infile) != 1)
+    else if (fread(raw_hdr, kind==IVF_FILE
+                   ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1, infile) != 1)
     {
         if (!feof(infile))
             fprintf(stderr, "Failed to read frame size\n");
@@ -187,13 +236,13 @@ static int read_frame(FILE                  *infile,
         if (new_buf_sz > 256 * 1024 * 1024)
         {
             fprintf(stderr, "Error: Read invalid frame size (%u)\n",
-                    new_buf_sz);
+                    (unsigned int)new_buf_sz);
             new_buf_sz = 0;
         }
 
-        if (!is_ivf && new_buf_sz > 256 * 1024)
+        if (kind == RAW_FILE && new_buf_sz > 256 * 1024)
             fprintf(stderr, "Warning: Read invalid frame size (%u)"
-                    " - not a raw file?\n", new_buf_sz);
+                    " - not a raw file?\n", (unsigned int)new_buf_sz);
 
         if (new_buf_sz > *buf_alloc_sz)
         {
@@ -295,8 +344,8 @@ unsigned int file_is_ivf(FILE *infile,
                          unsigned int *fourcc,
                          unsigned int *width,
                          unsigned int *height,
-                         unsigned int *timebase_num,
-                         unsigned int *timebase_den)
+                         unsigned int *fps_den,
+                         unsigned int *fps_num)
 {
     char raw_hdr[32];
     int is_ivf = 0;
@@ -315,8 +364,30 @@ unsigned int file_is_ivf(FILE *infile,
             *fourcc = mem_get_le32(raw_hdr + 8);
             *width = mem_get_le16(raw_hdr + 12);
             *height = mem_get_le16(raw_hdr + 14);
-            *timebase_den = mem_get_le32(raw_hdr + 16);
-            *timebase_num = mem_get_le32(raw_hdr + 20);
+            *fps_num = mem_get_le32(raw_hdr + 16);
+            *fps_den = mem_get_le32(raw_hdr + 20);
+
+            /* Some versions of ivfenc used 1/(2*fps) for the timebase, so
+             * we can guess the framerate using only the timebase in this
+             * case. Other files would require reading ahead to guess the
+             * timebase, like we do for webm.
+             */
+            if(*fps_num < 1000)
+            {
+                /* Correct for the factor of 2 applied to the timebase in the
+                 * encoder.
+                 */
+                if(*fps_num&1)*fps_den<<=1;
+                else *fps_num>>=1;
+            }
+            else
+            {
+                /* Don't know FPS for sure, and don't have readahead code
+                 * (yet?), so just default to 30fps.
+                 */
+                *fps_num = 30;
+                *fps_den = 1;
+            }
         }
     }
 
@@ -326,18 +397,163 @@ unsigned int file_is_ivf(FILE *infile,
     return is_ivf;
 }
 
+
+static int
+nestegg_read_cb(void *buffer, size_t length, void *userdata)
+{
+    FILE *f = userdata;
+
+    fread(buffer, 1, length, f);
+    if (ferror(f))
+        return -1;
+    if (feof(f))
+        return 0;
+    return 1;
+}
+
+
+static int
+nestegg_seek_cb(int64_t offset, int whence, void * userdata)
+{
+    switch(whence) {
+        case NESTEGG_SEEK_SET: whence = SEEK_SET; break;
+        case NESTEGG_SEEK_CUR: whence = SEEK_CUR; break;
+        case NESTEGG_SEEK_END: whence = SEEK_END; break;
+    };
+    return fseek(userdata, offset, whence)? -1 : 0;
+}
+
+
+static int64_t
+nestegg_tell_cb(void * userdata)
+{
+    return ftell(userdata);
+}
+
+
+static void
+nestegg_log_cb(nestegg * context, unsigned int severity, char const * format,
+               ...)
+{
+    va_list ap;
+
+    va_start(ap, format);
+    vfprintf(stderr, format, ap);
+    fprintf(stderr, "\n");
+    va_end(ap);
+}
+
+
+static int
+webm_guess_framerate(struct input_ctx *input,
+                     unsigned int     *fps_den,
+                     unsigned int     *fps_num)
+{
+    unsigned int i;
+    uint64_t     tstamp=0;
+
+    /* Guess the framerate. Read up to 1 second, or 50 video packets,
+     * whichever comes first.
+     */
+    for(i=0; tstamp < 1000000000 && i < 50;)
+    {
+        nestegg_packet * pkt;
+        unsigned int track;
+
+        if(nestegg_read_packet(input->nestegg_ctx, &pkt) <= 0)
+            break;
+
+        nestegg_packet_track(pkt, &track);
+        if(track == input->video_track)
+        {
+            nestegg_packet_tstamp(pkt, &tstamp);
+            i++;
+        }
+
+        nestegg_free_packet(pkt);
+    }
+
+    if(nestegg_track_seek(input->nestegg_ctx, input->video_track, 0))
+        goto fail;
+
+    *fps_num = (i - 1) * 1000000;
+    *fps_den = tstamp / 1000;
+    return 0;
+fail:
+    input->nestegg_ctx = NULL;
+    rewind(input->infile);
+    return 1;
+}
+
+
+static int
+file_is_webm(struct input_ctx *input,
+             unsigned int     *fourcc,
+             unsigned int     *width,
+             unsigned int     *height,
+             unsigned int     *fps_den,
+             unsigned int     *fps_num)
+{
+    unsigned int i, n;
+    int          track_type = -1;
+    uint64_t     tstamp=0;
+
+    nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
+                     input->infile};
+    nestegg_video_params params;
+    nestegg_packet * pkt;
+
+    if(nestegg_init(&input->nestegg_ctx, io, NULL))
+        goto fail;
+
+    if(nestegg_track_count(input->nestegg_ctx, &n))
+        goto fail;
+
+    for(i=0; i<n; i++)
+    {
+        track_type = nestegg_track_type(input->nestegg_ctx, i);
+
+        if(track_type == NESTEGG_TRACK_VIDEO)
+            break;
+        else if(track_type < 0)
+            goto fail;
+    }
+
+    if(nestegg_track_codec_id(input->nestegg_ctx, i) != NESTEGG_CODEC_VP8)
+    {
+        fprintf(stderr, "Not VP8 video, quitting.\n");
+        exit(1);
+    }
+
+    input->video_track = i;
+
+    if(nestegg_track_video_params(input->nestegg_ctx, i, &params))
+        goto fail;
+
+    *fps_den = 0;
+    *fps_num = 0;
+    *fourcc = VP8_FOURCC;
+    *width = params.width;
+    *height = params.height;
+    return 1;
+fail:
+    input->nestegg_ctx = NULL;
+    rewind(input->infile);
+    return 0;
+}
+
 int main(int argc, const char **argv_)
 {
     vpx_codec_ctx_t          decoder;
     char                  *prefix = NULL, *fn = NULL;
     int                    i;
     uint8_t               *buf = NULL;
-    uint32_t               buf_sz = 0, buf_alloc_sz = 0;
+    size_t                 buf_sz = 0, buf_alloc_sz = 0;
     FILE                  *infile;
     int                    frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
     int                    stop_after = 0, postproc = 0, summary = 0, quiet = 0;
     vpx_codec_iface_t       *iface = NULL;
-    unsigned int           is_ivf, fourcc;
+    unsigned int           fourcc;
     unsigned long          dx_time = 0;
     struct arg               arg;
     char                   **argv, **argi, **argj;
@@ -345,13 +561,14 @@ int main(int argc, const char **argv_)
     int                     use_y4m = 0;
     unsigned int            width;
     unsigned int            height;
-    unsigned int            timebase_num;
-    unsigned int            timebase_den;
+    unsigned int            fps_den;
+    unsigned int            fps_num;
     void                   *out = NULL;
     vpx_codec_dec_cfg_t     cfg = {0};
 #if CONFIG_VP8_DECODER
     vp8_postproc_cfg_t      vp8_pp_cfg = {0};
 #endif
+    struct input_ctx        input = {0};
 
     /* Parse command line */
     exec_name = argv_[0];
@@ -465,10 +682,16 @@ int main(int argc, const char **argv_)
     if (fn2)
         out = out_open(fn2, do_md5);
 
-    is_ivf = file_is_ivf(infile, &fourcc, &width, &height,
-                         &timebase_num, &timebase_den);
+    input.infile = infile;
+    if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
+                   &fps_num))
+        input.kind = IVF_FILE;
+    else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den,                                 &fps_num))
+        input.kind = WEBM_FILE;
+    else
+        input.kind = RAW_FILE;
 
-    if (is_ivf)
+    if (input.kind != RAW_FILE)
     {
         if (use_y4m)
         {
@@ -478,15 +701,15 @@ int main(int argc, const char **argv_)
                 fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
                 return EXIT_FAILURE;
             }
-            /*Correct for the factor of 2 applied to the timebase in the
-               encoder.*/
-            if(timebase_den&1)timebase_num<<=1;
-            else timebase_den>>=1;
+
+            if(input.kind == WEBM_FILE)
+                webm_guess_framerate(&input, &fps_den, &fps_num);
+
             /*Note: We can't output an aspect ratio here because IVF doesn't
                store one, and neither does VP8.
               That will have to wait until these tools support WebM natively.*/
             sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
-                    "420jpeg", width, height, timebase_den, timebase_num, 'p');
+                    "420jpeg", width, height, fps_num, fps_den, 'p');
             out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
         }
 
@@ -507,7 +730,7 @@ int main(int argc, const char **argv_)
     }
     else if(use_y4m)
     {
-        fprintf(stderr, "YUV4MPEG2 output only supported from IVF input.\n");
+        fprintf(stderr, "YUV4MPEG2 output not supported from raw input.\n");
         return EXIT_FAILURE;
     }
 
@@ -533,7 +756,7 @@ int main(int argc, const char **argv_)
 #endif
 
     /* Decode file */
-    while (!read_frame(infile, &buf, &buf_sz, &buf_alloc_sz, is_ivf))
+    while (!read_frame(&input, &buf, &buf_sz, &buf_alloc_sz))
     {
         vpx_codec_iter_t  iter = NULL;
         vpx_image_t    *img;
@@ -631,7 +854,10 @@ fail:
     if (fn2)
         out_close(out, fn2, do_md5);
 
-    free(buf);
+    if(input.nestegg_ctx)
+        nestegg_destroy(input.nestegg_ctx);
+    if(input.kind != WEBM_FILE)
+        free(buf);
     fclose(infile);
     free(prefix);
     free(argv);
diff --git a/nestegg/halloc/src/halloc.c b/nestegg/halloc/src/halloc.c
index 5758fc07b..38fd6c11a 100644
--- a/nestegg/halloc/src/halloc.c
+++ b/nestegg/halloc/src/halloc.c
@@ -15,7 +15,7 @@
 #include <stdlib.h>  /* realloc */
 #include <string.h>  /* memset & co */
 
-#include "halloc.h"
+#include "../halloc.h"
 #include "align.h"
 #include "hlist.h"
 
diff --git a/nestegg/include/nestegg/nestegg.h b/nestegg/include/nestegg/nestegg.h
index 5e1069884..7447d141d 100644
--- a/nestegg/include/nestegg/nestegg.h
+++ b/nestegg/include/nestegg/nestegg.h
@@ -7,7 +7,7 @@
 #ifndef   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
 #define   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
 
-#include <nestegg/nestegg-stdint.h>
+#include "vpx/vpx_integer.h"
 
 #ifdef __cplusplus
 extern "C" {
diff --git a/nestegg/src/nestegg.c b/nestegg/src/nestegg.c
index ef09918da..63a0e83e5 100644
--- a/nestegg/src/nestegg.c
+++ b/nestegg/src/nestegg.c
@@ -8,8 +8,8 @@
 #include <stdlib.h>
 #include <string.h>
 
-#include "halloc.h"
-#include "nestegg/nestegg.h"
+#include "nestegg/halloc/halloc.h"
+#include "nestegg/include/nestegg/nestegg.h"
 
 /* EBML Elements */
 #define ID_EBML                 0x1a45dfa3

From cfe3f9173f7b19f41c59baf2390b6db65b023c00 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 20 Oct 2010 10:49:12 -0400
Subject: [PATCH 245/307] ivfdec: support y4m output from raw input

The width and height needed to write the Y4M header can be found by
probing the stream with vpx_codec_peek_stream_info(). This also
has the consequence of supporting multiple codecs from raw files
with automatic detections, should we add additional codecs in the
future.

Change-Id: I7522a8f4c7577b6ed9876d744c59cd86d30c6049
---
 ivfdec.c | 116 +++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 74 insertions(+), 42 deletions(-)

diff --git a/ivfdec.c b/ivfdec.c
index d7ab4171b..c2822904f 100644
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -398,6 +398,41 @@ unsigned int file_is_ivf(FILE *infile,
 }
 
 
+unsigned int file_is_raw(FILE *infile,
+                         unsigned int *fourcc,
+                         unsigned int *width,
+                         unsigned int *height,
+                         unsigned int *fps_den,
+                         unsigned int *fps_num)
+{
+    unsigned char buf[32];
+    int is_raw = 0;
+    vpx_codec_stream_info_t si;
+
+    if (fread(buf, 1, 32, infile) == 32)
+    {
+        int i;
+
+        if(mem_get_le32(buf) < 256 * 1024 * 1024)
+            for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+                if(!vpx_codec_peek_stream_info(ifaces[i].iface,
+                                               buf + 4, 32 - 4, &si))
+                {
+                    is_raw = 1;
+                    *fourcc = ifaces[i].fourcc;
+                    *width = si.w;
+                    *height = si.h;
+                    *fps_num = 30;
+                    *fps_den = 1;
+                    break;
+                }
+    }
+
+    rewind(infile);
+    return is_raw;
+}
+
+
 static int
 nestegg_read_cb(void *buffer, size_t length, void *userdata)
 {
@@ -686,54 +721,51 @@ int main(int argc, const char **argv_)
     if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
                    &fps_num))
         input.kind = IVF_FILE;
-    else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den,                                 &fps_num))
+    else if(file_is_webm(&input, &fourcc, &width, &height, &fps_den, &fps_num))
         input.kind = WEBM_FILE;
-    else
+    else if(file_is_raw(infile, &fourcc, &width, &height, &fps_den, &fps_num))
         input.kind = RAW_FILE;
-
-    if (input.kind != RAW_FILE)
+    else
     {
-        if (use_y4m)
-        {
-            char buffer[128];
-            if (!fn2)
-            {
-                fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
-                return EXIT_FAILURE;
-            }
-
-            if(input.kind == WEBM_FILE)
-                webm_guess_framerate(&input, &fps_den, &fps_num);
-
-            /*Note: We can't output an aspect ratio here because IVF doesn't
-               store one, and neither does VP8.
-              That will have to wait until these tools support WebM natively.*/
-            sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
-                    "420jpeg", width, height, fps_num, fps_den, 'p');
-            out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
-        }
-
-        /* Try to determine the codec from the fourcc. */
-        for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-            if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
-            {
-                vpx_codec_iface_t  *ivf_iface = ifaces[i].iface;
-
-                if (iface && iface != ivf_iface)
-                    fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
-                            ifaces[i].name);
-                else
-                    iface = ivf_iface;
-
-                break;
-            }
-    }
-    else if(use_y4m)
-    {
-        fprintf(stderr, "YUV4MPEG2 output not supported from raw input.\n");
+        fprintf(stderr, "Unrecognized input file type.\n");
         return EXIT_FAILURE;
     }
 
+    if (use_y4m)
+    {
+        char buffer[128];
+        if (!fn2)
+        {
+            fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
+            return EXIT_FAILURE;
+        }
+
+        if(input.kind == WEBM_FILE)
+            webm_guess_framerate(&input, &fps_den, &fps_num);
+
+        /*Note: We can't output an aspect ratio here because IVF doesn't
+           store one, and neither does VP8.
+          That will have to wait until these tools support WebM natively.*/
+        sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
+                "420jpeg", width, height, fps_num, fps_den, 'p');
+        out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
+    }
+
+    /* Try to determine the codec from the fourcc. */
+    for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
+        if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
+        {
+            vpx_codec_iface_t  *ivf_iface = ifaces[i].iface;
+
+            if (iface && iface != ivf_iface)
+                fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
+                        ifaces[i].name);
+            else
+                iface = ivf_iface;
+
+            break;
+        }
+
     if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
                            postproc ? VPX_CODEC_USE_POSTPROC : 0))
     {

From 1258cf62aeb8846f0f319d1149cd69a5c517c7ba Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 6 Oct 2010 12:51:00 -0400
Subject: [PATCH 246/307] Fixed the timebase parameter of ivfenc.

Ivfenc will use timebase if it is set. If it is not set ivfenc will
still double the timebase so altref frames will have a unique pts.
Patch Set #3: Use integer math to generate source pts. Added a
framerate parameter. Increased the default timebase to milliseconds to
remove the *2 everywhere.

Change-Id: I8d25b5b2cb26deef7eb72d74b5f76c98cafaf4db
---
 ivfenc.c | 55 +++++++++++++++++++++++++++++--------------------------
 1 file changed, 29 insertions(+), 26 deletions(-)

diff --git a/ivfenc.c b/ivfenc.c
index 3fea5b5cc..ab6e10aa9 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -423,10 +423,12 @@ static const arg_def_t verbosearg       = ARG_DEF("v", "verbose", 0,
         "Show encoder parameters");
 static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
         "Show PSNR in status line");
+static const arg_def_t framerate        = ARG_DEF(NULL, "framerate", 1,
+        "Stream frame rate (rate/scale)");
 static const arg_def_t *main_args[] =
 {
     &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl,
-    &verbosearg, &psnrarg,
+    &verbosearg, &psnrarg, &framerate,
     NULL
 };
 
@@ -450,7 +452,7 @@ static const arg_def_t lag_in_frames    = ARG_DEF(NULL, "lag-in-frames", 1,
 static const arg_def_t *global_args[] =
 {
     &use_yv12, &use_i420, &usage, &threads, &profile,
-    &width, &height, &timebase, &error_resilient,
+    &width, &height, &timebase, &framerate, &error_resilient,
     &lag_in_frames, NULL
 };
 
@@ -614,10 +616,11 @@ int main(int argc, const char **argv_)
     static const int        *ctrl_args_map = NULL;
     int                      verbose = 0, show_psnr = 0;
     int                      arg_use_i420 = 1;
-    int                      arg_have_timebase = 0;
     unsigned long            cx_time = 0;
     unsigned int             file_type, fourcc;
     y4m_input                y4m;
+    struct vpx_rational      arg_framerate = {30, 1};
+    int                      arg_have_framerate = 0;
 
     exec_name = argv_[0];
 
@@ -689,6 +692,11 @@ int main(int argc, const char **argv_)
             arg_limit = arg_parse_uint(&arg);
         else if (arg_match(&arg, &psnrarg, argi))
             show_psnr = 1;
+        else if (arg_match(&arg, &framerate, argi))
+        {
+            arg_framerate = arg_parse_rational(&arg);
+            arg_have_framerate = 1;
+        }
         else
             argj++;
     }
@@ -720,6 +728,11 @@ int main(int argc, const char **argv_)
         return EXIT_FAILURE;
     }
 
+    /* Change the default timebase to a high enough value so that the encoder
+     * will always create strictly increasing timestamps.
+     */
+    cfg.g_timebase.den = 1000;
+
     /* Now parse the remainder of the parameters. */
     for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
     {
@@ -735,10 +748,7 @@ int main(int argc, const char **argv_)
         else if (arg_match(&arg, &height, argi))
             cfg.g_h = arg_parse_uint(&arg);
         else if (arg_match(&arg, &timebase, argi))
-        {
             cfg.g_timebase = arg_parse_rational(&arg);
-            arg_have_timebase = 1;
-        }
         else if (arg_match(&arg, &error_resilient, argi))
             cfg.g_error_resilient = arg_parse_uint(&arg);
         else if (arg_match(&arg, &lag_in_frames, argi))
@@ -883,16 +893,16 @@ int main(int argc, const char **argv_)
                 file_type = FILE_TYPE_Y4M;
                 cfg.g_w = y4m.pic_w;
                 cfg.g_h = y4m.pic_h;
+
                 /* Use the frame rate from the file only if none was specified
                  * on the command-line.
                  */
-                if (!arg_have_timebase)
+                if (!arg_have_framerate)
                 {
-                    cfg.g_timebase.num = y4m.fps_d;
-                    cfg.g_timebase.den = y4m.fps_n;
-                    /* And don't reset it in the second pass.*/
-                    arg_have_timebase = 1;
+                    arg_framerate.num = y4m.fps_n;
+                    arg_framerate.den = y4m.fps_d;
                 }
+
                 arg_use_i420 = 0;
             }
             else
@@ -972,13 +982,6 @@ int main(int argc, const char **argv_)
             else
                 vpx_img_alloc(&raw, arg_use_i420 ? VPX_IMG_FMT_I420 : VPX_IMG_FMT_YV12,
                               cfg.g_w, cfg.g_h, 1);
-
-            // This was added so that ivfenc will create monotically increasing
-            // timestamps.  Since we create new timestamps for alt-reference frames
-            // we need to make room in the series of timestamps.  Since there can
-            // only be 1 alt-ref frame ( current bitstream) multiplying by 2
-            // gives us enough room.
-            cfg.g_timebase.den *= 2;
         }
 
         outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
@@ -1047,6 +1050,7 @@ int main(int argc, const char **argv_)
             vpx_codec_iter_t iter = NULL;
             const vpx_codec_cx_pkt_t *pkt;
             struct vpx_usec_timer timer;
+            int64_t frame_start;
 
             if (!arg_limit || frames_in < arg_limit)
             {
@@ -1065,10 +1069,12 @@ int main(int argc, const char **argv_)
 
             vpx_usec_timer_start(&timer);
 
-            // since we halved our timebase we need to double the timestamps
-            // and duration we pass in.
-            vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, (frames_in - 1) * 2,
-                             2, 0, arg_deadline);
+            frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
+                          * arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
+            vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
+                             cfg.g_timebase.den * arg_framerate.den
+                             / cfg.g_timebase.num / arg_framerate.num,
+                             0, arg_deadline);
             vpx_usec_timer_mark(&timer);
             cx_time += vpx_usec_timer_elapsed(&timer);
             ctx_exit_on_error(&encoder, "Failed to encode frame");
@@ -1116,14 +1122,11 @@ int main(int argc, const char **argv_)
             fflush(stdout);
         }
 
-        /* this bitrate calc is simplified and relies on the fact that this
-         * application uses 1/timebase for framerate.
-         */
         fprintf(stderr,
                "\rPass %d/%d frame %4d/%-4d %7ldB %7ldb/f %7"PRId64"b/s"
                " %7lu %s (%.2f fps)\033[K", pass + 1,
                arg_passes, frames_in, frames_out, nbytes, nbytes * 8 / frames_in,
-               nbytes * 8 *(int64_t)cfg.g_timebase.den/2/ cfg.g_timebase.num / frames_in,
+               nbytes * 8 *(int64_t)arg_framerate.num / arg_framerate.den / frames_in,
                cx_time > 9999999 ? cx_time / 1000 : cx_time,
                cx_time > 9999999 ? "ms" : "us",
                (float)frames_in * 1000000.0 / (float)cx_time);

From f9d9824047d5d94f0e7312815e99f8347ec09581 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 20 Oct 2010 11:06:48 -0400
Subject: [PATCH 247/307] Import webmquicktime webm writer

Initial import of the libmkv directory from the webmquicktime[1]
project, at commit fedbda1.

[1]: git://review.webmproject.org/webmquicktime.git
     commit fedbda18de899ff94855cb334de7e471036fbf1d

Change-Id: I1564a0ebfa72293fc296ee02178196530dfd90e4
---
 libmkv/EbmlBufferWriter.c |  60 ++++++++++
 libmkv/EbmlBufferWriter.h |  21 ++++
 libmkv/EbmlIDs.h          | 231 ++++++++++++++++++++++++++++++++++++++
 libmkv/EbmlWriter.c       | 160 ++++++++++++++++++++++++++
 libmkv/EbmlWriter.h       |  38 +++++++
 libmkv/Makefile           |  25 +++++
 libmkv/WebMElement.c      | 220 ++++++++++++++++++++++++++++++++++++
 libmkv/WebMElement.h      |  35 ++++++
 libmkv/testlibmkv.c       |  63 +++++++++++
 9 files changed, 853 insertions(+)
 create mode 100644 libmkv/EbmlBufferWriter.c
 create mode 100644 libmkv/EbmlBufferWriter.h
 create mode 100644 libmkv/EbmlIDs.h
 create mode 100644 libmkv/EbmlWriter.c
 create mode 100644 libmkv/EbmlWriter.h
 create mode 100644 libmkv/Makefile
 create mode 100644 libmkv/WebMElement.c
 create mode 100644 libmkv/WebMElement.h
 create mode 100644 libmkv/testlibmkv.c

diff --git a/libmkv/EbmlBufferWriter.c b/libmkv/EbmlBufferWriter.c
new file mode 100644
index 000000000..d9b04a81a
--- /dev/null
+++ b/libmkv/EbmlBufferWriter.c
@@ -0,0 +1,60 @@
+//#include <strmif.h>
+#include "EbmlBufferWriter.h"
+#include "EbmlWriter.h"
+//#include <cassert>
+//#include <limits>
+//#include <malloc.h>  //_alloca
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    unsigned char *src = glob->buf;
+    src += glob->offset;
+    memcpy(src, buffer_in, len);
+    glob->offset += len;
+}
+
+static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q)
+{
+    while (q != p)
+    {
+        --q;
+
+        unsigned long cbWritten;
+        memcpy(&(glob->buf[glob->offset]), q, 1);
+        glob->offset ++;
+    }
+}
+
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    //assert(buf);
+
+    const unsigned char *const p = (const unsigned char *)(buffer_in);
+    const unsigned char *const q = p + len;
+
+    _Serialize(glob, p, q);
+}
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id)
+{
+    Ebml_WriteID(glob, class_id);
+    ebmlLoc->offset = glob->offset;
+    //todo this is always taking 8 bytes, this may need later optimization
+    unsigned long long unknownLen =  0x01FFFFFFFFFFFFFFLLU;
+    Ebml_Serialize(glob, (void *)&unknownLen, 8); //this is a key that says lenght unknown
+}
+
+void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
+{
+    unsigned long long size = glob->offset - ebmlLoc->offset - 8;
+    unsigned long long curOffset = glob->offset;
+    glob->offset = ebmlLoc->offset;
+    size |=  0x0100000000000000LLU;
+    Ebml_Serialize(glob, &size, 8);
+    glob->offset = curOffset;
+}
+
diff --git a/libmkv/EbmlBufferWriter.h b/libmkv/EbmlBufferWriter.h
new file mode 100644
index 000000000..ba0a9b3ab
--- /dev/null
+++ b/libmkv/EbmlBufferWriter.h
@@ -0,0 +1,21 @@
+#ifndef EBMLBUFFERWRITER_HPP
+#define EBMLBUFFERWRITER_HPP
+
+typedef struct
+{
+    unsigned long long offset;
+} EbmlLoc;
+
+typedef struct
+{
+    unsigned char *buf;
+    unsigned int length;
+    unsigned int offset;
+} EbmlGlobal;
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id);
+void Ebml_EndSubElement(EbmlGlobal *glob,  EbmlLoc *ebmlLoc);
+
+
+#endif
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
new file mode 100644
index 000000000..271990827
--- /dev/null
+++ b/libmkv/EbmlIDs.h
@@ -0,0 +1,231 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_DEFS_HPP
+#define MKV_DEFS_HPP 1
+
+//Commenting out values not available in webm, but available in matroska
+
+enum mkv
+{
+    EBML = 0x1A45DFA3,
+    EBMLVersion = 0x4286,
+    EBMLReadVersion = 0x42F7,
+    EBMLMaxIDLength = 0x42F2,
+    EBMLMaxSizeLength = 0x42F3,
+    DocType = 0x4282,
+    DocTypeVersion = 0x4287,
+    DocTypeReadVersion = 0x4285,
+//  CRC_32 = 0xBF,
+    Void = 0xEC,
+    SignatureSlot = 0x1B538667,
+    SignatureAlgo = 0x7E8A,
+    SignatureHash = 0x7E9A,
+    SignaturePublicKey = 0x7EA5,
+    Signature = 0x7EB5,
+    SignatureElements = 0x7E5B,
+    SignatureElementList = 0x7E7B,
+    SignedElement = 0x6532,
+    //segment
+    Segment = 0x18538067,
+    //Meta Seek Information
+    SeekHead = 0x114D9B74,
+    Seek = 0x4DBB,
+    SeekID = 0x53AB,
+    SeekPosition = 0x53AC,
+    //Segment Information
+    Info = 0x1549A966,
+//  SegmentUID = 0x73A4,
+//  SegmentFilename = 0x7384,
+//  PrevUID = 0x3CB923,
+//  PrevFilename = 0x3C83AB,
+//  NextUID = 0x3EB923,
+//  NextFilename = 0x3E83BB,
+//  SegmentFamily = 0x4444,
+//  ChapterTranslate = 0x6924,
+//  ChapterTranslateEditionUID = 0x69FC,
+//  ChapterTranslateCodec = 0x69BF,
+//  ChapterTranslateID = 0x69A5,
+    TimecodeScale = 0x2AD7B1,
+    Segment_Duration = 0x4489,
+    DateUTC = 0x4461,
+//  Title = 0x7BA9,
+    MuxingApp = 0x4D80,
+    WritingApp = 0x5741,
+    //Cluster
+    Cluster = 0x1F43B675,
+    Timecode = 0xE7,
+//  SilentTracks = 0x5854,
+//  SilentTrackNumber = 0x58D7,
+//  Position = 0xA7,
+    PrevSize = 0xAB,
+    BlockGroup = 0xA0,
+    Block = 0xA1,
+//  BlockVirtual = 0xA2,
+//  BlockAdditions = 0x75A1,
+//  BlockMore = 0xA6,
+//  BlockAddID = 0xEE,
+//  BlockAdditional = 0xA5,
+    BlockDuration = 0x9B,
+//  ReferencePriority = 0xFA,
+    ReferenceBlock = 0xFB,
+//  ReferenceVirtual = 0xFD,
+//  CodecState = 0xA4,
+//  Slices = 0x8E,
+//  TimeSlice = 0xE8,
+    LaceNumber = 0xCC,
+//  FrameNumber = 0xCD,
+//  BlockAdditionID = 0xCB,
+//  MkvDelay = 0xCE,
+//  Cluster_Duration = 0xCF,
+    SimpleBlock = 0xA3,
+//  EncryptedBlock = 0xAF,
+    //Track
+    Tracks = 0x1654AE6B,
+    TrackEntry = 0xAE,
+    TrackNumber = 0xD7,
+    TrackUID = 0x73C5,
+    TrackType = 0x83,
+    FlagEnabled = 0xB9,
+    FlagDefault = 0x88,
+    FlagForced = 0x55AA,
+    FlagLacing = 0x9C,
+//  MinCache = 0x6DE7,
+//  MaxCache = 0x6DF8,
+    DefaultDuration = 0x23E383,
+//  TrackTimecodeScale = 0x23314F,
+//  TrackOffset = 0x537F,
+//  MaxBlockAdditionID = 0x55EE,
+    Name = 0x536E,
+    Language = 0x22B59C,
+    CodecID = 0x86,
+    CodecPrivate = 0x63A2,
+    CodecName = 0x258688,
+//  AttachmentLink = 0x7446,
+//  CodecSettings = 0x3A9697,
+//  CodecInfoURL = 0x3B4040,
+//  CodecDownloadURL = 0x26B240,
+//  CodecDecodeAll = 0xAA,
+//  TrackOverlay = 0x6FAB,
+//  TrackTranslate = 0x6624,
+//  TrackTranslateEditionUID = 0x66FC,
+//  TrackTranslateCodec = 0x66BF,
+//  TrackTranslateTrackID = 0x66A5,
+    //video
+    Video = 0xE0,
+    FlagInterlaced = 0x9A,
+//  StereoMode = 0x53B8,
+    PixelWidth = 0xB0,
+    PixelHeight = 0xBA,
+    PixelCropBottom = 0x54AA,
+    PixelCropTop = 0x54BB,
+    PixelCropLeft = 0x54CC,
+    PixelCropRight = 0x54DD,
+    DisplayWidth = 0x54B0,
+    DisplayHeight = 0x54BA,
+    DisplayUnit = 0x54B2,
+    AspectRatioType = 0x54B3,
+//  ColourSpace = 0x2EB524,
+//  GammaValue = 0x2FB523,
+    FrameRate = 0x2383E3,
+    //end video
+    //audio
+    Audio = 0xE1,
+    SamplingFrequency = 0xB5,
+    OutputSamplingFrequency = 0x78B5,
+    Channels = 0x9F,
+//  ChannelPositions = 0x7D7B,
+    BitDepth = 0x6264,
+    //end audio
+    //content encoding
+//  ContentEncodings = 0x6d80,
+//  ContentEncoding = 0x6240,
+//  ContentEncodingOrder = 0x5031,
+//  ContentEncodingScope = 0x5032,
+//  ContentEncodingType = 0x5033,
+//  ContentCompression = 0x5034,
+//  ContentCompAlgo = 0x4254,
+//  ContentCompSettings = 0x4255,
+//  ContentEncryption = 0x5035,
+//  ContentEncAlgo = 0x47e1,
+//  ContentEncKeyID = 0x47e2,
+//  ContentSignature = 0x47e3,
+//  ContentSigKeyID = 0x47e4,
+//  ContentSigAlgo = 0x47e5,
+//  ContentSigHashAlgo = 0x47e6,
+    //end content encoding
+    //Cueing Data
+    Cues = 0x1C53BB6B,
+    CuePoint = 0xBB,
+    CueTime = 0xB3,
+    CueTrackPositions = 0xB7,
+    CueTrack = 0xF7,
+    CueClusterPosition = 0xF1,
+    CueBlockNumber = 0x5378,
+//  CueCodecState = 0xEA,
+//  CueReference = 0xDB,
+//  CueRefTime = 0x96,
+//  CueRefCluster = 0x97,
+//  CueRefNumber = 0x535F,
+//  CueRefCodecState = 0xEB,
+    //Attachment
+//  Attachments = 0x1941A469,
+//  AttachedFile = 0x61A7,
+//  FileDescription = 0x467E,
+//  FileName = 0x466E,
+//  FileMimeType = 0x4660,
+//  FileData = 0x465C,
+//  FileUID = 0x46AE,
+//  FileReferral = 0x4675,
+    //Chapters
+//  Chapters = 0x1043A770,
+//  EditionEntry = 0x45B9,
+//  EditionUID = 0x45BC,
+//  EditionFlagHidden = 0x45BD,
+//  EditionFlagDefault = 0x45DB,
+//  EditionFlagOrdered = 0x45DD,
+//  ChapterAtom = 0xB6,
+//  ChapterUID = 0x73C4,
+//  ChapterTimeStart = 0x91,
+//  ChapterTimeEnd = 0x92,
+//  ChapterFlagHidden = 0x98,
+//  ChapterFlagEnabled = 0x4598,
+//  ChapterSegmentUID = 0x6E67,
+//  ChapterSegmentEditionUID = 0x6EBC,
+//  ChapterPhysicalEquiv = 0x63C3,
+//  ChapterTrack = 0x8F,
+//  ChapterTrackNumber = 0x89,
+//  ChapterDisplay = 0x80,
+//  ChapString = 0x85,
+//  ChapLanguage = 0x437C,
+//  ChapCountry = 0x437E,
+//  ChapProcess = 0x6944,
+//  ChapProcessCodecID = 0x6955,
+//  ChapProcessPrivate = 0x450D,
+//  ChapProcessCommand = 0x6911,
+//  ChapProcessTime = 0x6922,
+//  ChapProcessData = 0x6933,
+    //Tagging
+//  Tags = 0x1254C367,
+//  Tag = 0x7373,
+//  Targets = 0x63C0,
+//  TargetTypeValue = 0x68CA,
+//  TargetType = 0x63CA,
+//  Tagging_TrackUID = 0x63C5,
+//  Tagging_EditionUID = 0x63C9,
+//  Tagging_ChapterUID = 0x63C4,
+//  AttachmentUID = 0x63C6,
+//  SimpleTag = 0x67C8,
+//  TagName = 0x45A3,
+//  TagLanguage = 0x447A,
+//  TagDefault = 0x4484,
+//  TagString = 0x4487,
+//  TagBinary = 0x4485,
+};
+#endif
\ No newline at end of file
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
new file mode 100644
index 000000000..46d2dd84d
--- /dev/null
+++ b/libmkv/EbmlWriter.c
@@ -0,0 +1,160 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlWriter.h"
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+
+
+void Ebml_WriteLen(EbmlGlobal *glob, long long val)
+{
+    //TODO check and make sure we are not > than 0x0100000000000000LLU
+    unsigned char size = 8; //size in bytes to output
+    unsigned long long minVal = 0x00000000000000ffLLU; //mask to compare for byte size
+
+    for (size = 1; size < 8; size ++)
+    {
+        if (val < minVal)
+            break;
+
+        minVal = (minVal << 7);
+    }
+
+    val |= (0x000000000000080LLU << ((size - 1) * 7));
+
+    Ebml_Serialize(glob, (void *) &val, size);
+}
+
+void Ebml_WriteString(EbmlGlobal *glob, const char *str)
+{
+    const size_t size_ = strlen(str);
+    const unsigned long long  size = size_;
+    Ebml_WriteLen(glob, size);
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we omit the null terminator.
+    Ebml_Write(glob, str, size);
+}
+
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
+{
+    const size_t strlen = wcslen(wstr);
+
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we include it.
+    const unsigned long long  size = strlen;
+
+    Ebml_WriteLen(glob, size);
+    Ebml_Write(glob, wstr, size);
+}
+
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
+{
+    if (class_id >= 0x01000000)
+        Ebml_Serialize(glob, (void *)&class_id, 4);
+    else if (class_id >= 0x00010000)
+        Ebml_Serialize(glob, (void *)&class_id, 3);
+    else if (class_id >= 0x00000100)
+        Ebml_Serialize(glob, (void *)&class_id, 2);
+    else
+        Ebml_Serialize(glob, (void *)&class_id, 1);
+}
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, UInt64 ui)
+{
+    unsigned char sizeSerialized = 8 | 0x80;
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    Ebml_Serialize(glob, &ui, 8);
+}
+
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
+{
+    unsigned char size = 8; //size in bytes to output
+    unsigned char sizeSerialized = 0;
+    Ebml_WriteID(glob, class_id);
+    unsigned long minVal;
+    minVal = 0x7fLU; //mask to compare for byte size
+
+    for (size = 1; size < 4; size ++)
+    {
+        if (ui < minVal)
+        {
+            break;
+        }
+
+        minVal <<= 7;
+    }
+
+    sizeSerialized = 0x80 | size;
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    unsigned int _s = size;
+    Ebml_Serialize(glob, &ui, size);
+}
+//TODO: perhaps this is a poor name for this id serializer helper function
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
+{
+    int size;
+    for (size=4; size > 1; size--)
+    {
+        if (bin & 0x000000ff << ((size-1) * 8))
+            break;
+    }
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteLen(glob, size);
+    Ebml_WriteID(glob, bin);
+}
+
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
+{
+    Ebml_WriteID(glob, class_id);
+    unsigned char len = 0x88;
+    Ebml_Serialize(glob, &len, 1);
+    Ebml_Serialize(glob,  &d, 8);
+}
+
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
+{
+    signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
+    Ebml_Serialize(glob, &out, 3);
+}
+
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
+{
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteString(glob, s);
+}
+
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s)
+{
+    Ebml_WriteID(glob,  class_id);
+    Ebml_WriteUTF8(glob,  s);
+}
+
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length)
+{
+    unsigned char size = 4;
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteLen(glob, data_length);
+    Ebml_Write(glob,  data, data_length);
+}
+
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
+{
+    Ebml_WriteID(glob, 0xEC);
+    unsigned char tmp = 0;
+    unsigned long i = 0;
+    Ebml_WriteLen(glob, vSize);
+
+    for (i = 0; i < vSize; i++)
+    {
+        Ebml_Write(glob, &tmp, 1);
+    }
+}
+
+//TODO Serialize Date
diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h
new file mode 100644
index 000000000..e149f397e
--- /dev/null
+++ b/libmkv/EbmlWriter.h
@@ -0,0 +1,38 @@
+#ifndef EBMLWRITER_HPP
+#define EBMLWRITER_HPP
+
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+//If you wish a different writer simply replace this
+//note: you must define write and serialize functions as well as your own EBML_GLOBAL
+#include <stddef.h>
+#include "EbmlBufferWriter.h"
+//These functions MUST be implemented
+void  Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long);
+void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
+/////
+
+
+void Ebml_WriteLen(EbmlGlobal *glob, long long val);
+void Ebml_WriteString(EbmlGlobal *glob, const char *str);
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, UInt64 ui);
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
+//TODO make this more generic to signed
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
+//TODO need date function
+#endif
diff --git a/libmkv/Makefile b/libmkv/Makefile
new file mode 100644
index 000000000..b53377b21
--- /dev/null
+++ b/libmkv/Makefile
@@ -0,0 +1,25 @@
+#Variables
+CC=gcc
+LINKER=gcc
+FLAGS=
+
+
+#Build Targets
+EbmlWriter.o: EbmlWriter.c EbmlWriter.h
+	$(CC) $(FLAGS) -c EbmlWriter.c
+
+EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h
+	$(CC) $(FLAGS) -c EbmlBufferWriter.c
+	
+MkvElement.o: MkvElement.c WebMElement.h
+	$(CC) $(FLAGS) -c MkvElement.c
+	
+testlibmkv.o: testlibmkv.c
+	$(CC) $(FLAGS) -c testlibmkv.c
+	
+testlibmkv: testlibmkv.o MkvElement.o EbmlBufferWriter.o EbmlWriter.o
+	$(LINKER) $(FLAGS) testlibmkv.o MkvElement.o EbmlBufferWriter.o EbmlWriter.o -o testlibmkv
+
+clean:
+	rm -rf *.o testlibmkv
+	
\ No newline at end of file
diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c
new file mode 100644
index 000000000..25a90249a
--- /dev/null
+++ b/libmkv/WebMElement.c
@@ -0,0 +1,220 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlBufferWriter.h"
+#include "EbmlIDs.h"
+#include "WebMElement.h"
+#include <stdio.h>
+
+#define kVorbisPrivateMaxSize  4000
+
+void writeHeader(EbmlGlobal *glob)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, EBML);
+    Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+    Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
+    Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
+    Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
+    Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
+    Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
+    Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+    Ebml_EndSubElement(glob, &start);
+}
+
+void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength)
+{
+    Ebml_WriteID(glob, SimpleBlock);
+    unsigned long blockLength = 4 + dataLength;
+    blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF
+    Ebml_Serialize(glob, &blockLength, 4);
+    trackNumber |= 0x80;  //TODO check track nubmer < 128
+    Ebml_Write(glob, &trackNumber, 1);
+    //Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
+    Ebml_Serialize(glob, &timeCode, 2);
+    unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
+    Ebml_Write(glob, &flags, 1);
+    Ebml_Write(glob, data, dataLength);
+}
+
+static UInt64 generateTrackID(unsigned int trackNumber)
+{
+    UInt64 t = time(NULL) * trackNumber;
+    UInt64 r = rand();
+    r = r << 32;
+    r +=  rand();
+    UInt64 rval = t ^ r;
+    return rval;
+}
+
+void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    UInt64 trackID = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+    Ebml_SerializeString(glob, CodecName, "VP8");  //TODO shouldn't be fixed
+
+    Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+    Ebml_SerializeString(glob, CodecID, codecId);
+    {
+        EbmlLoc videoStart;
+        Ebml_StartSubElement(glob, &videoStart, Video);
+        Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+        Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+        Ebml_SerializeFloat(glob, FrameRate, frameRate);
+        Ebml_EndSubElement(glob, &videoStart); //Video
+    }
+    Ebml_EndSubElement(glob, &start); //Track Entry
+}
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *private, unsigned long privateSize)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    UInt64 trackID = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+    Ebml_SerializeUnsigned(glob, TrackType, 2); //audio is always 2
+    //I am using defaults for thesed required fields
+    /*  Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
+        Ebml_SerializeUnsigned(glob, FlagDefault, 1);
+        Ebml_SerializeUnsigned(glob, FlagForced, 1);
+        Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
+    Ebml_SerializeString(glob, CodecID, codecId);
+    Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
+
+    Ebml_SerializeString(glob, CodecName, "VORBIS");  //fixed for now
+    {
+        EbmlLoc AudioStart;
+        Ebml_StartSubElement(glob, &AudioStart, Audio);
+        Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
+        Ebml_SerializeUnsigned(glob, Channels, channels);
+        Ebml_EndSubElement(glob, &AudioStart);
+    }
+    Ebml_EndSubElement(glob, &start);
+}
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo, unsigned long timeCodeScale, double duration)
+{
+    Ebml_StartSubElement(ebml, startInfo, Info);
+    Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
+    Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); //Currently fixed to using milliseconds
+    Ebml_SerializeString(ebml, 0x4D80, "QTmuxingAppLibWebM-0.0.1");
+    Ebml_SerializeString(ebml, 0x5741, "QTwritingAppLibWebM-0.0.1");
+    Ebml_EndSubElement(ebml, startInfo);
+}
+
+/*
+void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067);
+}
+
+void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74);
+}
+void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation)
+{
+    EbmlLoc ebmlLoc;
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb);
+    Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID);
+    Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition);
+    Ebml_EndSubElement(ebml_out, ebmlLoc);
+}
+
+void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID);
+    if (segmentInformation.filename != 0)
+        Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename);
+    Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale);
+    Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration);
+    //TODO date
+    Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX");
+    Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp);
+}
+
+void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B);
+}
+
+static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber);
+    Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID);
+    Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType);
+    Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0);
+    if (track.Language != 0)
+        Ebml_SerializeString(ebml_out, 0x22B59C, track.Language);
+    if (track.CodecID != 0)
+        Ebml_SerializeString(ebml_out, 0x86, track.CodecID);
+    if (track.CodecPrivate != 0)
+        Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength);
+    if (track.CodecName != 0)
+        Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName);
+}
+
+void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, videoHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);  //start Track
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0);  //start Video
+    Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit);
+    Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate);
+    Ebml_EndSubElement(ebml_out, videoHeadLoc);
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+
+}
+
+void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, audioHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0);  //start Audio
+    Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency);
+    Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth);
+    Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+}
+
+void Mkv_WriteEbmlClusterHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode);
+}
+
+void Mkv_WriteSimpleBlockHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, SimpleBlockStruct& block)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3);
+    Ebml_Write1UInt(ebml_out, block.TrackNumber);
+    Ebml_WriteSigned16(ebml_out,block.TimeCode);
+    unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable;
+    Ebml_Write1UInt(ebml_out, flags);  //TODO this may be the wrong function
+    Ebml_Serialize(ebml_out, block.data, block.dataLength);
+    Ebml_EndSubElement(ebml_out,ebmlLoc);
+}
+*/
diff --git a/libmkv/WebMElement.h b/libmkv/WebMElement.h
new file mode 100644
index 000000000..b4208f285
--- /dev/null
+++ b/libmkv/WebMElement.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_CONTEXT_HPP
+#define MKV_CONTEXT_HPP 1
+
+void writeSimpleBock(EbmlGlobal *ebml, unsigned char trackNumber, unsigned short timeCode,
+                     int isKeyframe, unsigned char lacingFlag, int  discardable,
+                     unsigned char *data, unsigned long dataLength);
+
+
+// these are helper functions
+void writeHeader(EbmlGlobal *ebml);
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo , unsigned long timeCodeScale, double duration);
+//this function is a helper only, it assumes a lot of defaults
+void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate);
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *private, unsigned long privateSize);
+
+void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength);
+
+
+
+#endif
\ No newline at end of file
diff --git a/libmkv/testlibmkv.c b/libmkv/testlibmkv.c
new file mode 100644
index 000000000..7edfc4347
--- /dev/null
+++ b/libmkv/testlibmkv.c
@@ -0,0 +1,63 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+
+#include "EbmlIDs.h"
+#include "EbmlBufferWriter.h"
+#include "WebMElement.h"
+
+#include <stdio.h>
+int main(int argc, char *argv[])
+{
+    //init the datatype we're using for ebml output
+    unsigned char data[8192];
+    EbmlGlobal ebml;
+    ebml.buf = data;
+    ebml.offset = 0;
+    ebml.length = 8192;
+
+    writeHeader(&ebml);
+    {
+        EbmlLoc startSegment;
+        Ebml_StartSubElement(&ebml, &startSegment, Segment); //segment
+        {
+            //segment info
+            EbmlLoc startInfo;
+            Ebml_StartSubElement(&ebml, &startInfo, Info);
+            Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
+            Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
+            Ebml_EndSubElement(&ebml, &startInfo);
+        }
+
+        {
+            EbmlLoc trackStart;
+            Ebml_StartSubElement(&ebml, &trackStart, Tracks);
+            writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
+            //writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
+            Ebml_EndSubElement(&ebml, &trackStart);
+        }
+
+        {
+            EbmlLoc clusterStart;
+            Ebml_StartSubElement(&ebml, &clusterStart, Cluster); //cluster
+            Ebml_SerializeUnsigned(&ebml, Timecode, 0);
+
+            unsigned char someData[4] = {1, 2, 3, 4};
+            writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
+            Ebml_EndSubElement(&ebml, &clusterStart);
+        }    //end cluster
+        Ebml_EndSubElement(&ebml, &startSegment);
+    }
+
+    //dump ebml stuff to the file
+    FILE *file_out = fopen("test.mkv", "wb");
+    size_t bytesWritten = fwrite(data, 1, ebml.offset, file_out);
+    fclose(file_out);
+    return 0;
+}
\ No newline at end of file

From dc66630cca948880b1e90548511114577917969d Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 20 Oct 2010 12:05:48 -0400
Subject: [PATCH 248/307] ivfenc: webm output support

This patch adds the --webm option, to allow the creation of WebM streams
without having to remux ivf into webm.

Change-Id: Ief93c114a6913c55a04cf51bce38f594372d0ad0
---
 examples.mk         |   3 +
 ivfenc.c            | 381 +++++++++++++++++++++++++++++++++++++++++++-
 libmkv/EbmlWriter.c |  22 ++-
 libmkv/EbmlWriter.h |  10 +-
 4 files changed, 397 insertions(+), 19 deletions(-)

diff --git a/examples.mk b/examples.mk
index 56da642ae..ef1d1a26b 100644
--- a/examples.mk
+++ b/examples.mk
@@ -30,6 +30,9 @@ UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
 ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
 ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
 ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+ivfenc.SRCS                 += libmkv/EbmlIDs.h
+ivfenc.SRCS                 += libmkv/EbmlWriter.c
+ivfenc.SRCS                 += libmkv/EbmlWriter.h
 ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 ivfenc.DESCRIPTION           = Full featured encoder
 
diff --git a/ivfenc.c b/ivfenc.c
index ab6e10aa9..7d3cea9c9 100644
--- a/ivfenc.c
+++ b/ivfenc.c
@@ -22,6 +22,7 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
+#include <limits.h>
 #include "vpx/vpx_encoder.h"
 #if USE_POSIX_MMAP
 #include <sys/types.h>
@@ -30,10 +31,31 @@
 #include <fcntl.h>
 #include <unistd.h>
 #endif
+#include "vpx_version.h"
 #include "vpx/vp8cx.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
 #include "y4minput.h"
+#include "libmkv/EbmlWriter.h"
+#include "libmkv/EbmlIDs.h"
+
+/* Need special handling of these functions on Windows */
+#if defined(_MSC_VER)
+/* MSVS doesn't define off_t, and uses _f{seek,tell}i64 */
+typedef __int64 off_t;
+#define fseeko _fseeki64
+#define ftello _ftelli64
+#elif defined(_WIN32)
+/* MinGW defines off_t, and uses f{seek,tell}o64 */
+#define fseeko fseeko64
+#define ftello ftello64
+#endif
+
+#if defined(_MSC_VER)
+#define LITERALU64(n) n
+#else
+#define LITERALU64(n) n##LLU
+#endif
 
 static const char *exec_name;
 
@@ -395,6 +417,327 @@ static void write_ivf_frame_header(FILE *outfile,
     fwrite(header, 1, 12, outfile);
 }
 
+
+typedef off_t EbmlLoc;
+
+
+struct cue_entry
+{
+    unsigned int time;
+    uint64_t     loc;
+};
+
+
+struct EbmlGlobal
+{
+    FILE    *stream;
+    uint64_t last_pts_ms;
+    vpx_rational_t  framerate;
+
+    /* These pointers are to the start of an element */
+    off_t    position_reference;
+    off_t    seek_info_pos;
+    off_t    segment_info_pos;
+    off_t    track_pos;
+    off_t    cue_pos;
+    off_t    cluster_pos;
+
+    /* These pointers are to the size field of the element */
+    EbmlLoc  startSegment;
+    EbmlLoc  startCluster;
+
+    uint32_t cluster_timecode;
+    int      cluster_open;
+
+    struct cue_entry *cue_list;
+    unsigned int      cues;
+
+};
+
+
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    fwrite(buffer_in, 1, len, glob->stream);
+}
+
+
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    const unsigned char *q = (const unsigned char *)buffer_in + len - 1;
+
+    for(; len; len--)
+        Ebml_Write(glob, q--, 1);
+}
+
+
+static void
+Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
+                          unsigned long class_id)
+{
+    //todo this is always taking 8 bytes, this may need later optimization
+    //this is a key that says lenght unknown
+    unsigned long long unknownLen =  LITERALU64(0x01FFFFFFFFFFFFFF);
+
+    Ebml_WriteID(glob, class_id);
+    *ebmlLoc = ftello(glob->stream);
+    Ebml_Serialize(glob, &unknownLen, 8);
+}
+
+static void
+Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
+{
+    off_t pos;
+    uint64_t size;
+
+    /* Save the current stream pointer */
+    pos = ftello(glob->stream);
+
+    /* Calculate the size of this element */
+    size = pos - *ebmlLoc - 8;
+    size |=  LITERALU64(0x0100000000000000);
+
+    /* Seek back to the beginning of the element and write the new size */
+    fseeko(glob->stream, *ebmlLoc, SEEK_SET);
+    Ebml_Serialize(glob, &size, 8);
+
+    /* Reset the stream pointer */
+    fseeko(glob->stream, pos, SEEK_SET);
+}
+
+
+static void
+write_webm_seek_element(EbmlGlobal *ebml, unsigned long id, off_t pos)
+{
+    uint64_t offset = pos - ebml->position_reference;
+    EbmlLoc start;
+    Ebml_StartSubElement(ebml, &start, Seek);
+    Ebml_SerializeBinary(ebml, SeekID, id);
+    Ebml_SerializeUnsigned64(ebml, SeekPosition, offset);
+    Ebml_EndSubElement(ebml, &start);
+}
+
+
+static void
+write_webm_seek_info(EbmlGlobal *ebml)
+{
+
+    off_t pos;
+
+    /* Save the current stream pointer */
+    pos = ftello(ebml->stream);
+
+    if(ebml->seek_info_pos)
+        fseeko(ebml->stream, ebml->seek_info_pos, SEEK_SET);
+    else
+        ebml->seek_info_pos = pos;
+
+    {
+        EbmlLoc start;
+
+        Ebml_StartSubElement(ebml, &start, SeekHead);
+        write_webm_seek_element(ebml, Tracks, ebml->track_pos);
+        write_webm_seek_element(ebml, Cues,   ebml->cue_pos);
+        write_webm_seek_element(ebml, Info,   ebml->segment_info_pos);
+        Ebml_EndSubElement(ebml, &start);
+    }
+    {
+        //segment info
+        EbmlLoc startInfo;
+        uint64_t frame_time;
+
+        frame_time = (uint64_t)1000 * ebml->framerate.den
+                     / ebml->framerate.num;
+        ebml->segment_info_pos = ftello(ebml->stream);
+        Ebml_StartSubElement(ebml, &startInfo, Info);
+        Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
+        Ebml_SerializeFloat(ebml, Segment_Duration,
+                            ebml->last_pts_ms + frame_time);
+        Ebml_SerializeString(ebml, 0x4D80, "ivfenc" VERSION_STRING);
+        Ebml_SerializeString(ebml, 0x5741, "ivfenc" VERSION_STRING);
+        Ebml_EndSubElement(ebml, &startInfo);
+    }
+}
+
+
+static void
+write_webm_file_header(EbmlGlobal                *glob,
+                       const vpx_codec_enc_cfg_t *cfg,
+                       const struct vpx_rational *fps)
+{
+    {
+        EbmlLoc start;
+        Ebml_StartSubElement(glob, &start, EBML);
+        Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+        Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
+        Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
+        Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
+        Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
+        Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
+        Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+        Ebml_EndSubElement(glob, &start);
+    }
+    {
+        Ebml_StartSubElement(glob, &glob->startSegment, Segment); //segment
+        glob->position_reference = ftello(glob->stream);
+        glob->framerate = *fps;
+        write_webm_seek_info(glob);
+
+        {
+            EbmlLoc trackStart;
+            glob->track_pos = ftello(glob->stream);
+            Ebml_StartSubElement(glob, &trackStart, Tracks);
+            {
+                unsigned int trackNumber = 1;
+                uint64_t     trackID = 0;
+
+                EbmlLoc start;
+                Ebml_StartSubElement(glob, &start, TrackEntry);
+                Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+                Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+                Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+                Ebml_SerializeString(glob, CodecID, "V_VP8");
+                {
+                    unsigned int pixelWidth = cfg->g_w;
+                    unsigned int pixelHeight = cfg->g_h;
+                    float        frameRate   = (float)fps->num/(float)fps->den;
+
+                    EbmlLoc videoStart;
+                    Ebml_StartSubElement(glob, &videoStart, Video);
+                    Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+                    Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+                    Ebml_SerializeFloat(glob, FrameRate, frameRate);
+                    Ebml_EndSubElement(glob, &videoStart); //Video
+                }
+                Ebml_EndSubElement(glob, &start); //Track Entry
+            }
+            Ebml_EndSubElement(glob, &trackStart);
+        }
+        // segment element is open
+    }
+}
+
+
+static void
+write_webm_block(EbmlGlobal                *glob,
+                 const vpx_codec_enc_cfg_t *cfg,
+                 const vpx_codec_cx_pkt_t  *pkt)
+{
+    unsigned long  block_length;
+    unsigned char  track_number;
+    unsigned short block_timecode = 0;
+    unsigned char  flags;
+    uint64_t       pts_ms;
+    int            start_cluster = 0, is_keyframe;
+
+    /* Calculate the PTS of this frame in milliseconds */
+    pts_ms = pkt->data.frame.pts * 1000
+             * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den;
+    if(pts_ms <= glob->last_pts_ms)
+        pts_ms = glob->last_pts_ms + 1;
+    glob->last_pts_ms = pts_ms;
+
+    /* Calculate the relative time of this block */
+    if(pts_ms - glob->cluster_timecode > SHRT_MAX)
+        start_cluster = 1;
+    else
+        block_timecode = pts_ms - glob->cluster_timecode;
+
+    is_keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY);
+    if(start_cluster || is_keyframe)
+    {
+        if(glob->cluster_open)
+            Ebml_EndSubElement(glob, &glob->startCluster);
+
+        /* Open the new cluster */
+        block_timecode = 0;
+        glob->cluster_open = 1;
+        glob->cluster_timecode = pts_ms;
+        glob->cluster_pos = ftello(glob->stream);
+        Ebml_StartSubElement(glob, &glob->startCluster, Cluster); //cluster
+        Ebml_SerializeUnsigned(glob, Timecode, glob->cluster_timecode);
+
+        /* Save a cue point if this is a keyframe. */
+        if(is_keyframe)
+        {
+            struct cue_entry *cue;
+
+            glob->cue_list = realloc(glob->cue_list,
+                                     (glob->cues+1) * sizeof(struct cue_entry));
+            cue = &glob->cue_list[glob->cues];
+            cue->time = glob->cluster_timecode;
+            cue->loc = glob->cluster_pos;
+            glob->cues++;
+        }
+    }
+
+    /* Write the Simple Block */
+    Ebml_WriteID(glob, SimpleBlock);
+
+    block_length = pkt->data.frame.sz + 4;
+    block_length |= 0x10000000;
+    Ebml_Serialize(glob, &block_length, 4);
+
+    track_number = 1;
+    track_number |= 0x80;
+    Ebml_Write(glob, &track_number, 1);
+
+    Ebml_Serialize(glob, &block_timecode, 2);
+
+    flags = 0;
+    if(is_keyframe)
+        flags |= 0x80;
+    if(pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE)
+        flags |= 0x08;
+    Ebml_Write(glob, &flags, 1);
+
+    Ebml_Write(glob, pkt->data.frame.buf, pkt->data.frame.sz);
+}
+
+
+static void
+write_webm_file_footer(EbmlGlobal *glob)
+{
+
+    if(glob->cluster_open)
+        Ebml_EndSubElement(glob, &glob->startCluster);
+
+    {
+        EbmlLoc start;
+        int i;
+
+        glob->cue_pos = ftello(glob->stream);
+        Ebml_StartSubElement(glob, &start, Cues);
+        for(i=0; i<glob->cues; i++)
+        {
+            struct cue_entry *cue = &glob->cue_list[i];
+            EbmlLoc start;
+
+            Ebml_StartSubElement(glob, &start, CuePoint);
+            {
+                EbmlLoc start;
+
+                Ebml_SerializeUnsigned(glob, CueTime, cue->time);
+
+                Ebml_StartSubElement(glob, &start, CueTrackPositions);
+                Ebml_SerializeUnsigned(glob, CueTrack, 1);
+                Ebml_SerializeUnsigned64(glob, CueClusterPosition,
+                                         cue->loc - glob->position_reference);
+                //Ebml_SerializeUnsigned(glob, CueBlockNumber, cue->blockNumber);
+                Ebml_EndSubElement(glob, &start);
+            }
+            Ebml_EndSubElement(glob, &start);
+        }
+        Ebml_EndSubElement(glob, &start);
+    }
+
+    Ebml_EndSubElement(glob, &glob->startSegment);
+
+    /* Patch up the seek info block */
+    write_webm_seek_info(glob);
+    fseeko(glob->stream, 0, SEEK_END);
+}
+
+
 #include "args.h"
 
 static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
@@ -425,10 +768,12 @@ static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
         "Show PSNR in status line");
 static const arg_def_t framerate        = ARG_DEF(NULL, "framerate", 1,
         "Stream frame rate (rate/scale)");
+static const arg_def_t use_webm         = ARG_DEF(NULL, "webm", 0,
+        "Output WebM (default is IVF)");
 static const arg_def_t *main_args[] =
 {
     &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl,
-    &verbosearg, &psnrarg, &framerate,
+    &verbosearg, &psnrarg, &use_webm, &framerate,
     NULL
 };
 
@@ -621,6 +966,8 @@ int main(int argc, const char **argv_)
     y4m_input                y4m;
     struct vpx_rational      arg_framerate = {30, 1};
     int                      arg_have_framerate = 0;
+    int                      write_webm = 0;
+    EbmlGlobal               ebml = {0};
 
     exec_name = argv_[0];
 
@@ -697,6 +1044,8 @@ int main(int argc, const char **argv_)
             arg_framerate = arg_parse_rational(&arg);
             arg_have_framerate = 1;
         }
+        else if (arg_match(&arg, &use_webm, argi))
+            write_webm = 1;
         else
             argj++;
     }
@@ -1021,7 +1370,13 @@ int main(int argc, const char **argv_)
 
 #endif
 
-        write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);
+        if(write_webm)
+        {
+            ebml.stream = outfile;
+            write_webm_file_header(&ebml, &cfg, &arg_framerate);
+        }
+        else
+            write_ivf_file_header(outfile, &cfg, codec->fourcc, 0);
 
 
         /* Construct Encoder Context */
@@ -1090,8 +1445,15 @@ int main(int argc, const char **argv_)
                     frames_out++;
                     fprintf(stderr, " %6luF",
                             (unsigned long)pkt->data.frame.sz);
-                    write_ivf_frame_header(outfile, pkt);
-                    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                    if(write_webm)
+                    {
+                        write_webm_block(&ebml, &cfg, pkt);
+                    }
+                    else
+                    {
+                        write_ivf_frame_header(outfile, pkt);
+                        fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                    }
                     nbytes += pkt->data.raw.sz;
                     break;
                 case VPX_CODEC_STATS_PKT:
@@ -1135,8 +1497,15 @@ int main(int argc, const char **argv_)
 
         fclose(infile);
 
-        if (!fseek(outfile, 0, SEEK_SET))
-            write_ivf_file_header(outfile, &cfg, codec->fourcc, frames_out);
+        if(write_webm)
+        {
+            write_webm_file_footer(&ebml);
+        }
+        else
+        {
+            if (!fseek(outfile, 0, SEEK_SET))
+                write_ivf_file_header(outfile, &cfg, codec->fourcc, frames_out);
+        }
 
         fclose(outfile);
         stats_close(&stats);
diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c
index 46d2dd84d..9d564c177 100644
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -11,13 +11,17 @@
 #include <stdlib.h>
 #include <wchar.h>
 #include <string.h>
-
+#if defined(_MSC_VER)
+#define LITERALU64(n) n
+#else
+#define LITERALU64(n) n##LLU
+#endif
 
 void Ebml_WriteLen(EbmlGlobal *glob, long long val)
 {
     //TODO check and make sure we are not > than 0x0100000000000000LLU
     unsigned char size = 8; //size in bytes to output
-    unsigned long long minVal = 0x00000000000000ffLLU; //mask to compare for byte size
+    unsigned long long minVal = LITERALU64(0x00000000000000ff); //mask to compare for byte size
 
     for (size = 1; size < 8; size ++)
     {
@@ -27,7 +31,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val)
         minVal = (minVal << 7);
     }
 
-    val |= (0x000000000000080LLU << ((size - 1) * 7));
+    val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
 
     Ebml_Serialize(glob, (void *) &val, size);
 }
@@ -65,7 +69,7 @@ void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
     else
         Ebml_Serialize(glob, (void *)&class_id, 1);
 }
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, UInt64 ui)
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
 {
     unsigned char sizeSerialized = 8 | 0x80;
     Ebml_WriteID(glob, class_id);
@@ -77,8 +81,9 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
 {
     unsigned char size = 8; //size in bytes to output
     unsigned char sizeSerialized = 0;
-    Ebml_WriteID(glob, class_id);
     unsigned long minVal;
+
+    Ebml_WriteID(glob, class_id);
     minVal = 0x7fLU; //mask to compare for byte size
 
     for (size = 1; size < 4; size ++)
@@ -93,7 +98,6 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
 
     sizeSerialized = 0x80 | size;
     Ebml_Serialize(glob, &sizeSerialized, 1);
-    unsigned int _s = size;
     Ebml_Serialize(glob, &ui, size);
 }
 //TODO: perhaps this is a poor name for this id serializer helper function
@@ -112,8 +116,9 @@ void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned lon
 
 void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
 {
-    Ebml_WriteID(glob, class_id);
     unsigned char len = 0x88;
+
+    Ebml_WriteID(glob, class_id);
     Ebml_Serialize(glob, &len, 1);
     Ebml_Serialize(glob,  &d, 8);
 }
@@ -146,9 +151,10 @@ void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char
 
 void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
 {
-    Ebml_WriteID(glob, 0xEC);
     unsigned char tmp = 0;
     unsigned long i = 0;
+
+    Ebml_WriteID(glob, 0xEC);
     Ebml_WriteLen(glob, vSize);
 
     for (i = 0; i < vSize; i++)
diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h
index e149f397e..8c7fe7c66 100644
--- a/libmkv/EbmlWriter.h
+++ b/libmkv/EbmlWriter.h
@@ -9,12 +9,12 @@
 // in the file PATENTS.  All contributing project authors may
 // be found in the AUTHORS file in the root of the source tree.
 
-
-//If you wish a different writer simply replace this
 //note: you must define write and serialize functions as well as your own EBML_GLOBAL
-#include <stddef.h>
-#include "EbmlBufferWriter.h"
 //These functions MUST be implemented
+#include <stddef.h>
+#include "vpx/vpx_integer.h"
+
+typedef struct EbmlGlobal EbmlGlobal;
 void  Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long);
 void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
 /////
@@ -24,7 +24,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val);
 void Ebml_WriteString(EbmlGlobal *glob, const char *str);
 void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
 void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
-void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, UInt64 ui);
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
 void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
 void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
 void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);

From ea68ee0458acf1b9eb27a41083d165de084e4c45 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 15:02:10 -0400
Subject: [PATCH 249/307] rename ivf{enc,dec} to vpx{enc,dec}

The new WebM output support should be preferred to IVF, but we can't
change the default behavior of the ivf* tools. There are a few other
default behaviors for these tools that are counterintuitive for
historical reasons, and changing the binary name provides the
opportunity to clean those up as well. This patch takes the first
step by renaming the binaries.

Change-Id: I647008ae37cc352dd27ec1da7ed13489e0609b24
---
 examples.mk          | 55 ++++++++++++++++++++++++++------------------
 solution.mk          |  2 +-
 ivfdec.c => vpxdec.c |  2 +-
 ivfenc.c => vpxenc.c |  4 ++--
 4 files changed, 36 insertions(+), 27 deletions(-)
 rename ivfdec.c => vpxdec.c (99%)
 rename ivfenc.c => vpxenc.c (99%)

diff --git a/examples.mk b/examples.mk
index ef1d1a26b..28fc6eaa9 100644
--- a/examples.mk
+++ b/examples.mk
@@ -12,29 +12,38 @@
 # List of examples to build. UTILS are files that are taken from the source
 # tree directly, and GEN_EXAMPLES are files that are created from the
 # examples folder.
-UTILS-$(CONFIG_DECODERS)    += ivfdec.c
-ivfdec.SRCS                 += md5_utils.c md5_utils.h
-ivfdec.SRCS                 += vpx_ports/vpx_timer.h
-ivfdec.SRCS                 += vpx/vpx_integer.h
-ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
-ivfdec.SRCS                 += nestegg/halloc/halloc.h
-ivfdec.SRCS                 += nestegg/halloc/src/align.h
-ivfdec.SRCS                 += nestegg/halloc/src/halloc.c
-ivfdec.SRCS                 += nestegg/halloc/src/hlist.h
-ivfdec.SRCS                 += nestegg/halloc/src/macros.h
-ivfdec.SRCS                 += nestegg/include/nestegg/nestegg.h
-ivfdec.SRCS                 += nestegg/src/nestegg.c
-ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
-ivfdec.DESCRIPTION           = Full featured decoder
-UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
-ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
-ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
-ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
-ivfenc.SRCS                 += libmkv/EbmlIDs.h
-ivfenc.SRCS                 += libmkv/EbmlWriter.c
-ivfenc.SRCS                 += libmkv/EbmlWriter.h
-ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
-ivfenc.DESCRIPTION           = Full featured encoder
+UTILS-$(CONFIG_DECODERS)    += vpxdec.c
+vpxdec.SRCS                 += md5_utils.c md5_utils.h
+vpxdec.SRCS                 += vpx_ports/vpx_timer.h
+vpxdec.SRCS                 += vpx/vpx_integer.h
+vpxdec.SRCS                 += args.c args.h vpx_ports/config.h
+vpxdec.SRCS                 += nestegg/halloc/halloc.h
+vpxdec.SRCS                 += nestegg/halloc/src/align.h
+vpxdec.SRCS                 += nestegg/halloc/src/halloc.c
+vpxdec.SRCS                 += nestegg/halloc/src/hlist.h
+vpxdec.SRCS                 += nestegg/halloc/src/macros.h
+vpxdec.SRCS                 += nestegg/include/nestegg/nestegg.h
+vpxdec.SRCS                 += nestegg/src/nestegg.c
+vpxdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
+vpxdec.DESCRIPTION           = Full featured decoder
+UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
+vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+vpxenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
+vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS                 += libmkv/EbmlIDs.h
+vpxenc.SRCS                 += libmkv/EbmlWriter.c
+vpxenc.SRCS                 += libmkv/EbmlWriter.h
+vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
+vpxenc.DESCRIPTION           = Full featured encoder
+
+# Clean up old ivfenc, ivfdec binaries.
+ifeq ($(CONFIG_MSVS),yes)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe)
+else
+CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe}
+CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe}
+endif
 
 # XMA example disabled for now, not used in VP8
 #UTILS-$(CONFIG_DECODERS)    += example_xma.c
diff --git a/solution.mk b/solution.mk
index 8e852ec5d..6d2c08d06 100644
--- a/solution.mk
+++ b/solution.mk
@@ -22,7 +22,7 @@ else
 vpx.sln: $(wildcard *.vcproj)
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
-            $(if $(filter %vpx.vcproj,$^),--dep=ivfdec:vpx) \
+            $(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \
             $(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \
             --ver=$(CONFIG_VS_VERSION)\
             --target=$(TOOLCHAIN)\
diff --git a/ivfdec.c b/vpxdec.c
similarity index 99%
rename from ivfdec.c
rename to vpxdec.c
index c2822904f..0de072e8b 100644
--- a/ivfdec.c
+++ b/vpxdec.c
@@ -367,7 +367,7 @@ unsigned int file_is_ivf(FILE *infile,
             *fps_num = mem_get_le32(raw_hdr + 16);
             *fps_den = mem_get_le32(raw_hdr + 20);
 
-            /* Some versions of ivfenc used 1/(2*fps) for the timebase, so
+            /* Some versions of vpxenc used 1/(2*fps) for the timebase, so
              * we can guess the framerate using only the timebase in this
              * case. Other files would require reading ahead to guess the
              * timebase, like we do for webm.
diff --git a/ivfenc.c b/vpxenc.c
similarity index 99%
rename from ivfenc.c
rename to vpxenc.c
index 7d3cea9c9..3537419d8 100644
--- a/ivfenc.c
+++ b/vpxenc.c
@@ -552,8 +552,8 @@ write_webm_seek_info(EbmlGlobal *ebml)
         Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
         Ebml_SerializeFloat(ebml, Segment_Duration,
                             ebml->last_pts_ms + frame_time);
-        Ebml_SerializeString(ebml, 0x4D80, "ivfenc" VERSION_STRING);
-        Ebml_SerializeString(ebml, 0x5741, "ivfenc" VERSION_STRING);
+        Ebml_SerializeString(ebml, 0x4D80, "vpxenc" VERSION_STRING);
+        Ebml_SerializeString(ebml, 0x5741, "vpxenc" VERSION_STRING);
         Ebml_EndSubElement(ebml, &startInfo);
     }
 }

From 456bfb196b3943b7a9b9ab1941d177dc11990f22 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 16:23:20 -0400
Subject: [PATCH 250/307] vpxenc: output webm by default

WebM should be preferred to IVF output, since it has wider tool support.

Change-Id: I5ac3d5cb68722e6c8af917cdba32ac01dd5e0ea2
---
 vpxenc.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index 3537419d8..6605ab88a 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -768,12 +768,12 @@ static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
         "Show PSNR in status line");
 static const arg_def_t framerate        = ARG_DEF(NULL, "framerate", 1,
         "Stream frame rate (rate/scale)");
-static const arg_def_t use_webm         = ARG_DEF(NULL, "webm", 0,
-        "Output WebM (default is IVF)");
+static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
+        "Output IVF (default is WebM)");
 static const arg_def_t *main_args[] =
 {
     &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl,
-    &verbosearg, &psnrarg, &use_webm, &framerate,
+    &verbosearg, &psnrarg, &use_ivf, &framerate,
     NULL
 };
 
@@ -966,7 +966,7 @@ int main(int argc, const char **argv_)
     y4m_input                y4m;
     struct vpx_rational      arg_framerate = {30, 1};
     int                      arg_have_framerate = 0;
-    int                      write_webm = 0;
+    int                      write_webm = 1;
     EbmlGlobal               ebml = {0};
 
     exec_name = argv_[0];
@@ -1044,8 +1044,8 @@ int main(int argc, const char **argv_)
             arg_framerate = arg_parse_rational(&arg);
             arg_have_framerate = 1;
         }
-        else if (arg_match(&arg, &use_webm, argi))
-            write_webm = 1;
+        else if (arg_match(&arg, &use_ivf, argi))
+            write_webm = 0;
         else
             argj++;
     }

From 28f177cd3aa72e2ca61e38ac9e8cf99a8bcfca4d Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 16:52:14 -0400
Subject: [PATCH 251/307] vpxenc: change --framerate to --fps

Saves a little typing. FPS is a well known abbreviation.

Change-Id: I53730ea36afb9309732eb1c72c52d824d5365fec
---
 vpxenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpxenc.c b/vpxenc.c
index 6605ab88a..f36af14a0 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -766,7 +766,7 @@ static const arg_def_t verbosearg       = ARG_DEF("v", "verbose", 0,
         "Show encoder parameters");
 static const arg_def_t psnrarg          = ARG_DEF(NULL, "psnr", 0,
         "Show PSNR in status line");
-static const arg_def_t framerate        = ARG_DEF(NULL, "framerate", 1,
+static const arg_def_t framerate        = ARG_DEF(NULL, "fps", 1,
         "Stream frame rate (rate/scale)");
 static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
         "Output IVF (default is WebM)");

From dea6193d093df79a191350fedf52577d2e269804 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 16:53:52 -0400
Subject: [PATCH 252/307] usage: fix horizontal alignment of options

When showing the command usage information for vpxenc and vpxdec,
options with both a short and long version that do not take an
argument were not properly aligned.

Change-Id: I8d65b5ab85bcb5a5dc8bc0d4b293b5189d56dedb
---
 args.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/args.c b/args.c
index 5365e9120..782929022 100644
--- a/args.c
+++ b/args.c
@@ -120,9 +120,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
         char *long_val = def->has_val ? "=<arg>" : "";
 
         if (def->short_name && def->long_name)
-            snprintf(option_text, 37, "-%s%s, --%s%s",
-                     def->short_name, short_val,
+        {
+            char *comma = def->has_val ? "," : ",      ";
+
+            snprintf(option_text, 37, "-%s%s%s --%s%6s",
+                     def->short_name, short_val, comma,
                      def->long_name, long_val);
+        }
         else if (def->short_name)
             snprintf(option_text, 37, "-%s%s",
                      def->short_name, short_val);

From 5d12e04d16f6127c66b54a1a62b9192b9d880f54 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 17:28:34 -0400
Subject: [PATCH 253/307] vpxdec: use the same output for --progress and
 --summary

Update the timing information in-place for the --progress
option.

Change-Id: I8efea57050db72963c0bc5c994425e7e692d1502
---
 vpxdec.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/vpxdec.c b/vpxdec.c
index 0de072e8b..2d96b28b6 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -577,6 +577,15 @@ fail:
     return 0;
 }
 
+
+void show_progress(int frame_in, int frame_out, unsigned long dx_time)
+{
+    fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\r",
+            frame_in, frame_out, dx_time,
+            (float)frame_out * 1000000.0 / (float)dx_time);
+}
+
+
 int main(int argc, const char **argv_)
 {
     vpx_codec_ctx_t          decoder;
@@ -812,12 +821,12 @@ int main(int argc, const char **argv_)
 
         ++frame_in;
 
-        if (progress)
-            fprintf(stderr, "decoded frame %d.\n", frame_in);
-
         if ((img = vpx_codec_get_frame(&decoder, &iter)))
             ++frame_out;
 
+        if (progress)
+            show_progress(frame_in, frame_out, dx_time);
+
         if (!noblit)
         {
             if (img)
@@ -869,10 +878,10 @@ int main(int argc, const char **argv_)
             break;
     }
 
-    if (summary)
+    if (summary || progress)
     {
-        fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\n",
-                frame_in, frame_out, dx_time, (float)frame_out * 1000000.0 / (float)dx_time);
+        show_progress(frame_in, frame_out, dx_time);
+        fprintf(stderr, "\n");
     }
 
 fail:

From 4b578ea6c4c45cc9aa850932878860c8eae948ed Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 20:35:12 -0700
Subject: [PATCH 254/307] vpxdec: replace --quiet with --verbose

Be quiet by default, to play nicer with scripts.

Change-Id: I68f6c88411fd5487566f268fb73b4e55ae64410c
---
 vpxdec.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/vpxdec.c b/vpxdec.c
index 2d96b28b6..a13bf44ad 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -71,8 +71,8 @@ static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
                                     "Output file is YUV4MPEG2");
 static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
                                     "Max threads to use");
-static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
-                                  "Suppress version string");
+static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
+                                  "Show version string");
 
 #if CONFIG_MD5
 static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
@@ -82,7 +82,7 @@ static const arg_def_t *all_args[] =
 {
     &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
     &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &usey4marg, &threadsarg, &quietarg,
+    &usey4marg, &threadsarg, &verbosearg,
 #if CONFIG_MD5
     &md5arg,
 #endif
@@ -595,7 +595,7 @@ int main(int argc, const char **argv_)
     size_t                 buf_sz = 0, buf_alloc_sz = 0;
     FILE                  *infile;
     int                    frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
-    int                    stop_after = 0, postproc = 0, summary = 0, quiet = 0;
+    int                    stop_after = 0, postproc = 0, summary = 0, quiet = 1;
     vpx_codec_iface_t       *iface = NULL;
     unsigned int           fourcc;
     unsigned long          dx_time = 0;
@@ -663,8 +663,8 @@ int main(int argc, const char **argv_)
             summary = 1;
         else if (arg_match(&arg, &threadsarg, argi))
             cfg.threads = arg_parse_uint(&arg);
-        else if (arg_match(&arg, &quietarg, argi))
-            quiet = 1;
+        else if (arg_match(&arg, &verbosearg, argi))
+            quiet = 0;
 
 #if CONFIG_VP8_DECODER
         else if (arg_match(&arg, &addnoise_level, argi))

From 933d44b8180f8552fafcbc6563c009dae1ba1626 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 21 Oct 2010 20:40:42 -0700
Subject: [PATCH 255/307] vpxdec: rework default output parameters

This patch reworks the default behavior of the tool to output Y4M
instead of writing individual raw frames. The relevant controls are
now:
  --yv12, --i420  - These options change the output format to be
                    raw planar data. The output will be Y4M unless
                    one of these options is specified.

  --flipuv        - Swaps the chroma planes. Works with Y4M output.

  -o, --output    - Sets the output filename. Defaults to stdout if
                    not specified. Supports escape character
                    expansion for frame width (%w) height (%h) and
                    sequence number (%1..%9). The --prefix option
                    has been removed in favor of this escape
                    expansion.

Since the output defaults to stdout if -o is not specified, an
error will be thrown if stdout is not connected to a pipe. This
can be overridden by specifying '-o -'.

Change-Id: I94e42c57ca75721fdd57a6129e79bcdb2afe5d4d
---
 vpxdec.c | 197 +++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 161 insertions(+), 36 deletions(-)

diff --git a/vpxdec.c b/vpxdec.c
index a13bf44ad..ee8419784 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -16,6 +16,15 @@
 #include <stdlib.h>
 #include <stdarg.h>
 #include <string.h>
+#include <limits.h>
+#if defined(_WIN32)
+#include <io.h>
+#define snprintf _snprintf
+#define isatty   _isatty
+#define fileno   _fileno
+#else
+#include <unistd.h>
+#endif
 #define VPX_CODEC_DISABLE_COMPAT 1
 #include "vpx_config.h"
 #include "vpx/vpx_decoder.h"
@@ -28,6 +37,10 @@
 #endif
 #include "nestegg/include/nestegg/nestegg.h"
 
+#ifndef PATH_MAX
+#define PATH_MAX 256
+#endif
+
 static const char *exec_name;
 
 #define VP8_FOURCC (0x00385056)
@@ -47,14 +60,12 @@ static const struct
 #include "args.h"
 static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
                                   "Codec to use");
-static const arg_def_t prefixarg = ARG_DEF("p", "prefix", 1,
-                                   "Prefix to use when saving frames");
 static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
-                                  "Output file is YV12 ");
+                                  "Output raw YV12 frames");
 static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
-                                  "Output file is I420 (default)");
+                                  "Output raw I420 frames");
 static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
-                                   "Synonym for --yv12");
+                                   "Flip the chroma planes in the output");
 static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
                                    "Don't process the decoded frames");
 static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
@@ -66,9 +77,7 @@ static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
 static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
                                     "Show timing summary");
 static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
-                                    "Output raw yv12 file instead of images");
-static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
-                                    "Output file is YUV4MPEG2");
+                                    "Output file name pattern (see below)");
 static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
                                     "Max threads to use");
 static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0,
@@ -80,9 +89,9 @@ static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
 #endif
 static const arg_def_t *all_args[] =
 {
-    &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
+    &codecarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
     &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &usey4marg, &threadsarg, &verbosearg,
+    &threadsarg, &verbosearg,
 #if CONFIG_MD5
     &md5arg,
 #endif
@@ -118,6 +127,20 @@ static void usage_exit()
     fprintf(stderr, "\nVP8 Postprocessing Options:\n");
     arg_show_usage(stderr, vp8_pp_args);
 #endif
+    fprintf(stderr,
+            "\nOutput File Patterns:\n\n"
+            "  The -o argument specifies the name of the file(s) to "
+            "write to. If the\n  argument does not include any escape "
+            "characters, the output will be\n  written to a single file. "
+            "Otherwise, the filename will be calculated by\n  expanding "
+            "the following escape characters:\n"
+            "\n\t%%w   - Frame width"
+            "\n\t%%h   - Frame height"
+            "\n\t%%<n> - Frame number, zero padded to <n> places (1..9)"
+            "\n\n  Pattern arguments are only supported in conjunction "
+            "with the --yv12 and\n  --i420 options. If the -o option is "
+            "not specified, the output will be\n  directed to stdout.\n"
+            );
     fprintf(stderr, "\nIncluded decoders:\n\n");
 
     for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
@@ -586,10 +609,74 @@ void show_progress(int frame_in, int frame_out, unsigned long dx_time)
 }
 
 
+void generate_filename(const char *pattern, char *out, size_t q_len,
+                       unsigned int d_w, unsigned int d_h,
+                       unsigned int frame_in)
+{
+    const char *p = pattern;
+    char *q = out;
+
+    do
+    {
+        char *next_pat = strchr(p, '%');
+
+        if(p == next_pat)
+        {
+            size_t pat_len;
+
+            // parse the pattern
+            q[q_len - 1] = '\0';
+            switch(p[1])
+            {
+            case 'w': snprintf(q, q_len - 1, "%d", d_w); break;
+            case 'h': snprintf(q, q_len - 1, "%d", d_h); break;
+            case '1': snprintf(q, q_len - 1, "%d", frame_in); break;
+            case '2': snprintf(q, q_len - 1, "%02d", frame_in); break;
+            case '3': snprintf(q, q_len - 1, "%03d", frame_in); break;
+            case '4': snprintf(q, q_len - 1, "%04d", frame_in); break;
+            case '5': snprintf(q, q_len - 1, "%05d", frame_in); break;
+            case '6': snprintf(q, q_len - 1, "%06d", frame_in); break;
+            case '7': snprintf(q, q_len - 1, "%07d", frame_in); break;
+            case '8': snprintf(q, q_len - 1, "%08d", frame_in); break;
+            case '9': snprintf(q, q_len - 1, "%09d", frame_in); break;
+            default:
+                die("Unrecognized pattern %%%c\n", p[1]);
+            }
+
+            pat_len = strlen(q);
+            if(pat_len >= q_len - 1)
+                die("Output filename too long.\n");
+            q += pat_len;
+            p += 2;
+            q_len -= pat_len;
+        }
+        else
+        {
+            size_t copy_len;
+
+            // copy the next segment
+            if(!next_pat)
+                copy_len = strlen(p);
+            else
+                copy_len = next_pat - p;
+
+            if(copy_len >= q_len - 1)
+                die("Output filename too long.\n");
+
+            memcpy(q, p, copy_len);
+            q[copy_len] = '\0';
+            q += copy_len;
+            p += copy_len;
+            q_len -= copy_len;
+        }
+    } while(*p);
+}
+
+
 int main(int argc, const char **argv_)
 {
     vpx_codec_ctx_t          decoder;
-    char                  *prefix = NULL, *fn = NULL;
+    char                  *fn = NULL;
     int                    i;
     uint8_t               *buf = NULL;
     size_t                 buf_sz = 0, buf_alloc_sz = 0;
@@ -601,8 +688,10 @@ int main(int argc, const char **argv_)
     unsigned long          dx_time = 0;
     struct arg               arg;
     char                   **argv, **argi, **argj;
-    const char                   *fn2 = 0;
-    int                     use_y4m = 0;
+    const char             *outfile_pattern = 0;
+    char                    outfile[PATH_MAX];
+    int                     single_file;
+    int                     use_y4m = 1;
     unsigned int            width;
     unsigned int            height;
     unsigned int            fps_den;
@@ -638,15 +727,17 @@ int main(int argc, const char **argv_)
                     arg.val);
         }
         else if (arg_match(&arg, &outputfile, argi))
-            fn2 = arg.val;
-        else if (arg_match(&arg, &usey4marg, argi))
-            use_y4m = 1;
-        else if (arg_match(&arg, &prefixarg, argi))
-            prefix = strdup(arg.val);
+            outfile_pattern = arg.val;
         else if (arg_match(&arg, &use_yv12, argi))
+        {
+            use_y4m = 0;
             flipuv = 1;
+        }
         else if (arg_match(&arg, &use_i420, argi))
+        {
+            use_y4m = 0;
             flipuv = 0;
+        }
         else if (arg_match(&arg, &flipuvarg, argi))
             flipuv = 1;
         else if (arg_match(&arg, &noblitarg, argi))
@@ -711,20 +802,24 @@ int main(int argc, const char **argv_)
     if (!fn)
         usage_exit();
 
-    if (!prefix)
-        prefix = strdup("img");
-
     /* Open file */
     infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
 
     if (!infile)
     {
-        fprintf(stderr, "Failed to open file");
+        fprintf(stderr, "Failed to open file '%s'",
+                strcmp(fn, "-") ? fn : "stdin");
         return EXIT_FAILURE;
     }
 
-    if (fn2)
-        out = out_open(fn2, do_md5);
+    /* Make sure we don't dump to the terminal, unless forced to with -o - */
+    if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5)
+    {
+        fprintf(stderr,
+                "Not dumping raw video to your terminal. Use '-o -' to "
+                "override.\n");
+        return EXIT_FAILURE;
+    }
 
     input.infile = infile;
     if(file_is_ivf(infile, &fourcc, &width, &height, &fps_den,
@@ -740,12 +835,41 @@ int main(int argc, const char **argv_)
         return EXIT_FAILURE;
     }
 
-    if (use_y4m)
+    /* If the output file is not set or doesn't have a sequence number in
+     * it, then we only open it once.
+     */
+    outfile_pattern = outfile_pattern ? outfile_pattern : "-";
+    single_file = 1;
+    {
+        const char *p = outfile_pattern;
+        do
+        {
+            p = strchr(p, '%');
+            if(p && p[1] >= '1' && p[1] <= '9')
+            {
+                // pattern contains sequence number, so it's not unique.
+                single_file = 0;
+                break;
+            }
+            if(p)
+                p++;
+        } while(p);
+    }
+
+    if(single_file && !noblit)
+    {
+        generate_filename(outfile_pattern, outfile, sizeof(outfile)-1,
+                          width, height, 0);
+        out = out_open(outfile, do_md5);
+    }
+
+    if (use_y4m && !noblit)
     {
         char buffer[128];
-        if (!fn2)
+        if (!single_file)
         {
-            fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
+            fprintf(stderr, "YUV4MPEG2 not supported with output patterns,"
+                            " try --i420 or --yv12.\n");
             return EXIT_FAILURE;
         }
 
@@ -832,14 +956,16 @@ int main(int argc, const char **argv_)
             if (img)
             {
                 unsigned int y;
-                char out_fn[128+24];
+                char out_fn[PATH_MAX];
                 uint8_t *buf;
-                const char *sfx = flipuv ? "yv12" : "i420";
 
-                if (!fn2)
+                if (!single_file)
                 {
-                    sprintf(out_fn, "%s-%dx%d-%04d.%s",
-                            prefix, img->d_w, img->d_h, frame_in, sfx);
+                    size_t len = sizeof(out_fn)-1;
+
+                    out_fn[len] = '\0';
+                    generate_filename(outfile_pattern, out_fn, len-1,
+                                      img->d_w, img->d_h, frame_in);
                     out = out_open(out_fn, do_md5);
                 }
                 else if(use_y4m)
@@ -869,7 +995,7 @@ int main(int argc, const char **argv_)
                     buf += img->stride[VPX_PLANE_V];
                 }
 
-                if (!fn2)
+                if (!single_file)
                     out_close(out, out_fn, do_md5);
             }
         }
@@ -892,15 +1018,14 @@ fail:
         return EXIT_FAILURE;
     }
 
-    if (fn2)
-        out_close(out, fn2, do_md5);
+    if (single_file && !noblit)
+        out_close(out, outfile, do_md5);
 
     if(input.nestegg_ctx)
         nestegg_destroy(input.nestegg_ctx);
     if(input.kind != WEBM_FILE)
         free(buf);
     fclose(infile);
-    free(prefix);
     free(argv);
 
     return EXIT_SUCCESS;

From 5329189a331c6bfc8e8350f4706a5ff9add63147 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 22 Oct 2010 14:48:21 -0400
Subject: [PATCH 256/307] vpxenc: specify output file with -o

Requiring the output file to be specified with the -o option opens up
the possibility of supporting multiple input files in the future.

Change-Id: I14c9b75e9b21184b47081e1ccf30cf4c91315964
---
 vpxenc.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index f36af14a0..4eecdcdf9 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -740,6 +740,8 @@ write_webm_file_footer(EbmlGlobal *glob)
 
 #include "args.h"
 
+static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
+        "Output filename");
 static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
                                   "Input file is YV12 ");
 static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
@@ -772,7 +774,8 @@ static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
         "Output IVF (default is WebM)");
 static const arg_def_t *main_args[] =
 {
-    &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline, &best_dl, &good_dl, &rt_dl,
+    &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
+    &best_dl, &good_dl, &rt_dl,
     &verbosearg, &psnrarg, &use_ivf, &framerate,
     NULL
 };
@@ -907,7 +910,8 @@ static void usage_exit()
 {
     int i;
 
-    fprintf(stderr, "Usage: %s <options> src_filename dst_filename\n", exec_name);
+    fprintf(stderr, "Usage: %s <options> -o dst_filename src_filename \n",
+            exec_name);
 
     fprintf(stderr, "\nOptions:\n");
     arg_show_usage(stdout, main_args);
@@ -1046,6 +1050,8 @@ int main(int argc, const char **argv_)
         }
         else if (arg_match(&arg, &use_ivf, argi))
             write_webm = 0;
+        else if (arg_match(&arg, &outputfile, argi))
+            out_fn = arg.val;
         else
             argj++;
     }
@@ -1210,11 +1216,13 @@ int main(int argc, const char **argv_)
 
     /* Handle non-option arguments */
     in_fn = argv[0];
-    out_fn = argv[1];
 
-    if (!in_fn || !out_fn)
+    if (!in_fn)
         usage_exit();
 
+    if(!out_fn)
+        die("Error: Output file is required (specify with -o)\n");
+
     memset(&stats, 0, sizeof(stats));
 
     for (pass = one_pass_only ? one_pass_only - 1 : 0; pass < arg_passes; pass++)

From 0a6bf29eb93d8ac6ade83071379b20700a7cd06d Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 22 Oct 2010 14:57:02 -0400
Subject: [PATCH 257/307] vpxenc: warn against webm output to pipes

The WebM writer requires a seekable stream.

Change-Id: I192e00706a0685362d41b8d2faf80add63d564b9
---
 vpxenc.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/vpxenc.c b/vpxenc.c
index 4eecdcdf9..a17878217 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1349,6 +1349,12 @@ int main(int argc, const char **argv_)
             return EXIT_FAILURE;
         }
 
+        if(write_webm && fseek(outfile, 0, SEEK_CUR))
+        {
+            fprintf(stderr, "WebM output to pipes not supported.\n");
+            return EXIT_FAILURE;
+        }
+
         if (stats_fn)
         {
             if (!stats_open_file(&stats, stats_fn, pass))

From b523dd51bd387d3b279440c5e9d5f79d20afe96c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Mon, 25 Oct 2010 23:45:02 -0400
Subject: [PATCH 258/307] add missing GET_GOT/RESTORE_GOT pairs

These functions made global references but did not set up the GOT,
causing compilation failures in PIC mode.

Change-Id: Iac473bf46733f87eb2e001cd736af4acf73fa51d
---
 vp8/encoder/x86/fwalsh_sse2.asm   | 2 ++
 vp8/encoder/x86/subtract_sse2.asm | 6 ++++++
 2 files changed, 8 insertions(+)

diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index ffc9b3dca..145b59d45 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -17,6 +17,7 @@ sym(vp8_short_walsh4x4_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 3
+    GET_GOT     rbx
     push        rsi
     push        rdi
     ; end prolog
@@ -141,6 +142,7 @@ sym(vp8_short_walsh4x4_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 60522bafa..27661e07f 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -19,6 +19,7 @@ sym(vp8_subtract_b_sse2_impl):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    GET_GOT     rbx
     push rsi
     push rdi
     ; end prolog
@@ -64,6 +65,7 @@ sym(vp8_subtract_b_sse2_impl):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -75,6 +77,7 @@ sym(vp8_subtract_mby_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    GET_GOT     rbx
     push rsi
     push rdi
     ; end prolog
@@ -134,6 +137,7 @@ submby_loop:
     pop rdi
     pop rsi
     ; begin epilog
+    RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
     ret
@@ -145,6 +149,7 @@ sym(vp8_subtract_mbuv_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 5
+    GET_GOT     rbx
     push rsi
     push rdi
     ; end prolog
@@ -338,6 +343,7 @@ sym(vp8_subtract_mbuv_sse2):
     ; begin epilog
     pop rdi
     pop rsi
+    RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
     ret

From d330a5876b54d4e20fce1533d534affddffd71ea Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 09:37:44 -0400
Subject: [PATCH 259/307] arm: remove duplicate functions

These functions were true duplicates of functions present in the
generic code. This fixes some of the link errors when building
with --enable-shared --enable-pic.

Change-Id: Idff26599d510d954e439207883607ad6b74df20c
---
 vp8/common/arm/reconintra4x4_arm.c |  16 +-
 vp8/encoder/arm/mcomp_arm.c        | 616 -----------------------------
 vp8/encoder/mcomp.c                |   3 +
 3 files changed, 4 insertions(+), 631 deletions(-)

diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
index 8d968d7ad..6fd9b9c2c 100644
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -295,22 +295,8 @@ void vp8_predict_intra4x4(BLOCKD *x,
 
     }
 }
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
-void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
-{
-    unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
-
-    unsigned int *src_ptr = (unsigned int *)above_right;
-    unsigned int *dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
-    unsigned int *dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
-    unsigned int *dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
-
-    *dst_ptr0 = *src_ptr;
-    *dst_ptr1 = *src_ptr;
-    *dst_ptr2 = *src_ptr;
-}
 
+extern void vp8_intra_prediction_down_copy(MACROBLOCKD *x);
 
 
 /*
diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 56358328e..0b71de4ac 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -21,8 +21,6 @@ static int mv_ref_ct [31] [4] [2];
 static int mv_mode_cts [4] [2];
 #endif
 
-static int mv_bits_sadcost[256];
-
 extern unsigned int vp8_sub_pixel_variance16x16s_neon
 (
     unsigned char  *src_ptr,
@@ -58,292 +56,7 @@ extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
     unsigned int *sse
 );
 
-void vp8cx_init_mv_bits_sadcost()
-{
-    int i;
 
-    for (i = 0; i < 256; i++)
-    {
-        mv_bits_sadcost[i] = (int)sqrt(i * 16);
-    }
-}
-
-
-int vp8_mv_bit_cost(MV *mv, MV *ref, int *mvcost[2], int Weight)
-{
-    // MV costing is based on the distribution of vectors in the previous frame and as such will tend to
-    // over state the cost of vectors. In addition coding a new vector can have a knock on effect on the
-    // cost of subsequent vectors and the quality of prediction from NEAR and NEAREST for subsequent blocks.
-    // The "Weight" parameter allows, to a limited extent, for some account to be taken of these factors.
-    return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * Weight) >> 7;
-}
-
-int vp8_mv_err_cost(MV *mv, MV *ref, int *mvcost[2], int error_per_bit)
-{
-    //int i;
-    //return ((mvcost[0][(mv->row - ref->row)>>1] + mvcost[1][(mv->col - ref->col)>>1] + 128) * error_per_bit) >> 8;
-    //return ( (vp8_mv_bit_cost(mv,  ref, mvcost, 100) + 128) * error_per_bit) >> 8;
-
-    //i = (vp8_mv_bit_cost(mv,  ref, mvcost, 100) * error_per_bit + 128) >> 8;
-    return ((mvcost[0][(mv->row - ref->row) >> 1] + mvcost[1][(mv->col - ref->col) >> 1]) * error_per_bit + 128) >> 8;
-    //return (vp8_mv_bit_cost(mv,  ref, mvcost, 128) * error_per_bit + 128) >> 8;
-}
-
-
-static int mv_bits(MV *mv, MV *ref, int *mvcost[2])
-{
-    // get the estimated number of bits for a motion vector, to be used for costing in SAD based
-    // motion estimation
-    return ((mvcost[0][(mv->row - ref->row) >> 1]  +  mvcost[1][(mv->col - ref->col)>> 1]) + 128) >> 8;
-}
-
-void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride)
-{
-    int Len;
-    int search_site_count = 0;
-
-
-    // Generate offsets for 4 search sites per step.
-    Len = MAX_FIRST_STEP;
-    x->ss[search_site_count].mv.col = 0;
-    x->ss[search_site_count].mv.row = 0;
-    x->ss[search_site_count].offset = 0;
-    search_site_count++;
-
-    while (Len > 0)
-    {
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = 0;
-        x->ss[search_site_count].mv.row = -Len;
-        x->ss[search_site_count].offset = -Len * stride;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = 0;
-        x->ss[search_site_count].mv.row = Len;
-        x->ss[search_site_count].offset = Len * stride;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = -Len;
-        x->ss[search_site_count].mv.row = 0;
-        x->ss[search_site_count].offset = -Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = Len;
-        x->ss[search_site_count].mv.row = 0;
-        x->ss[search_site_count].offset = Len;
-        search_site_count++;
-
-        // Contract.
-        Len /= 2;
-    }
-
-    x->ss_count = search_site_count;
-    x->searches_per_step = 4;
-}
-
-void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
-{
-    int Len;
-    int search_site_count = 0;
-
-    // Generate offsets for 8 search sites per step.
-    Len = MAX_FIRST_STEP;
-    x->ss[search_site_count].mv.col = 0;
-    x->ss[search_site_count].mv.row = 0;
-    x->ss[search_site_count].offset = 0;
-    search_site_count++;
-
-    while (Len > 0)
-    {
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = 0;
-        x->ss[search_site_count].mv.row = -Len;
-        x->ss[search_site_count].offset = -Len * stride;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = 0;
-        x->ss[search_site_count].mv.row = Len;
-        x->ss[search_site_count].offset = Len * stride;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = -Len;
-        x->ss[search_site_count].mv.row = 0;
-        x->ss[search_site_count].offset = -Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = Len;
-        x->ss[search_site_count].mv.row = 0;
-        x->ss[search_site_count].offset = Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = -Len;
-        x->ss[search_site_count].mv.row = -Len;
-        x->ss[search_site_count].offset = -Len * stride - Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = Len;
-        x->ss[search_site_count].mv.row = -Len;
-        x->ss[search_site_count].offset = -Len * stride + Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = -Len;
-        x->ss[search_site_count].mv.row = Len;
-        x->ss[search_site_count].offset = Len * stride - Len;
-        search_site_count++;
-
-        // Compute offsets for search sites.
-        x->ss[search_site_count].mv.col = Len;
-        x->ss[search_site_count].mv.row = Len;
-        x->ss[search_site_count].offset = Len * stride + Len;
-        search_site_count++;
-
-
-        // Contract.
-        Len /= 2;
-    }
-
-    x->ss_count = search_site_count;
-    x->searches_per_step = 8;
-}
-
-
-#define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
-#define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
-#define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
-#define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
-#define MIN(x,y) (((x)<(y))?(x):(y))
-#define MAX(x,y) (((x)>(y))?(x):(y))
-
-//#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
-
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-
-    int rr = ref_mv->row >> 1, rc = ref_mv->col >> 1;
-    int br = bestmv->row << 2, bc = bestmv->col << 2;
-    int tr = br, tc = bc;
-    unsigned int besterr = INT_MAX;
-    unsigned int left, right, up, down, diag;
-    unsigned int sse;
-    unsigned int whichdir;
-    unsigned int halfiters = 4;
-    unsigned int quarteriters = 4;
-
-    int minc = MAX(x->mv_col_min << 2, (ref_mv->col >> 1) - ((1 << mvlong_width) - 1));
-    int maxc = MIN(x->mv_col_max << 2, (ref_mv->col >> 1) + ((1 << mvlong_width) - 1));
-    int minr = MAX(x->mv_row_min << 2, (ref_mv->row >> 1) - ((1 << mvlong_width) - 1));
-    int maxr = MIN(x->mv_row_max << 2, (ref_mv->row >> 1) + ((1 << mvlong_width) - 1));
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-
-    // calculate central point error
-    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
-    while (--halfiters)
-    {
-        // 1/2 pel
-        CHECK_BETTER(left, tr, tc - 2);
-        CHECK_BETTER(right, tr, tc + 2);
-        CHECK_BETTER(up, tr - 2, tc);
-        CHECK_BETTER(down, tr + 2, tc);
-
-        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-        switch (whichdir)
-        {
-        case 0:
-            CHECK_BETTER(diag, tr - 2, tc - 2);
-            break;
-        case 1:
-            CHECK_BETTER(diag, tr - 2, tc + 2);
-            break;
-        case 2:
-            CHECK_BETTER(diag, tr + 2, tc - 2);
-            break;
-        case 3:
-            CHECK_BETTER(diag, tr + 2, tc + 2);
-            break;
-        }
-
-        // no reason to check the same one again.
-        if (tr == br && tc == bc)
-            break;
-
-        tr = br;
-        tc = bc;
-    }
-
-    // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
-    // 1/4 pel
-    while (--quarteriters)
-    {
-        CHECK_BETTER(left, tr, tc - 1);
-        CHECK_BETTER(right, tr, tc + 1);
-        CHECK_BETTER(up, tr - 1, tc);
-        CHECK_BETTER(down, tr + 1, tc);
-
-        whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-        switch (whichdir)
-        {
-        case 0:
-            CHECK_BETTER(diag, tr - 1, tc - 1);
-            break;
-        case 1:
-            CHECK_BETTER(diag, tr - 1, tc + 1);
-            break;
-        case 2:
-            CHECK_BETTER(diag, tr + 1, tc - 1);
-            break;
-        case 3:
-            CHECK_BETTER(diag, tr + 1, tc + 1);
-            break;
-        }
-
-        // no reason to check the same one again.
-        if (tr == br && tc == bc)
-            break;
-
-        tr = br;
-        tc = bc;
-    }
-
-    bestmv->row = br << 1;
-    bestmv->col = bc << 1;
-
-    if ((abs(bestmv->col - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs(bestmv->row - ref_mv->row) > MAX_FULL_PEL_VAL))
-        return INT_MAX;
-
-    return besterr;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-#undef MIN
-#undef MAX
 int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
 {
     int bestmse = INT_MAX;
@@ -935,335 +648,6 @@ cal_neighbors:
 #undef ERR
 #undef CHECK_BETTER
 
-int vp8_diamond_search_sad
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_ptr_t *fn_ptr,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    int i, j, step;
-
-    unsigned char *what = (*(b->base_src) + b->src);
-    int what_stride = b->src_stride;
-    unsigned char *in_what;
-    int in_what_stride = d->pre_stride;
-    unsigned char *best_address;
-
-    int tot_steps;
-    MV this_mv;
-
-    int bestsad = INT_MAX;
-    int best_site = 0;
-    int last_site = 0;
-
-    int ref_row = ref_mv->row >> 3;
-    int ref_col = ref_mv->col >> 3;
-    int this_row_offset;
-    int this_col_offset;
-    search_site *ss;
-
-    unsigned char *check_here;
-    int thissad;
-
-    // Work out the start point for the search
-    in_what = (unsigned char *)(*(d->base_pre) + d->pre + (ref_row * (d->pre_stride)) + ref_col);
-    best_address = in_what;
-
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Check the starting position
-        bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
-    }
-
-    // search_param determines the length of the initial step and hence the number of iterations
-    // 0 = initial step (MAX_FIRST_STEP) pel : 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc.
-    ss = &x->ss[search_param * x->searches_per_step];
-    tot_steps = (x->ss_count / x->searches_per_step) - search_param;
-
-    i = 1;
-    best_mv->row = ref_row;
-    best_mv->col = ref_col;
-
-    *num00 = 0;
-
-    for (step = 0; step < tot_steps ; step++)
-    {
-        for (j = 0 ; j < x->searches_per_step ; j++)
-        {
-            // Trap illegal vectors
-            this_row_offset = best_mv->row + ss[i].mv.row;
-            this_col_offset = best_mv->col + ss[i].mv.col;
-
-            if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) &&
-            (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max))
-
-            {
-                check_here = ss[i].offset + best_address;
-                thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-                if (thissad < bestsad)
-                {
-                    this_mv.row = this_row_offset << 3;
-                    this_mv.col = this_col_offset << 3;
-                    thissad += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                    if (thissad < bestsad)
-                    {
-                        bestsad = thissad;
-                        best_site = i;
-                    }
-                }
-            }
-
-            i++;
-        }
-
-        if (best_site != last_site)
-        {
-            best_mv->row += ss[best_site].mv.row;
-            best_mv->col += ss[best_site].mv.col;
-            best_address += ss[best_site].offset;
-            last_site = best_site;
-        }
-        else if (best_address == in_what)
-            (*num00)++;
-    }
-
-    this_mv.row = best_mv->row << 3;
-    this_mv.col = best_mv->col << 3;
-
-    if (bestsad == INT_MAX)
-        return INT_MAX;
-
-    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, (unsigned int *)(&thissad))
-    + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-}
-
-#if !(CONFIG_REALTIME_ONLY)
-int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
-{
-    unsigned char *what = (*(b->base_src) + b->src);
-    int what_stride = b->src_stride;
-    unsigned char *in_what;
-    int in_what_stride = d->pre_stride;
-    int mv_stride = d->pre_stride;
-    unsigned char *bestaddress;
-    MV *best_mv = &d->bmi.mv.as_mv;
-    MV this_mv;
-    int bestsad = INT_MAX;
-    int r, c;
-
-    unsigned char *check_here;
-    int thissad;
-
-    int ref_row = ref_mv->row >> 3;
-    int ref_col = ref_mv->col >> 3;
-
-    int row_min = ref_row - distance;
-    int row_max = ref_row + distance;
-    int col_min = ref_col - distance;
-    int col_max = ref_col + distance;
-
-    // Work out the mid point for the search
-    in_what = *(d->base_pre) + d->pre;
-    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
-
-    best_mv->row = ref_row;
-    best_mv->col = ref_col;
-
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Baseline value at the centre
-
-        //bestsad = fn_ptr->sf( what,what_stride,bestaddress,in_what_stride) + (int)sqrt(vp8_mv_err_cost(ref_mv,ref_mv, mvcost,error_per_bit*14));
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
-    }
-
-    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
-    if (col_min < x->mv_col_min)
-        col_min = x->mv_col_min;
-
-    if (col_max > x->mv_col_max)
-        col_max = x->mv_col_max;
-
-    if (row_min < x->mv_row_min)
-        row_min = x->mv_row_min;
-
-    if (row_max > x->mv_row_max)
-        row_max = x->mv_row_max;
-
-    for (r = row_min; r < row_max ; r++)
-    {
-        this_mv.row = r << 3;
-        check_here = r * mv_stride + in_what + col_min;
-
-        for (c = col_min; c < col_max; c++)
-        {
-            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-            this_mv.col = c << 3;
-            //thissad += (int)sqrt(vp8_mv_err_cost(&this_mv,ref_mv, mvcost,error_per_bit*14));
-            //thissad  += error_per_bit * mv_bits_sadcost[mv_bits(&this_mv, ref_mv, mvcost)];
-            thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit); //mv_bits(error_per_bit, &this_mv, ref_mv, mvsadcost);
-
-            if (thissad < bestsad)
-            {
-                bestsad = thissad;
-                best_mv->row = r;
-                best_mv->col = c;
-                bestaddress = check_here;
-            }
-
-            check_here++;
-        }
-    }
-
-    this_mv.row = best_mv->row << 3;
-    this_mv.col = best_mv->col << 3;
-
-    if (bestsad < INT_MAX)
-        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-    else
-        return INT_MAX;
-}
-
-int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
-{
-    unsigned char *what = (*(b->base_src) + b->src);
-    int what_stride = b->src_stride;
-    unsigned char *in_what;
-    int in_what_stride = d->pre_stride;
-    int mv_stride = d->pre_stride;
-    unsigned char *bestaddress;
-    MV *best_mv = &d->bmi.mv.as_mv;
-    MV this_mv;
-    int bestsad = INT_MAX;
-    int r, c;
-
-    unsigned char *check_here;
-    unsigned int thissad;
-
-    int ref_row = ref_mv->row >> 3;
-    int ref_col = ref_mv->col >> 3;
-
-    int row_min = ref_row - distance;
-    int row_max = ref_row + distance;
-    int col_min = ref_col - distance;
-    int col_max = ref_col + distance;
-
-    unsigned int sad_array[3];
-
-    // Work out the mid point for the search
-    in_what = *(d->base_pre) + d->pre;
-    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
-
-    best_mv->row = ref_row;
-    best_mv->col = ref_col;
-
-    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
-    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
-    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
-    {
-        // Baseline value at the centre
-        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
-    }
-
-    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
-    if (col_min < x->mv_col_min)
-        col_min = x->mv_col_min;
-
-    if (col_max > x->mv_col_max)
-        col_max = x->mv_col_max;
-
-    if (row_min < x->mv_row_min)
-        row_min = x->mv_row_min;
-
-    if (row_max > x->mv_row_max)
-        row_max = x->mv_row_max;
-
-    for (r = row_min; r < row_max ; r++)
-    {
-        this_mv.row = r << 3;
-        check_here = r * mv_stride + in_what + col_min;
-        c = col_min;
-
-        while ((c + 3) < col_max)
-        {
-            int i;
-
-            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
-
-            for (i = 0; i < 3; i++)
-            {
-                thissad = sad_array[i];
-
-                if (thissad < bestsad)
-                {
-                    this_mv.col = c << 3;
-                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                    if (thissad < bestsad)
-                    {
-                        bestsad = thissad;
-                        best_mv->row = r;
-                        best_mv->col = c;
-                        bestaddress = check_here;
-                    }
-                }
-
-                check_here++;
-                c++;
-            }
-        }
-
-        while (c < col_max)
-        {
-            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
-
-            if (thissad < bestsad)
-            {
-                this_mv.col = c << 3;
-                thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
-
-                if (thissad < bestsad)
-                {
-                    bestsad = thissad;
-                    best_mv->row = r;
-                    best_mv->col = c;
-                    bestaddress = check_here;
-                }
-            }
-
-            check_here ++;
-            c ++;
-        }
-
-    }
-
-    this_mv.row = best_mv->row << 3;
-    this_mv.col = best_mv->col << 3;
-
-    if (bestsad < INT_MAX)
-        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
-        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-    else
-        return INT_MAX;
-}
-#endif
 
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 4d60b92d6..850208eb8 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1127,6 +1127,7 @@ int vp8_diamond_search_sadx4
 }
 
 
+#if !(CONFIG_REALTIME_ONLY)
 int vp8_full_search_sad(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
 {
     unsigned char *what = (*(b->base_src) + b->src);
@@ -1339,6 +1340,8 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
     else
         return INT_MAX;
 }
+#endif
+
 
 #ifdef ENTROPY_STATS
 void print_mode_context(void)

From 19638c23098537803a8efec7ca675b905c7d2777 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 09:51:35 -0400
Subject: [PATCH 260/307] arm: move unrolled loops back to generic code

Some of the ARM functions differed from their generic counterparts
only by unrolling their loops. Since this change may be useful
on other platforms, or might even supercede the looped version
in the generic case, move it back to the generic file.

This code is left under #if ARCH_ARM for now, but it may be worth
considering a different (possibly new) conditional for these. If
it turns out that this should be runtime selectable, these
functions will have to move to the RTCD infrastructure. Don't want
to take that step at this time without more profile data.

Change-Id: I4612fdbc606fbebba4971a690fb743ad184ff15f
---
 vp8/common/arm/recon_arm.c         |  78 ------
 vp8/common/arm/reconintra4x4_arm.c | 395 -----------------------------
 vp8/common/recon.c                 |  43 ++++
 vp8/common/reconintra4x4.c         |  69 +++++
 vp8/vp8_common.mk                  |   1 -
 5 files changed, 112 insertions(+), 474 deletions(-)
 delete mode 100644 vp8/common/arm/reconintra4x4_arm.c

diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/recon_arm.c
index 218898b44..a1b210130 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -15,36 +15,6 @@
 
 extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
 
-/*
-void vp8_recon16x16mby(MACROBLOCKD *x)
-{
-    int i;
-    for(i=0;i<16;i+=4)
-    {
-        //vp8_recon4b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-}
-*/
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    BLOCKD *b = &x->block[0];
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[4];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[8];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[12];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-
 #if HAVE_ARMV7
 void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
@@ -58,52 +28,4 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
     vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
 }
-
-#else
-/*
-void vp8_recon16x16mb(MACROBLOCKD *x)
-{
-    int i;
-
-    for(i=0;i<16;i+=4)
-    {
-//      vp8_recon4b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    }
-    for(i=16;i<24;i+=2)
-    {
-//      vp8_recon2b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon2b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-}
-*/
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    BLOCKD *b = &x->block[0];
-
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-
-    //b = &x->block[16];
-
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
 #endif
diff --git a/vp8/common/arm/reconintra4x4_arm.c b/vp8/common/arm/reconintra4x4_arm.c
deleted file mode 100644
index 6fd9b9c2c..000000000
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ /dev/null
@@ -1,395 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "recon.h"
-#include "vpx_mem/vpx_mem.h"
-#include "reconintra.h"
-
-void vp8_predict_intra4x4(BLOCKD *x,
-                          int b_mode,
-                          unsigned char *predictor)
-{
-    int i, r, c;
-
-    unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride;
-    unsigned char Left[4];
-    unsigned char top_left = Above[-1];
-
-    Left[0] = (*(x->base_dst))[x->dst - 1];
-    Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
-    Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
-    Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
-
-    switch (b_mode)
-    {
-    case B_DC_PRED:
-    {
-        int expected_dc = 0;
-
-        for (i = 0; i < 4; i++)
-        {
-            expected_dc += Above[i];
-            expected_dc += Left[i];
-        }
-
-        expected_dc = (expected_dc + 4) >> 3;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                predictor[c] = expected_dc;
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-    case B_TM_PRED:
-    {
-        // prediction similar to true_motion prediction
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                int pred = Above[c] - top_left + Left[r];
-
-                if (pred < 0)
-                    pred = 0;
-
-                if (pred > 255)
-                    pred = 255;
-
-                predictor[c] = pred;
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-
-    case B_VE_PRED:
-    {
-
-        unsigned int ap[4];
-        ap[0] = (top_left  + 2 * Above[0] + Above[1] + 2) >> 2;
-        ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
-        ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
-        ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-
-                predictor[c] = ap[c];
-            }
-
-            predictor += 16;
-        }
-
-    }
-    break;
-
-
-    case B_HE_PRED:
-    {
-
-        unsigned int lp[4];
-        lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
-        lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
-        lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
-        lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                predictor[c] = lp[r];
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-    case B_LD_PRED:
-    {
-        unsigned char *ptr = Above;
-        predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
-        predictor[0 * 16 + 1] =
-            predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
-        predictor[0 * 16 + 2] =
-            predictor[1 * 16 + 1] =
-                predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
-        predictor[0 * 16 + 3] =
-            predictor[1 * 16 + 2] =
-                predictor[2 * 16 + 1] =
-                    predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
-        predictor[1 * 16 + 3] =
-            predictor[2 * 16 + 2] =
-                predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
-        predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
-
-    }
-    break;
-    case B_RD_PRED:
-    {
-
-        unsigned char pp[9];
-
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-        predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[3 * 16 + 1] =
-            predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[3 * 16 + 2] =
-            predictor[2 * 16 + 1] =
-                predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[3 * 16 + 3] =
-            predictor[2 * 16 + 2] =
-                predictor[1 * 16 + 1] =
-                    predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[1 * 16 + 2] =
-                predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[1 * 16 + 3] =
-            predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-
-    }
-    break;
-    case B_VR_PRED:
-    {
-
-        unsigned char pp[9];
-
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-
-        predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[3 * 16 + 1] =
-            predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 1] =
-            predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
-        predictor[3 * 16 + 2] =
-            predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
-        predictor[3 * 16 + 3] =
-            predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
-        predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
-
-    }
-    break;
-    case B_VL_PRED:
-    {
-
-        unsigned char *pp = Above;
-
-        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[2 * 16 + 0] =
-            predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[1 * 16 + 1] =
-            predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 1] =
-            predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[3 * 16 + 1] =
-            predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[0 * 16 + 3] =
-            predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-    }
-    break;
-
-    case B_HD_PRED:
-    {
-        unsigned char pp[9];
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-
-        predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[2 * 16 + 0] =
-            predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[2 * 16 + 1] =
-            predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[2 * 16 + 3] =
-            predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[1 * 16 + 2] =
-            predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-    }
-    break;
-
-
-    case B_HU_PRED:
-    {
-        unsigned char *pp = Left;
-        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[0 * 16 + 2] =
-            predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[0 * 16 + 3] =
-            predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[1 * 16 + 2] =
-            predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[2 * 16 + 3] =
-                predictor[3 * 16 + 0] =
-                    predictor[3 * 16 + 1] =
-                        predictor[3 * 16 + 2] =
-                            predictor[3 * 16 + 3] = pp[3];
-    }
-    break;
-
-
-    }
-}
-
-extern void vp8_intra_prediction_down_copy(MACROBLOCKD *x);
-
-
-/*
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-
-    vp8_intra_prediction_down_copy(x);
-
-    for(i=0;i<16;i++)
-    {
-        BLOCKD *b = &x->block[i];
-
-        vp8_predict_intra4x4(b, x->block[i].bmi.mode,x->block[i].predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-
-    vp8_recon_intra_mbuv(x);
-
-}
-*/
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-    BLOCKD *b = &x->block[0];
-
-    vp8_intra_prediction_down_copy(x);
-
-    {
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-
-    vp8_recon_intra_mbuv(rtcd, x);
-
-}
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index 0b439e054..f31ffd8e4 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -108,6 +108,22 @@ void vp8_recon2b_c
 
 void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
+#if ARCH_ARM
+    BLOCKD *b = &x->block[0];
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    //b = &x->block[4];
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    //b = &x->block[8];
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    //b = &x->block[12];
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
     int i;
 
     for (i = 0; i < 16; i += 4)
@@ -116,10 +132,36 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
         RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     }
+#endif
 }
 
 void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
+#if ARCH_ARM
+    BLOCKD *b = &x->block[0];
+
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+
+    //b = &x->block[16];
+
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
     int i;
 
     for (i = 0; i < 16; i += 4)
@@ -135,4 +177,5 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
         RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     }
+#endif
 }
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index c6e5fe7fd..315135184 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -318,6 +318,74 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
     vp8_intra_prediction_down_copy(x);
 
+#if ARCH_ARM
+    {
+        BLOCKD *b = &x->block[0];
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+        b += 1;
+
+        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
+        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    }
+#else
     for (i = 0; i < 16; i++)
     {
         BLOCKD *b = &x->block[i];
@@ -325,6 +393,7 @@ void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
         vp8_predict_intra4x4(b, x->block[i].bmi.mode, x->block[i].predictor);
         RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     }
+#endif
 
     vp8_recon_intra_mbuv(rtcd, x);
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 9ab92b334..24cbbdd0c 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -119,7 +119,6 @@ VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra4x4_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 

From 96cf6588dea579d151679aec7c17ea9aeb1437ac Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 10:46:31 -0400
Subject: [PATCH 261/307] make arm hex search the generic implementation

The ARM version of vp8_hex_search() is a faster implementation
of the same algorithm. Since it doesn't use any ARM specific
code, it can be made the default implementation. This removes
a linking error.

Change-Id: I77d10f2c16b2515bff4522c350004e03b7659934
---
 vp8/encoder/arm/mcomp_arm.c | 149 ------------------------------------
 vp8/encoder/mcomp.c         |  78 +++++++++++++++----
 2 files changed, 64 insertions(+), 163 deletions(-)

diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
index 0b71de4ac..27146e23f 100644
--- a/vp8/encoder/arm/mcomp_arm.c
+++ b/vp8/encoder/arm/mcomp_arm.c
@@ -499,155 +499,6 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     return bestmse;
 }
 
-#define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
-#define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
-#define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
-#define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-const MV next_chkpts[6][3] =
-{
-    {{ -2, 0}, { -1, -2}, {1, -2}},
-    {{ -1, -2}, {1, -2}, {2, 0}},
-    {{1, -2}, {2, 0}, {1, 2}},
-    {{2, 0}, {1, 2}, { -1, 2}},
-    {{1, 2}, { -1, 2}, { -2, 0}},
-    {{ -1, 2}, { -2, 0}, { -1, -2}}
-};
-int vp8_hex_search
-(
-    MACROBLOCK *x,
-    BLOCK *b,
-    BLOCKD *d,
-    MV *ref_mv,
-    MV *best_mv,
-    int search_param,
-    int error_per_bit,
-    int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t      sf,
-    int *mvsadcost[2],
-    int *mvcost[2]
-)
-{
-    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
-    MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
-    int i, j;
-    unsigned char *src = (*(b->base_src) + b->src);
-    int src_stride = b->src_stride;
-    int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
-    unsigned int besterr, thiserr = 0x7fffffff;
-    int k = -1, tk;
-
-    if (bc < x->mv_col_min) bc = x->mv_col_min;
-
-    if (bc > x->mv_col_max) bc = x->mv_col_max;
-
-    if (br < x->mv_row_min) br = x->mv_row_min;
-
-    if (br > x->mv_row_max) br = x->mv_row_max;
-
-    rr >>= 1;
-    rc >>= 1;
-
-    besterr = ERR(br, bc, thiserr);
-
-    // hex search
-    //j=0
-    tr = br;
-    tc = bc;
-
-    for (i = 0; i < 6; i++)
-    {
-        int nr = tr + hex[i].row, nc = tc + hex[i].col;
-
-        if (nc < x->mv_col_min) continue;
-
-        if (nc > x->mv_col_max) continue;
-
-        if (nr < x->mv_row_min) continue;
-
-        if (nr > x->mv_row_max) continue;
-
-        //CHECK_BETTER(thiserr,nr,nc);
-        if ((thiserr = ERR(nr, nc, besterr)) < besterr)
-        {
-            besterr = thiserr;
-            br = nr;
-            bc = nc;
-            k = i;
-        }
-    }
-
-    if (tr == br && tc == bc)
-        goto cal_neighbors;
-
-    for (j = 1; j < 127; j++)
-    {
-        tr = br;
-        tc = bc;
-        tk = k;
-
-        for (i = 0; i < 3; i++)
-        {
-            int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
-
-            if (nc < x->mv_col_min) continue;
-
-            if (nc > x->mv_col_max) continue;
-
-            if (nr < x->mv_row_min) continue;
-
-            if (nr > x->mv_row_max) continue;
-
-            //CHECK_BETTER(thiserr,nr,nc);
-            if ((thiserr = ERR(nr, nc, besterr)) < besterr)
-            {
-                besterr = thiserr;
-                br = nr;
-                bc = nc; //k=(tk+5+i)%6;}
-                k = tk + 5 + i;
-
-                if (k >= 12) k -= 12;
-                else if (k >= 6) k -= 6;
-            }
-        }
-
-        if (tr == br && tc == bc)
-            break;
-    }
-
-    // check 8 1 away neighbors
-cal_neighbors:
-    tr = br;
-    tc = bc;
-
-    for (i = 0; i < 8; i++)
-    {
-        int nr = tr + neighbors[i].row, nc = tc + neighbors[i].col;
-
-        if (nc < x->mv_col_min) continue;
-
-        if (nc > x->mv_col_max) continue;
-
-        if (nr < x->mv_row_min) continue;
-
-        if (nr > x->mv_row_max) continue;
-
-        CHECK_BETTER(thiserr, nr, nc);
-    }
-
-    best_mv->row = br;
-    best_mv->col = bc;
-
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
-}
-#undef MVC
-#undef PRE
-#undef SP
-#undef DIST
-#undef ERR
-#undef CHECK_BETTER
-
 
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 850208eb8..c5cae9cf2 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -761,7 +761,15 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 #define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
-
+static const MV next_chkpts[6][3] =
+{
+    {{ -2, 0}, { -1, -2}, {1, -2}},
+    {{ -1, -2}, {1, -2}, {2, 0}},
+    {{1, -2}, {2, 0}, {1, 2}},
+    {{2, 0}, {1, 2}, { -1, 2}},
+    {{1, 2}, { -1, 2}, { -2, 0}},
+    {{ -1, 2}, { -2, 0}, { -1, -2}}
+};
 int vp8_hex_search
 (
     MACROBLOCK *x,
@@ -778,38 +786,67 @@ int vp8_hex_search
     int *mvcost[2]
 )
 {
-    MV hex[6] = { { -2, 0}, { -1, -2}, { -1, 2}, {2, 0}, {1, 2}, {1, -2} } ;
+    MV hex[6] = { { -1, -2}, {1, -2}, {2, 0}, {1, 2}, { -1, 2}, { -2, 0} } ;
     MV neighbors[8] = { { -1, -1}, { -1, 0}, { -1, 1}, {0, -1}, {0, 1}, {1, -1}, {1, 0}, {1, 1} } ;
     int i, j;
     unsigned char *src = (*(b->base_src) + b->src);
     int src_stride = b->src_stride;
-    int rr = ref_mv->row, rc = ref_mv->col, br = rr, bc = rc, tr, tc;
+    int rr = ref_mv->row, rc = ref_mv->col, br = rr >> 3, bc = rc >> 3, tr, tc;
     unsigned int besterr, thiserr = 0x7fffffff;
+    int k = -1, tk;
 
-    if (rc < x->mv_col_min) bc = x->mv_col_min;
+    if (bc < x->mv_col_min) bc = x->mv_col_min;
 
-    if (rc > x->mv_col_max) bc = x->mv_col_max;
+    if (bc > x->mv_col_max) bc = x->mv_col_max;
 
-    if (rr < x->mv_row_min) br = x->mv_row_min;
+    if (br < x->mv_row_min) br = x->mv_row_min;
 
-    if (rr > x->mv_row_max) br = x->mv_row_max;
+    if (br > x->mv_row_max) br = x->mv_row_max;
 
     rr >>= 1;
     rc >>= 1;
-    br >>= 3;
-    bc >>= 3;
 
     besterr = ERR(br, bc, thiserr);
 
-    // hex search  jbb changed to 127 to avoid max 256 problem steping by 2.
-    for (j = 0; j < 127; j++)
+    // hex search
+    //j=0
+    tr = br;
+    tc = bc;
+
+    for (i = 0; i < 6; i++)
+    {
+        int nr = tr + hex[i].row, nc = tc + hex[i].col;
+
+        if (nc < x->mv_col_min) continue;
+
+        if (nc > x->mv_col_max) continue;
+
+        if (nr < x->mv_row_min) continue;
+
+        if (nr > x->mv_row_max) continue;
+
+        //CHECK_BETTER(thiserr,nr,nc);
+        if ((thiserr = ERR(nr, nc, besterr)) < besterr)
+        {
+            besterr = thiserr;
+            br = nr;
+            bc = nc;
+            k = i;
+        }
+    }
+
+    if (tr == br && tc == bc)
+        goto cal_neighbors;
+
+    for (j = 1; j < 127; j++)
     {
         tr = br;
         tc = bc;
+        tk = k;
 
-        for (i = 0; i < 6; i++)
+        for (i = 0; i < 3; i++)
         {
-            int nr = tr + hex[i].row, nc = tc + hex[i].col;
+            int nr = tr + next_chkpts[tk][i].row, nc = tc + next_chkpts[tk][i].col;
 
             if (nc < x->mv_col_min) continue;
 
@@ -819,7 +856,17 @@ int vp8_hex_search
 
             if (nr > x->mv_row_max) continue;
 
-            CHECK_BETTER(thiserr, nr, nc);
+            //CHECK_BETTER(thiserr,nr,nc);
+            if ((thiserr = ERR(nr, nc, besterr)) < besterr)
+            {
+                besterr = thiserr;
+                br = nr;
+                bc = nc; //k=(tk+5+i)%6;}
+                k = tk + 5 + i;
+
+                if (k >= 12) k -= 12;
+                else if (k >= 6) k -= 6;
+            }
         }
 
         if (tr == br && tc == bc)
@@ -827,6 +874,7 @@ int vp8_hex_search
     }
 
     // check 8 1 away neighbors
+cal_neighbors:
     tr = br;
     tc = bc;
 
@@ -856,6 +904,8 @@ int vp8_hex_search
 #undef DIST
 #undef ERR
 #undef CHECK_BETTER
+
+
 int vp8_diamond_search_sad
 (
     MACROBLOCK *x,

From d6c67f02c9aae706701d3b94c20830b056c57ded Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 11:37:23 -0400
Subject: [PATCH 262/307] make vp8_recon16x16mb{,y} RTCD functions

ARM NEON has a platform specific version of vp8_recon16x16mb, though
it's just a stub to extract the various parameters from the
MACROBLOCKD struct and pass them to vp8_recon16x16mb_neon(). Using
that function's prototype directly will be a better long term solution,
but it's quite an invasive change.

Change-Id: I04273149e2ade34749e2d09e7edb0c396e1dd620
---
 vp8/common/arm/arm_systemdependent.c          |  2 ++
 .../arm/{recon_arm.c => neon/recon_neon.c}    |  4 +--
 vp8/common/arm/recon_arm.h                    |  5 ++++
 vp8/common/generic/systemdependent.c          |  6 +++--
 vp8/common/recon.c                            |  4 +--
 vp8/common/recon.h                            | 25 ++++++++++++++++---
 vp8/encoder/encodeintra.c                     |  6 +++--
 vp8/encoder/encodemb.c                        |  6 +++--
 vp8/vp8_common.mk                             |  2 +-
 9 files changed, 44 insertions(+), 16 deletions(-)
 rename vp8/common/arm/{recon_arm.c => neon/recon_neon.c} (91%)

diff --git a/vp8/common/arm/arm_systemdependent.c b/vp8/common/arm/arm_systemdependent.c
index fe62fae13..83921f807 100644
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -105,6 +105,8 @@ void vp8_arch_arm_common_init(VP8_COMMON *ctx)
         rtcd->recon.recon       = vp8_recon_b_neon;
         rtcd->recon.recon2      = vp8_recon2b_neon;
         rtcd->recon.recon4      = vp8_recon4b_neon;
+        rtcd->recon.recon_mb    = vp8_recon_mb_neon;
+
     }
 #endif
 
diff --git a/vp8/common/arm/recon_arm.c b/vp8/common/arm/neon/recon_neon.c
similarity index 91%
rename from vp8/common/arm/recon_arm.c
rename to vp8/common/arm/neon/recon_neon.c
index a1b210130..3b2df4c9f 100644
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -15,8 +15,7 @@
 
 extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
 
-#if HAVE_ARMV7
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
     unsigned char *pred_ptr = &x->predictor[0];
     short *diff_ptr = &x->diff[0];
@@ -28,4 +27,3 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 
     vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
 }
-#endif
diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index c30f6dc2d..bf32e6205 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -51,6 +51,8 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
+extern prototype_recon_macroblock(vp8_recon_recon_mb_neon);
+
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_neon
@@ -69,6 +71,9 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
+
+#undef  vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_neon
 #endif
 #endif
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0ef375e33..0c9b77e76 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -40,9 +40,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
     rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
     rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
+    rtcd->recon.recon       = vp8_recon_b_c;
     rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
+    rtcd->recon.recon4      = vp8_recon4b_c;
+    rtcd->recon.recon_mb    = vp8_recon_mb_c;
+    rtcd->recon.recon_mby   = vp8_recon_mby_c;
 
     rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
     rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index f31ffd8e4..2cb3bc6ce 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -106,7 +106,7 @@ void vp8_recon2b_c
     }
 }
 
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
 #if ARCH_ARM
     BLOCKD *b = &x->block[0];
@@ -135,7 +135,7 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 #endif
 }
 
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
 #if ARCH_ARM
     BLOCKD *b = &x->block[0];
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index e34a63c86..c432c7bd2 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -12,12 +12,19 @@
 #ifndef __INC_RECON_H
 #define __INC_RECON_H
 
+#include "blockd.h"
+
 #define prototype_copy_block(sym) \
     void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)
 
 #define prototype_recon_block(sym) \
     void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
 
+#define prototype_recon_macroblock(sym) \
+    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x);
+
+struct vp8_recon_rtcd_vtable;
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/recon_x86.h"
 #endif
@@ -56,9 +63,20 @@ extern prototype_recon_block(vp8_recon_recon2);
 #endif
 extern prototype_recon_block(vp8_recon_recon4);
 
+#ifndef vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mb);
+
+#ifndef vp8_recon_recon_mby
+#define vp8_recon_recon_mby vp8_recon_mby_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mby);
+
 typedef prototype_copy_block((*vp8_copy_block_fn_t));
 typedef prototype_recon_block((*vp8_recon_fn_t));
-typedef struct
+typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
+typedef struct vp8_recon_rtcd_vtable
 {
     vp8_copy_block_fn_t  copy16x16;
     vp8_copy_block_fn_t  copy8x8;
@@ -66,6 +84,8 @@ typedef struct
     vp8_recon_fn_t       recon;
     vp8_recon_fn_t       recon2;
     vp8_recon_fn_t       recon4;
+    vp8_recon_mb_fn_t    recon_mb;
+    vp8_recon_mb_fn_t    recon_mby;
 } vp8_recon_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -74,9 +94,6 @@ typedef struct
 #define RECON_INVOKE(ctx,fn) vp8_recon_##fn
 #endif
 
-#include "blockd.h"
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 #endif
diff --git a/vp8/encoder/encodeintra.c b/vp8/encoder/encodeintra.c
index a790456f8..1c72b90f1 100644
--- a/vp8/encoder/encodeintra.c
+++ b/vp8/encoder/encodeintra.c
@@ -113,7 +113,8 @@ void vp8_encode_intra16x16mby(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 
     // make sure block modes are set the way we want them for context updates
     for (b = 0; b < 16; b++)
@@ -157,7 +158,8 @@ void vp8_encode_intra16x16mbyrd(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 
     // make sure block modes are set the way we want them for context updates
     for (b = 0; b < 16; b++)
diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 99ee2b806..043eac219 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -639,7 +639,8 @@ void vp8_encode_inter16x16(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mb(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mb(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mb)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
 
 
@@ -656,7 +657,8 @@ void vp8_encode_inter16x16y(const VP8_ENCODER_RTCD *rtcd, MACROBLOCK *x)
 
     vp8_inverse_transform_mby(IF_RTCD(&rtcd->common->idct), &x->e_mbd);
 
-    vp8_recon16x16mby(IF_RTCD(&rtcd->common->recon), &x->e_mbd);
+    RECON_INVOKE(&rtcd->common->recon, recon_mby)
+        (IF_RTCD(&rtcd->common->recon), &x->e_mbd);
 }
 
 
diff --git a/vp8/vp8_common.mk b/vp8/vp8_common.mk
index 24cbbdd0c..bb3f8259c 100644
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -118,7 +118,6 @@ VP8_COMMON_SRCS-$(ARCH_ARM)  += common/arm/arm_systemdependent.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/bilinearfilter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/filter_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/loopfilter_arm.c
-VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/recon_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/reconintra_arm.c
 VP8_COMMON_SRCS-$(HAVE_ARMV6)  += common/arm/vpx_asm_offsets.c
 
@@ -162,6 +161,7 @@ VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/sixtappredict16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon16x16mb_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/buildintrapredictorsmby_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/save_neon_reg$(ASM)
+VP8_COMMON_SRCS-$(HAVE_ARMV7)  += common/arm/neon/recon_neon.c
 
 
 #

From ad760b9660323e4febb27496f5fb8de86399853c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 16:10:22 -0400
Subject: [PATCH 263/307] Update AUTHORS

Change-Id: I18e0a9e00731c23a2bdd1a978c8cb38f71e9029d
---
 AUTHORS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/AUTHORS b/AUTHORS
index 6708d5aa9..110e5e143 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,6 +1,7 @@
 # This file is automatically generated from the git commit history
 # by tools/gen_authors.sh.
 
+Aaron Watry <awatry@gmail.com>
 Adrian Grange <agrange@google.com>
 Alex Converse <alex.converse@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
@@ -20,6 +21,7 @@ Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
+Martin Ettl <ettl.martin78@googlemail.com>
 Michael Kohler <michaelkohler@live.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>

From 75afcee962c660b6fcd82e0eb8b1762aadee584f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 16:22:22 -0400
Subject: [PATCH 264/307] vpxenc: add deterministic output option

By baking the version number into the output file, a hash of the file
will vary from commit to commit, even if the output is otherwise bit
exact. Add a -D option to suppress this behavior, for use when
bisecting or other debugging.

Change-Id: I5089a8ce5719920ffaf47620fa9069b81fa15673
---
 vpxenc.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index a17878217..bc69f5d4b 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -430,6 +430,8 @@ struct cue_entry
 
 struct EbmlGlobal
 {
+    int debug;
+
     FILE    *stream;
     uint64_t last_pts_ms;
     vpx_rational_t  framerate;
@@ -552,8 +554,10 @@ write_webm_seek_info(EbmlGlobal *ebml)
         Ebml_SerializeUnsigned(ebml, TimecodeScale, 1000000);
         Ebml_SerializeFloat(ebml, Segment_Duration,
                             ebml->last_pts_ms + frame_time);
-        Ebml_SerializeString(ebml, 0x4D80, "vpxenc" VERSION_STRING);
-        Ebml_SerializeString(ebml, 0x5741, "vpxenc" VERSION_STRING);
+        Ebml_SerializeString(ebml, 0x4D80,
+            ebml->debug ? "vpxenc" : "vpxenc" VERSION_STRING);
+        Ebml_SerializeString(ebml, 0x5741,
+            ebml->debug ? "vpxenc" : "vpxenc" VERSION_STRING);
         Ebml_EndSubElement(ebml, &startInfo);
     }
 }
@@ -740,6 +744,8 @@ write_webm_file_footer(EbmlGlobal *glob)
 
 #include "args.h"
 
+static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
+        "Debug mode (makes output deterministic)");
 static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
         "Output filename");
 static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
@@ -774,6 +780,7 @@ static const arg_def_t use_ivf          = ARG_DEF(NULL, "ivf", 0,
         "Output IVF (default is WebM)");
 static const arg_def_t *main_args[] =
 {
+    &debugmode,
     &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &deadline,
     &best_dl, &good_dl, &rt_dl,
     &verbosearg, &psnrarg, &use_ivf, &framerate,
@@ -1052,6 +1059,8 @@ int main(int argc, const char **argv_)
             write_webm = 0;
         else if (arg_match(&arg, &outputfile, argi))
             out_fn = arg.val;
+        else if (arg_match(&arg, &debugmode, argi))
+            ebml.debug = 1;
         else
             argj++;
     }

From a0ccc97d8ac134cd787747a2b8b7ea8206b5777c Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 26 Oct 2010 12:58:51 -0700
Subject: [PATCH 265/307] postproc: Add mode and refrence frame visualizers.

Post process option to color the block for either the mode
of the macro block, or the frame that the macro block references.

Change-Id: Ie498175497f2d20e3319924d352dc4ddc16f4134
---
 vp8/common/postproc.c | 138 ++++++++++++++++++++++++++++++++++++++++--
 vp8/common/ppflags.h  |  18 +++---
 2 files changed, 143 insertions(+), 13 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index df18c7c75..9c0ab75a7 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -19,8 +19,36 @@
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
+
+#define RGB_TO_YUV(t)                                                                       \
+    ( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16),  \
+    (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
+    ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
+
 // global constants
 
+static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x98FB98) },   // PaleGreen
+    { RGB_TO_YUV(0x00FF00) },   // Green
+    { RGB_TO_YUV(0xADFF2F) },   // GreenYellow
+    { RGB_TO_YUV(0x228B22) },   // ForestGreen
+    { RGB_TO_YUV(0x006400) },   // DarkGreen
+    { RGB_TO_YUV(0x98F5FF) },   // Cadet Blue
+    { RGB_TO_YUV(0x6CA6CD) },   // Sky Blue
+    { RGB_TO_YUV(0x00008B) },   // Dark blue
+    { RGB_TO_YUV(0x551A8B) },   // Purple
+    { RGB_TO_YUV(0xFF0000) }    // Red
+};
+
+static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x00ff00) },   // Blue
+    { RGB_TO_YUV(0x0000ff) },   // Green
+    { RGB_TO_YUV(0xffff00) },   // Yellow
+    { RGB_TO_YUV(0xff0000) },   // Red
+};
+
 static const short kernel5[] =
 {
     1, 1, 4, 1, 1
@@ -444,11 +472,34 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
     }
 }
 
-#if CONFIG_RUNTIME_CPU_DETECT
-#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
-#else
-#define RTCD_VTABLE(oci) NULL
-#endif
+void vp8_blend_block_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    for (i = 0; i < 8; i++)
+    {
+        for (j = 0; j < 8; j++)
+        {
+            u[j] = (u[j]*alpha + u1_const)>>16;
+            v[j] = (v[j]*alpha + v1_const)>>16;
+        }
+        u += stride/2;
+        v += stride/2;
+    }
+}
 
 static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
 {
@@ -489,6 +540,13 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
     }
 }
 
+
+#if CONFIG_RUNTIME_CPU_DETECT
+#define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
+#else
+#define RTCD_VTABLE(oci) NULL
+#endif
+
 int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
 {
     char message[512];
@@ -693,6 +751,76 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
+    // Color in block modes
+    else if (flags & VP8D_DEBUG_LEVEL6)
+    {
+        int i, j;
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+        unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+        unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+
+        for (i = 0; i < height; i += 16)
+        {
+            for (j = 0; j < width; j += 16)
+            {
+                int Y = 0, U = 0, V = 0;
+
+                Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
+                U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
+                V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+
+                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+
+                mi++;
+            }
+            y_ptr += y_stride*16;
+            u_ptr += y_stride*4;
+            v_ptr += y_stride*4;
+
+            mi++;
+        }
+    }
+
+    // Color in frame reference blocks
+    else if (flags & VP8D_DEBUG_LEVEL7)
+    {
+        int i, j;
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+        unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+        unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+
+        for (i = 0; i < height; i += 16)
+        {
+            for (j = 0; j < width; j +=16)
+            {
+                int Y = 0, U = 0, V = 0;
+
+                Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
+                U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
+                V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
+
+                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+
+                mi++;
+            }
+            y_ptr += y_stride*16;
+            u_ptr += y_stride*4;
+            v_ptr += y_stride*4;
+
+            mi++;
+        }
+    }
+
     *dest = oci->post_proc_buffer;
 
     // handle problem with extending borders
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index a1e2330bb..651671a43 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -14,14 +14,16 @@
 enum
 {
     VP8D_NOFILTERING    = 0,
-    VP8D_DEBLOCK        = 1,
-    VP8D_DEMACROBLOCK   = 2,
-    VP8D_ADDNOISE       = 4,
-    VP8D_DEBUG_LEVEL1   = 8,
-    VP8D_DEBUG_LEVEL2   = 16,
-    VP8D_DEBUG_LEVEL3   = 32,
-    VP8D_DEBUG_LEVEL4   = 64,
-    VP8D_DEBUG_LEVEL5   = 128,
+    VP8D_DEBLOCK        = 1<<0,
+    VP8D_DEMACROBLOCK   = 1<<1,
+    VP8D_ADDNOISE       = 1<<2,
+    VP8D_DEBUG_LEVEL1   = 1<<3,
+    VP8D_DEBUG_LEVEL2   = 1<<4,
+    VP8D_DEBUG_LEVEL3   = 1<<5,
+    VP8D_DEBUG_LEVEL4   = 1<<6,
+    VP8D_DEBUG_LEVEL5   = 1<<7,
+    VP8D_DEBUG_LEVEL6   = 1<<8,
+    VP8D_DEBUG_LEVEL7   = 1<<9,
 };
 
 #endif

From 209d82ad722bd9eb0de2d2cd1e73aec281f00e00 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 26 Oct 2010 15:34:16 -0400
Subject: [PATCH 266/307] Add half-pixel variance RTCD functions

NEON has optimized 16x16 half-pixel variance functions, but they
were not part of the RTCD framework. Add these functions to RTCD,
so that other platforms can make use of this optimization in the
future and special-case ARM code can be removed.

A number of functions were taking two variance functions as
parameters. These functions were changed to take a single
parameter, a pointer to a struct containing all the variance
functions for that block size. This provides additional flexibility
for calling additional variance functions (the half-pixel special
case, for example) and by initializing the table for all block sizes,
we don't have to construct this function pointer table for each
macroblock.

Change-Id: I78289ff36b2715f9a7aa04d5f6fbe3d23acdc29c
---
 vp8/encoder/arm/mcomp_arm.c                   | 615 ------------------
 .../neon/vp8_subpixelvariance16x16s_neon.asm  |  18 +-
 vp8/encoder/arm/variance_arm.h                |  12 +
 vp8/encoder/firstpass.c                       |   5 +-
 vp8/encoder/mcomp.c                           |  95 ++-
 vp8/encoder/mcomp.h                           |   7 +-
 vp8/encoder/onyx_if.c                         |  49 +-
 vp8/encoder/onyx_int.h                        |  12 +-
 vp8/encoder/pickinter.c                       |  21 +-
 vp8/encoder/rdopt.c                           |  66 +-
 vp8/encoder/temporal_filter.c                 |  10 +-
 vp8/encoder/variance.h                        |  32 +-
 vp8/encoder/variance_c.c                      |  15 +-
 vp8/vp8cx_arm.mk                              |   1 -
 14 files changed, 197 insertions(+), 761 deletions(-)
 delete mode 100644 vp8/encoder/arm/mcomp_arm.c

diff --git a/vp8/encoder/arm/mcomp_arm.c b/vp8/encoder/arm/mcomp_arm.c
deleted file mode 100644
index 27146e23f..000000000
--- a/vp8/encoder/arm/mcomp_arm.c
+++ /dev/null
@@ -1,615 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "mcomp.h"
-#include "vpx_mem/vpx_mem.h"
-
-#include <stdio.h>
-#include <limits.h>
-#include <math.h>
-
-#ifdef ENTROPY_STATS
-static int mv_ref_ct [31] [4] [2];
-static int mv_mode_cts [4] [2];
-#endif
-
-extern unsigned int vp8_sub_pixel_variance16x16s_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    int  xoffset,
-    int  yoffset,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-extern unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
-(
-    unsigned char  *src_ptr,
-    int  src_pixels_per_line,
-    unsigned char *dst_ptr,
-    int dst_pixels_per_line,
-    unsigned int *sse
-);
-
-
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-    int whichdir ;
-
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    //for(whichdir =0;whichdir<4;whichdir++)
-    //{
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-//  }
-
-
-    // time to check quarter pels.
-    if (bestmv->row < startmv.row)
-        y -= d->pre_stride;
-
-    if (bestmv->col < startmv.col)
-        y--;
-
-    startmv = *bestmv;
-
-
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-
-    if (startmv.col & 7)
-    {
-        this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-
-    if (startmv.row & 7)
-    {
-        this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    }
-    else
-    {
-        this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-    }
-
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-
-    // now check 1 more diagonal
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-
-//  for(whichdir=0;whichdir<4;whichdir++)
-//  {
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-            }
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-
-            if (startmv.col & 7)
-            {
-                this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-            }
-            else
-            {
-                this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
-            }
-        }
-
-        break;
-    case 1:
-        this_mv.col += 2;
-
-        if (startmv.row & 7)
-        {
-            this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
-        }
-
-        break;
-    case 2:
-        this_mv.row += 2;
-
-        if (startmv.col & 7)
-        {
-            this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        }
-        else
-        {
-            this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
-        }
-
-        break;
-    case 3:
-        this_mv.col += 2;
-        this_mv.row += 2;
-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-//  }
-
-    return bestmse;
-}
-
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
-{
-    int bestmse = INT_MAX;
-    MV startmv;
-    //MV this_mv;
-    MV this_mv;
-    unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
-    unsigned char *z = (*(b->base_src) + b->src);
-    int left, right, up, down, diag;
-    unsigned int sse;
-
-    // Trap uncodable vectors
-    if ((abs((bestmv->col << 3) - ref_mv->col) > MAX_FULL_PEL_VAL) || (abs((bestmv->row << 3) - ref_mv->row) > MAX_FULL_PEL_VAL))
-    {
-        bestmv->row <<= 3;
-        bestmv->col <<= 3;
-        return INT_MAX;
-    }
-
-    // central mv
-    bestmv->row <<= 3;
-    bestmv->col <<= 3;
-    startmv = *bestmv;
-
-    // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
-    bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
-
-    // go left then right and check error
-    this_mv.row = startmv.row;
-    this_mv.col = ((startmv.col - 8) | 4);
-    left = vp8_sub_pixel_variance16x16s_4_0_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (left < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = left;
-    }
-
-    this_mv.col += 8;
-    right = vp8_sub_pixel_variance16x16s_4_0_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (right < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = right;
-    }
-
-    // go up then down and check error
-    this_mv.col = startmv.col;
-    this_mv.row = ((startmv.row - 8) | 4);
-    up = vp8_sub_pixel_variance16x16s_0_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (up < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = up;
-    }
-
-    this_mv.row += 8;
-    down = vp8_sub_pixel_variance16x16s_0_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (down < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = down;
-    }
-
-    // somewhat strangely not doing all the diagonals for half pel is slower than doing them.
-#if 0
-    // now check 1 more diagonal -
-    whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    this_mv = startmv;
-
-    switch (whichdir)
-    {
-    case 0:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 1:
-        this_mv.col += 4;
-        this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 2:
-        this_mv.col = (this_mv.col - 8) | 4;
-        this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    case 3:
-        this_mv.col += 4;
-        this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
-        break;
-    }
-
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#else
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = (this_mv.row - 8) | 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col = (this_mv.col - 8) | 4;
-    this_mv.row = startmv.row + 4;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y - 1, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-    this_mv.col += 8;
-    diag = vp8_sub_pixel_variance16x16s_4_4_neon(y, d->pre_stride, z, b->src_stride, &sse);
-    diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
-
-    if (diag < bestmse)
-    {
-        *bestmv = this_mv;
-        bestmse = diag;
-    }
-
-#endif
-    return bestmse;
-}
-
-
-#ifdef ENTROPY_STATS
-void print_mode_context(void)
-{
-    FILE *f = fopen("modecont.c", "w");
-    int i, j;
-
-    fprintf(f, "#include \"entropy.h\"\n");
-    fprintf(f, "const int vp8_mode_contexts[6][4] =\n");
-    fprintf(f, "{\n");
-
-    for (j = 0; j < 6; j++)
-    {
-        fprintf(f, "  { // %d \n", j);
-        fprintf(f, "    ");
-
-        for (i = 0; i < 4; i++)
-        {
-            int overal_prob;
-            int this_prob;
-            int count; // = mv_ref_ct[j][i][0]+mv_ref_ct[j][i][1];
-
-            // Overall probs
-            count = mv_mode_cts[i][0] + mv_mode_cts[i][1];
-
-            if (count)
-                overal_prob = 256 * mv_mode_cts[i][0] / count;
-            else
-                overal_prob = 128;
-
-            if (overal_prob == 0)
-                overal_prob = 1;
-
-            // context probs
-            count = mv_ref_ct[j][i][0] + mv_ref_ct[j][i][1];
-
-            if (count)
-                this_prob = 256 * mv_ref_ct[j][i][0] / count;
-            else
-                this_prob = 128;
-
-            if (this_prob == 0)
-                this_prob = 1;
-
-            fprintf(f, "%5d, ", this_prob);
-            //fprintf(f,"%5d, %5d, %8d,", this_prob, overal_prob, (this_prob << 10)/overal_prob);
-            //fprintf(f,"%8d, ", (this_prob << 10)/overal_prob);
-        }
-
-        fprintf(f, "  },\n");
-    }
-
-    fprintf(f, "};\n");
-    fclose(f);
-}
-
-/* MV ref count ENTROPY_STATS stats code */
-#ifdef ENTROPY_STATS
-void init_mv_ref_counts()
-{
-    vpx_memset(mv_ref_ct, 0, sizeof(mv_ref_ct));
-    vpx_memset(mv_mode_cts, 0, sizeof(mv_mode_cts));
-}
-
-void accum_mv_refs(MB_PREDICTION_MODE m, const int ct[4])
-{
-    if (m == ZEROMV)
-    {
-        ++mv_ref_ct [ct[0]] [0] [0];
-        ++mv_mode_cts[0][0];
-    }
-    else
-    {
-        ++mv_ref_ct [ct[0]] [0] [1];
-        ++mv_mode_cts[0][1];
-
-        if (m == NEARESTMV)
-        {
-            ++mv_ref_ct [ct[1]] [1] [0];
-            ++mv_mode_cts[1][0];
-        }
-        else
-        {
-            ++mv_ref_ct [ct[1]] [1] [1];
-            ++mv_mode_cts[1][1];
-
-            if (m == NEARMV)
-            {
-                ++mv_ref_ct [ct[2]] [2] [0];
-                ++mv_mode_cts[2][0];
-            }
-            else
-            {
-                ++mv_ref_ct [ct[2]] [2] [1];
-                ++mv_mode_cts[2][1];
-
-                if (m == NEWMV)
-                {
-                    ++mv_ref_ct [ct[3]] [3] [0];
-                    ++mv_mode_cts[3][0];
-                }
-                else
-                {
-                    ++mv_ref_ct [ct[3]] [3] [1];
-                    ++mv_mode_cts[3][1];
-                }
-            }
-        }
-    }
-}
-
-#endif/* END MV ref count ENTROPY_STATS stats code */
-
-#endif
diff --git a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
index 1c1441cc2..0a2b71c49 100644
--- a/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
+++ b/vp8/encoder/arm/neon/vp8_subpixelvariance16x16s_neon.asm
@@ -9,9 +9,9 @@
 ;
 
 
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_0_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_0_4_neon|
-    EXPORT  |vp8_sub_pixel_variance16x16s_4_4_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_h_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_v_neon|
+    EXPORT  |vp8_variance_halfpixvar16x16_hv_neon|
     EXPORT  |vp8_sub_pixel_variance16x16s_neon|
     ARM
     REQUIRE8
@@ -20,7 +20,7 @@
     AREA ||.text||, CODE, READONLY, ALIGN=2
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_0_neon
+;unsigned int vp8_variance_halfpixvar16x16_h_neon
 ;(
 ;    unsigned char  *src_ptr, r0
 ;    int  src_pixels_per_line,  r1
@@ -29,7 +29,7 @@
 ;    unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_4_0_neon| PROC
+|vp8_variance_halfpixvar16x16_h_neon| PROC
     push            {lr}
 
     mov             r12, #4                  ;loop counter
@@ -120,7 +120,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
     ENDP
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_0_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_v_neon
 ;(
 ;    unsigned char  *src_ptr, r0
 ;    int  src_pixels_per_line,  r1
@@ -129,7 +129,7 @@ vp8_filt_fpo16x16s_4_0_loop_neon
 ;    unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_0_4_neon| PROC
+|vp8_variance_halfpixvar16x16_v_neon| PROC
     push            {lr}
 
     mov             r12, #4                     ;loop counter
@@ -216,7 +216,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
     ENDP
 
 ;================================================
-;unsigned int vp8_sub_pixel_variance16x16s_4_4_neon
+;unsigned int vp8_variance_halfpixvar16x16_hv_neon
 ;(
 ;    unsigned char  *src_ptr, r0
 ;    int  src_pixels_per_line,  r1
@@ -225,7 +225,7 @@ vp8_filt_spo16x16s_0_4_loop_neon
 ;    unsigned int *sse
 ;);
 ;================================================
-|vp8_sub_pixel_variance16x16s_4_4_neon| PROC
+|vp8_variance_halfpixvar16x16_hv_neon| PROC
     push            {lr}
 
     vld1.u8         {d0, d1, d2, d3}, [r0], r1      ;load src data
diff --git a/vp8/encoder/arm/variance_arm.h b/vp8/encoder/arm/variance_arm.h
index fb9dd5a5b..0e5f62fcf 100644
--- a/vp8/encoder/arm/variance_arm.h
+++ b/vp8/encoder/arm/variance_arm.h
@@ -30,6 +30,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_neon);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_c);
 //extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_c);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_neon);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_neon);
 
 //extern prototype_getmbss(vp8_get_mb_ss_c);
 extern prototype_variance(vp8_mse16x16_neon);
@@ -84,6 +87,15 @@ extern prototype_sad(vp8_get4x4sse_cs_neon);
 #undef  vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_neon
 
+#undef  vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_neon
+
+#undef  vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_neon
+
+#undef  vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_neon
+
 //#undef  vp8_variance_getmbss
 //#define vp8_variance_getmbss vp8_get_mb_ss_c
 
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 607c3d236..691aee0a6 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -462,12 +462,11 @@ void vp8_first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, MV *ref_mv, MV *
     int step_param = 3;                                       //3;          // Dont search over full range for first pass
     int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; //3;
     int n;
-    vp8_variance_fn_ptr_t v_fn_ptr;
+    vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16];
     int new_mv_mode_penalty = 256;
 
+    // override the default variance function to use MSE
     v_fn_ptr.vf    = VARIANCE_INVOKE(IF_RTCD(&cpi->rtcd.variance), mse16x16);
-    v_fn_ptr.sdf   = cpi->fn_ptr.sdf;
-    v_fn_ptr.sdx4df = cpi->fn_ptr.sdx4df;
 
     // Set up pointers for this macro block recon buffer
     xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index c5cae9cf2..8cc63f83d 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -186,7 +186,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 #define MVC(r,c) (((mvcost[0][(r)-rr] + mvcost[1][(c) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 #define PRE(r,c) (*(d->base_pre) + d->pre + ((r)>>2) * d->pre_stride + ((c)>>2)) // pointer to predictor base of a motionvector
 #define SP(x) (((x)&3)<<1) // convert motion vector component to offset for svf calc
-#define DIST(r,c) svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
+#define DIST(r,c) vfp->svf( PRE(r,c), d->pre_stride, SP(c),SP(r), z,b->src_stride,&sse) // returns subpixel variance error function.
 #define IFMVCV(r,c,s,e) if ( c >= minc && c <= maxc && r >= minr && r <= maxr) s else e;
 #define ERR(r,c) (MVC(r,c)+DIST(r,c)) // returns distortion + motion vector cost
 #define CHECK_BETTER(v,r,c) IFMVCV(r,c,{if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }}, v=INT_MAX;)// checks if (r,c) has better score than previous best
@@ -195,7 +195,7 @@ void vp8_init3smotion_compensation(MACROBLOCK *x, int stride)
 
 //#define CHECK_BETTER(v,r,c) if((v = ERR(r,c)) < besterr) { besterr = v; br=r; bc=c; }
 
-int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     unsigned char *y = *(d->base_pre) + d->pre + (bestmv->row) * d->pre_stride + bestmv->col;
     unsigned char *z = (*(b->base_src) + b->src);
@@ -220,7 +220,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
     bestmv->col <<= 3;
 
     // calculate central point error
-    besterr = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    besterr = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     besterr += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // TODO: Each subsequent iteration checks at least one point in common with the last iteration could be 2 ( if diag selected)
@@ -309,7 +309,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 #undef CHECK_BETTER
 #undef MIN
 #undef MAX
-int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -336,13 +336,13 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // go left then right and check error
     this_mv.row = startmv.row;
     this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (left < bestmse)
@@ -352,7 +352,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -364,7 +364,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     // go up then down and check error
     this_mv.col = startmv.col;
     this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (up < bestmse)
@@ -374,7 +374,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -386,10 +386,6 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
 
     // now check 1 more diagonal
     whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2);
-    // whichdir must be 0-4. Therefore, one of the cases below
-    // must run through. However, because there is no default
-    // and diag is not set elsewhere, we get a compile warning
-    diag = 0;
     //for(whichdir =0;whichdir<4;whichdir++)
     //{
     this_mv = startmv;
@@ -399,22 +395,22 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     case 0:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 1:
         this_mv.col += 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 2:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
         break;
     case 3:
         this_mv.col += 4;
         this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
         break;
     }
 
@@ -446,12 +442,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     if (startmv.col & 7)
     {
         this_mv.col = startmv.col - 2;
-        left = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     }
     else
     {
         this_mv.col = (startmv.col - 8) | 6;
-        left = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
+        left = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);
     }
 
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -463,7 +459,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.col += 4;
-    right = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    right = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -478,12 +474,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     if (startmv.row & 7)
     {
         this_mv.row = startmv.row - 2;
-        up = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        up = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     }
     else
     {
         this_mv.row = (startmv.row - 8) | 6;
-        up = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+        up = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
     }
 
     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
@@ -495,7 +491,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     }
 
     this_mv.row += 4;
-    down = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+    down = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -523,12 +519,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
             if (startmv.col & 7)
             {
                 this_mv.col -= 2;
-                diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+                diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
             }
             else
             {
                 this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+                diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
             }
         }
         else
@@ -538,12 +534,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
             if (startmv.col & 7)
             {
                 this_mv.col -= 2;
-                diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
             }
             else
             {
                 this_mv.col = (startmv.col - 8) | 6;
-                diag = svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
+                diag = vfp->svf(y - d->pre_stride - 1, d->pre_stride, 6, 6, z, b->src_stride, &sse);
             }
         }
 
@@ -554,12 +550,12 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         if (startmv.row & 7)
         {
             this_mv.row -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         }
         else
         {
             this_mv.row = (startmv.row - 8) | 6;
-            diag = svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
+            diag = vfp->svf(y - d->pre_stride, d->pre_stride, this_mv.col & 7, 6, z, b->src_stride, &sse);
         }
 
         break;
@@ -569,19 +565,19 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
         if (startmv.col & 7)
         {
             this_mv.col -= 2;
-            diag = svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+            diag = vfp->svf(y, d->pre_stride, this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         }
         else
         {
             this_mv.col = (startmv.col - 8) | 6;
-            diag = svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
+            diag = vfp->svf(y - 1, d->pre_stride, 6, this_mv.row & 7, z, b->src_stride, &sse);;
         }
 
         break;
     case 3:
         this_mv.col += 2;
         this_mv.row += 2;
-        diag = svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride,  this_mv.col & 7, this_mv.row & 7, z, b->src_stride, &sse);
         break;
     }
 
@@ -598,7 +594,7 @@ int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv,
     return bestmse;
 }
 
-int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     int bestmse = INT_MAX;
     MV startmv;
@@ -623,13 +619,13 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     startmv = *bestmv;
 
     // calculate central point error
-    bestmse = vf(y, d->pre_stride, z, b->src_stride, &sse);
+    bestmse = vfp->vf(y, d->pre_stride, z, b->src_stride, &sse);
     bestmse += vp8_mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit);
 
     // go left then right and check error
     this_mv.row = startmv.row;
     this_mv.col = ((startmv.col - 8) | 4);
-    left = svf(y - 1, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    left = vfp->svf_halfpix_h(y - 1, d->pre_stride, z, b->src_stride, &sse);
     left += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (left < bestmse)
@@ -639,7 +635,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    right = svf(y, d->pre_stride, 4, 0, z, b->src_stride, &sse);
+    right = vfp->svf_halfpix_h(y, d->pre_stride, z, b->src_stride, &sse);
     right += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (right < bestmse)
@@ -651,7 +647,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     // go up then down and check error
     this_mv.col = startmv.col;
     this_mv.row = ((startmv.row - 8) | 4);
-    up = svf(y - d->pre_stride, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    up = vfp->svf_halfpix_v(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     up += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (up < bestmse)
@@ -661,7 +657,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.row += 8;
-    down = svf(y, d->pre_stride, 0, 4, z, b->src_stride, &sse);
+    down = vfp->svf_halfpix_v(y, d->pre_stride, z, b->src_stride, &sse);
     down += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (down < bestmse)
@@ -681,22 +677,22 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     case 0:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 1:
         this_mv.col += 4;
         this_mv.row = (this_mv.row - 8) | 4;
-        diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 2:
         this_mv.col = (this_mv.col - 8) | 4;
         this_mv.row += 4;
-        diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     case 3:
         this_mv.col += 4;
         this_mv.row += 4;
-        diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+        diag = vfp->svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
         break;
     }
 
@@ -711,7 +707,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 #else
     this_mv.col = (this_mv.col - 8) | 4;
     this_mv.row = (this_mv.row - 8) | 4;
-    diag = svf(y - 1 - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1 - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -721,7 +717,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    diag = svf(y - d->pre_stride, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - d->pre_stride, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -732,7 +728,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 
     this_mv.col = (this_mv.col - 8) | 4;
     this_mv.row = startmv.row + 4;
-    diag = svf(y - 1, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y - 1, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -742,7 +738,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
     }
 
     this_mv.col += 8;
-    diag = svf(y, d->pre_stride, 4, 4, z, b->src_stride, &sse);
+    diag = vfp->svf_halfpix_hv(y, d->pre_stride, z, b->src_stride, &sse);
     diag += vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
 
     if (diag < bestmse)
@@ -758,7 +754,7 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestm
 
 #define MVC(r,c) (((mvsadcost[0][((r)<<2)-rr] + mvsadcost[1][((c)<<2) - rc]) * error_per_bit + 128 )>>8 ) // estimated cost of a motion vector (r,c)
 #define PRE(r,c) (*(d->base_pre) + d->pre + (r) * d->pre_stride + (c)) // pointer to predictor base of a motionvector
-#define DIST(r,c,v) sf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
+#define DIST(r,c,v) vfp->sdf( src,src_stride,PRE(r,c),d->pre_stride, v) // returns sad error score.
 #define ERR(r,c,v) (MVC(r,c)+DIST(r,c,v)) // returns distortion + motion vector cost
 #define CHECK_BETTER(v,r,c) if ((v = ERR(r,c,besterr)) < besterr) { besterr = v; br=r; bc=c; } // checks if (r,c) has better score than previous best
 static const MV next_chkpts[6][3] =
@@ -780,8 +776,7 @@ int vp8_hex_search
     int search_param,
     int error_per_bit,
     int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t      sf,
+    const vp8_variance_fn_ptr_t *vfp,
     int *mvsadcost[2],
     int *mvcost[2]
 )
@@ -896,7 +891,7 @@ cal_neighbors:
     best_mv->row = br;
     best_mv->col = bc;
 
-    return vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
+    return vfp->vf(src, src_stride, PRE(br, bc), d->pre_stride, &thiserr) + MVC(br, bc) ;
 }
 #undef MVC
 #undef PRE
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 7cc924279..181e95822 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -42,14 +42,15 @@ extern int vp8_hex_search
     int search_param,
     int error_per_bit,
     int *num00,
-    vp8_variance_fn_t vf,
-    vp8_sad_fn_t sf,
+    const vp8_variance_fn_ptr_t *vf,
     int *mvsadcost[2],
     int *mvcost[2]
 
 );
 
-typedef int (fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2]);
+typedef int (fractional_mv_step_fp)
+    (MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv,
+     int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2]);
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively;
 extern fractional_mv_step_fp vp8_find_best_sub_pixel_step;
 extern fractional_mv_step_fp vp8_find_best_half_pixel_step;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index bd41b2cb6..7a78b2901 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2334,11 +2334,50 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
 
     vp8cx_create_encoder_threads(cpi);
 
-    cpi->fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
-    cpi->fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
-    cpi->fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
-    cpi->fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
-    cpi->fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+    cpi->fn_ptr[BLOCK_16X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16);
+    cpi->fn_ptr[BLOCK_16X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x16);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_h  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_h);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
+    cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
+    cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+    cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
+
+    cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
+    cpi->fn_ptr[BLOCK_16X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+    cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
+
+    cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
+    cpi->fn_ptr[BLOCK_8X16].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+    cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
+
+    cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
+    cpi->fn_ptr[BLOCK_8X8].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+    cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
+
+    cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
+    cpi->fn_ptr[BLOCK_4X4].vf             = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf            = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_h  = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v  = NULL;
+    cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
+    cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+    cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
 
 #if !(CONFIG_REALTIME_ONLY)
     cpi->full_search_sad = SEARCH_INVOKE(&cpi->rtcd.search, full_search);
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index eb516e927..81e32f031 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -229,6 +229,16 @@ typedef struct VP8_ENCODER_RTCD
     vp8_search_rtcd_vtable_t    search;
 } VP8_ENCODER_RTCD;
 
+enum
+{
+    BLOCK_16X8,
+    BLOCK_8X16,
+    BLOCK_8X8,
+    BLOCK_4X4,
+    BLOCK_16X16,
+    BLOCK_MAX_SEGMENTS
+};
+
 typedef struct
 {
 
@@ -591,7 +601,7 @@ typedef struct
     fractional_mv_step_fp *find_fractional_mv_step;
     vp8_full_search_fn_t full_search_sad;
     vp8_diamond_search_fn_t diamond_search_sad;
-    vp8_variance_fn_ptr_t fn_ptr;
+    vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS];
     unsigned int time_receive_data;
     unsigned int time_compress_data;
     unsigned int time_pick_lpf;
diff --git a/vp8/encoder/pickinter.c b/vp8/encoder/pickinter.c
index 8821b3a0b..2f7dd9c7c 100644
--- a/vp8/encoder/pickinter.c
+++ b/vp8/encoder/pickinter.c
@@ -50,14 +50,13 @@ extern int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]);
 extern void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv);
 
 
-int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, int *mvcost[2])
+int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv, MV *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2])
 {
     (void) b;
     (void) d;
     (void) ref_mv;
     (void) error_per_bit;
-    (void) svf;
-    (void) vf;
+    (void) vfp;
     (void) mvcost;
     bestmv->row <<= 3;
     bestmv->col <<= 3;
@@ -65,7 +64,7 @@ int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, MV *bestmv,
 }
 
 
-static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, vp8_variance_fn_t vf, unsigned int *sse)
+static int get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse)
 {
 
     BLOCK *b = &mb->block[0];
@@ -81,11 +80,11 @@ static int get_inter_mbpred_error(MACROBLOCK *mb, vp8_subpixvariance_fn_t svf, v
 
     if (xoffset | yoffset)
     {
-        return svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
+        return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse);
     }
     else
     {
-        return vf(what, what_stride, in_what, in_what_stride, sse);
+        return vfp->vf(what, what_stride, in_what, in_what_stride, sse);
     }
 
 }
@@ -719,13 +718,13 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
 
             if (cpi->sf.search_method == HEX)
             {
-                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+                bestsme = vp8_hex_search(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
             }
             else
             {
-                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+                bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -744,7 +743,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
                         num00--;
                     else
                     {
-                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                        thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv1, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
 
                         if (thissme < bestsme)
                         {
@@ -765,7 +764,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
         }
 
         if (bestsme < INT_MAX)
-            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, cpi->fn_ptr.svf, cpi->fn_ptr.vf, cpi->mb.mvcost);
+            cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv1, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost);
 
         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
@@ -795,7 +794,7 @@ int vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int rec
             x->e_mbd.block[0].bmi.mode = this_mode;
             x->e_mbd.block[0].bmi.mv.as_int = x->e_mbd.mode_info_context->mbmi.mv.as_int;
 
-            distortion2 = get_inter_mbpred_error(x, cpi->fn_ptr.svf, cpi->fn_ptr.vf, (unsigned int *)(&sse));
+            distortion2 = get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], (unsigned int *)(&sse));
 
             this_rd = RD_ESTIMATE(x->rdmult, x->rddiv, rate2, distortion2);
 
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index dbef85b9f..8f406b9b7 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1130,6 +1130,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
     int bsd = 0;
     int bestsegmentyrate = 0;
 
+    static const int segmentation_to_sseshift[4] = {3, 3, 2, 0};
+
     // FIX TO Rd error outrange bug PGW 9 june 2004
     B_PREDICTION_MODE bmodes[16] = {ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
                                     ZERO4X4, ZERO4X4, ZERO4X4, ZERO4X4,
@@ -1151,10 +1153,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
         int rate = 0;
         int sbr = 0;
         int sbd = 0;
-        int UNINITIALIZED_IS_SAFE(sseshift);
+        int sseshift;
         int segmentyrate = 0;
 
-        vp8_variance_fn_ptr_t v_fn_ptr;
+        vp8_variance_fn_ptr_t *v_fn_ptr;
 
         ENTROPY_CONTEXT_PLANES t_above, t_left;
         ENTROPY_CONTEXT *ta;
@@ -1174,42 +1176,8 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
         br = 0;
         bd = 0;
 
-        switch (segmentation)
-        {
-        case 0:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var16x8);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar16x8);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
-            sseshift = 3;
-            break;
-        case 1:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x16);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x16);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
-            sseshift = 3;
-            break;
-        case 2:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var8x8);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar8x8);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
-            sseshift = 2;
-            break;
-        case 3:
-            v_fn_ptr.vf    = VARIANCE_INVOKE(&cpi->rtcd.variance, var4x4);
-            v_fn_ptr.svf   = VARIANCE_INVOKE(&cpi->rtcd.variance, subpixvar4x4);
-            v_fn_ptr.sdf   = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
-            v_fn_ptr.sdx3f = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
-            v_fn_ptr.sdx4df = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
-            sseshift = 0;
-            break;
-        }
-
+        v_fn_ptr = &cpi->fn_ptr[segmentation];
+        sseshift = segmentation_to_sseshift[segmentation];
         labels = vp8_mbsplits[segmentation];
         label_count = vp8_count_labels(labels);
 
@@ -1281,10 +1249,10 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                         int sadpb = x->sadperbit4;
 
                         if (cpi->sf.search_method == HEX)
-                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr.vf, v_fn_ptr.sdf, x->mvsadcost, mvcost);
+                            bestsme = vp8_hex_search(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
                         else
                         {
-                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                            bestsme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &mode_mv[NEW4X4], step_param, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
 
                             n = num00;
                             num00 = 0;
@@ -1297,7 +1265,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                                     num00--;
                                 else
                                 {
-                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, &v_fn_ptr, x->mvsadcost, mvcost);
+                                    thissme = cpi->diamond_search_sad(x, c, e, best_ref_mv, &temp_mv, step_param + n, sadpb / 2/*x->errorperbit*/, &num00, v_fn_ptr, x->mvsadcost, mvcost);
 
                                     if (thissme < bestsme)
                                     {
@@ -1312,7 +1280,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                         // Should we do a full search (best quality only)
                         if ((compressor_speed == 0) && (bestsme >> sseshift) > 4000)
                         {
-                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, &v_fn_ptr, x->mvcost, x->mvsadcost);
+                            thissme = cpi->full_search_sad(x, c, e, best_ref_mv, sadpb / 4, 16, v_fn_ptr, x->mvcost, x->mvsadcost);
 
                             if (thissme < bestsme)
                             {
@@ -1330,9 +1298,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
                     if (bestsme < INT_MAX)
                     {
                         if (!fullpixel)
-                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                            cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit / 2, v_fn_ptr, mvcost);
                         else
-                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr.svf, v_fn_ptr.vf, mvcost);
+                            vp8_skip_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], best_ref_mv, x->errorperbit, v_fn_ptr, mvcost);
                     }
                 }
 
@@ -1852,13 +1820,13 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                     if (cpi->sf.search_method == HEX)
                     {
-                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf, x->mvsadcost, x->mvcost);
+                        bestsme = vp8_hex_search(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost);
                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
                     }
                     else
                     {
-                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb < 9
+                        bestsme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param, sadpb / 2/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb < 9
                         mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                         mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
 
@@ -1877,7 +1845,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                                 num00--;
                             else
                             {
-                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr, x->mvsadcost, x->mvcost); //sadpb = 9
+                                thissme = cpi->diamond_search_sad(x, b, d, &best_ref_mv, &d->bmi.mv.as_mv, step_param + n, sadpb / 4/*x->errorperbit*/, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, x->mvcost); //sadpb = 9
 
                                 if (thissme < bestsme)
                                 {
@@ -1914,7 +1882,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
                     search_range = (search_range > cpi->sf.max_fs_radius) ? cpi->sf.max_fs_radius : search_range;
                     {
                         int sadpb = x->sadperbit16 >> 2;
-                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr, x->mvcost, x->mvsadcost);
+                        thissme = cpi->full_search_sad(x, b, d, &best_ref_mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, x->mvsadcost);
                     }
 
                     // Barrier threshold to initiating full search
@@ -1939,7 +1907,7 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
 
                 if (bestsme < INT_MAX)
                     // cpi->find_fractional_mv_step(x,b,d,&d->bmi.mv.as_mv,&best_ref_mv,x->errorperbit/2,cpi->fn_ptr.svf,cpi->fn_ptr.vf,x->mvcost);  // normal mvc=11
-                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, cpi->fn_ptr.svf, cpi->fn_ptr.vf, x->mvcost);
+                    cpi->find_fractional_mv_step(x, b, d, &d->bmi.mv.as_mv, &best_ref_mv, x->errorperbit / 4, &cpi->fn_ptr[BLOCK_16X16], x->mvcost);
 
                 mode_mv[NEWMV].row = d->bmi.mv.as_mv.row;
                 mode_mv[NEWMV].col = d->bmi.mv.as_mv.col;
diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index 630afdbf1..fd5dd7ede 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -234,7 +234,7 @@ static int find_matching_mb
             &best_ref_mv1, &d->bmi.mv.as_mv,
             step_param,
             sadpb/*x->errorperbit*/,
-            &num00, cpi->fn_ptr.vf, cpi->fn_ptr.sdf,
+            &num00, &cpi->fn_ptr[BLOCK_16X16],
             mvsadcost, mvcost);
     }
     else
@@ -245,7 +245,7 @@ static int find_matching_mb
             &best_ref_mv1, &d->bmi.mv.as_mv,
             step_param,
             sadpb / 2/*x->errorperbit*/,
-            &num00, &cpi->fn_ptr,
+            &num00, &cpi->fn_ptr[BLOCK_16X16],
             mvsadcost, mvcost); //sadpb < 9
 
         // Further step/diamond searches as necessary
@@ -267,7 +267,7 @@ static int find_matching_mb
                     &best_ref_mv1, &d->bmi.mv.as_mv,
                     step_param + n,
                     sadpb / 4/*x->errorperbit*/,
-                    &num00, &cpi->fn_ptr,
+                    &num00, &cpi->fn_ptr[BLOCK_16X16],
                     mvsadcost, mvcost); //sadpb = 9
 
                 if (thissme < bestsme)
@@ -291,8 +291,8 @@ static int find_matching_mb
     {
         bestsme = cpi->find_fractional_mv_step(x, b, d,
                     &d->bmi.mv.as_mv, &best_ref_mv1,
-                    x->errorperbit, cpi->fn_ptr.svf,
-                    cpi->fn_ptr.vf, cpi->mb.mvcost);
+                    x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
+                    cpi->mb.mvcost);
     }
 #endif
 
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 3c9ae987c..f60038f02 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -219,6 +219,21 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
 #endif
 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
 
+#ifndef vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_h);
+
+#ifndef vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_v);
+
+#ifndef vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
+#endif
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
+
 #ifndef vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_c
 #endif
@@ -283,6 +298,9 @@ typedef struct
     vp8_subpixvariance_fn_t  subpixvar8x16;
     vp8_subpixvariance_fn_t  subpixvar16x8;
     vp8_subpixvariance_fn_t  subpixvar16x16;
+    vp8_variance_fn_t        halfpixvar16x16_h;
+    vp8_variance_fn_t        halfpixvar16x16_v;
+    vp8_variance_fn_t        halfpixvar16x16_hv;
     vp8_subpixvariance_fn_t  subpixmse16x16;
 
     vp8_getmbss_fn_t         getmbss;
@@ -309,11 +327,14 @@ typedef struct
 
 typedef struct
 {
-    vp8_sad_fn_t  sdf;
-    vp8_sad_multi_fn_t sdx3f;
-    vp8_sad_multi_d_fn_t sdx4df;
-    vp8_variance_fn_t vf;
+    vp8_sad_fn_t            sdf;
+    vp8_variance_fn_t       vf;
     vp8_subpixvariance_fn_t svf;
+    vp8_variance_fn_t       svf_halfpix_h;
+    vp8_variance_fn_t       svf_halfpix_v;
+    vp8_variance_fn_t       svf_halfpix_hv;
+    vp8_sad_multi_fn_t      sdx3f;
+    vp8_sad_multi_d_fn_t    sdx4df;
 } vp8_variance_fn_ptr_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -322,7 +343,4 @@ typedef struct
 #define VARIANCE_INVOKE(ctx,fn) vp8_variance_##fn
 #endif
 
-/* TODO: Determine if this USEBILINEAR flag is necessary. */
-#define USEBILINEAR
-
 #endif
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 177414351..48d5bb501 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -24,7 +24,6 @@ const int vp8_six_tap[8][6] =
 };
 
 
-#ifdef USEBILINEAR
 const int VP8_FILTER_WEIGHT = 128;
 const int VP8_FILTER_SHIFT  =   7;
 const int vp8_bilinear_taps[8][2] =
@@ -461,6 +460,19 @@ unsigned int vp8_sub_pixel_variance16x16_c
     return vp8_variance16x16_c(temp2, 16, dst_ptr, dst_pixels_per_line, sse);
 }
 
+
+unsigned int vp8_half_pixel_variance16x16_c(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 4,
+                                         ref_ptr, recon_stride, sse);
+}
+
+
 unsigned int vp8_sub_pixel_mse16x16_c
 (
     const unsigned char  *src_ptr,
@@ -525,4 +537,3 @@ unsigned int vp8_sub_pixel_variance8x16_c
 
     return vp8_variance8x16_c(temp2, 8, dst_ptr, dst_pixels_per_line, sse);
 }
-#endif
diff --git a/vp8/vp8cx_arm.mk b/vp8/vp8cx_arm.mk
index d126faf32..da27e0897 100644
--- a/vp8/vp8cx_arm.mk
+++ b/vp8/vp8cx_arm.mk
@@ -19,7 +19,6 @@ VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/encodemb_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/quantize_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/picklpf_arm.c
 VP8_CX_SRCS-$(HAVE_ARMV5TE) += encoder/arm/boolhuff_arm.c
-VP8_CX_SRCS-$(HAVE_ARMV7)  += encoder/arm/mcomp_arm.c
 
 VP8_CX_SRCS_REMOVE-$(HAVE_ARMV5TE)  += encoder/boolhuff.c
 

From abcf36c7584247fe45eaf5f57bf652ea34156144 Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Wed, 27 Oct 2010 10:47:48 -0400
Subject: [PATCH 267/307] RTCD build is bringing old errors to light

needs to be _recon_ not _recon_recon_

Change-Id: I7a8b9ddcb4fb72c2b723c563932c9ea52ff15982
---
 vp8/common/arm/recon_arm.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/common/arm/recon_arm.h b/vp8/common/arm/recon_arm.h
index bf32e6205..b46b7fc7d 100644
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -51,7 +51,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);
 
-extern prototype_recon_macroblock(vp8_recon_recon_mb_neon);
+extern prototype_recon_macroblock(vp8_recon_mb_neon);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon

From b90a072f10c6c40eb258a29cba083c2f01b720ac Mon Sep 17 00:00:00 2001
From: Johann <johannkoenig@google.com>
Date: Wed, 27 Oct 2010 11:21:02 -0400
Subject: [PATCH 268/307] fix implicit declarations

ARM used to explicitly remove this file from the build. With the RTCD
changes, that's no longer possible. These errors also exist for x86 w/o
RTCD, but that's not the default configuration

Change-Id: I3e10e5553ddf3278e8d3c9365ca6fb84f52f5066
---
 vp8/decoder/idct_blk.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vp8/decoder/idct_blk.c b/vp8/decoder/idct_blk.c
index c6a42578a..c98bd5bb8 100644
--- a/vp8/decoder/idct_blk.c
+++ b/vp8/decoder/idct_blk.c
@@ -12,6 +12,14 @@
 #include "idct.h"
 #include "dequantize.h"
 
+void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
+                               unsigned char *dest, int pitch, int stride,
+                               int Dc);
+void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
+                            unsigned char *dest, int pitch, int stride);
+void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr,
+                            unsigned char *dst_ptr, int pitch, int stride);
+
 void vp8_dequant_dc_idct_add_y_block_c
             (short *q, short *dq, unsigned char *pre,
              unsigned char *dst, int stride, char *eobs, short *dc)

From cf127474d8115c15a9dc92a8a287105fbec4705e Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 26 Oct 2010 13:26:17 -0700
Subject: [PATCH 269/307] vpxdec : Change --pp-debug-info to be a bit field.

This allows multiple post processor debug levels to be overlayed.
i.e. can show colored reference blocks and visual motion vectors.

Change-Id: Ic4a1df438445b9f5780fe73adb3126e803472e53
---
 vp8/common/postproc.c | 17 +++++++++++------
 vpxdec.c              |  2 +-
 2 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 9c0ab75a7..21e08638d 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -618,7 +618,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 oci->mb_cols, oci->mb_rows);
         vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
     }
-    else if (flags & VP8D_DEBUG_LEVEL2)
+
+    if (flags & VP8D_DEBUG_LEVEL2)
     {
         int i, j;
         unsigned char *y_ptr;
@@ -649,7 +650,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         }
     }
-    else if (flags & VP8D_DEBUG_LEVEL3)
+
+    if (flags & VP8D_DEBUG_LEVEL3)
     {
         int i, j;
         unsigned char *y_ptr;
@@ -683,7 +685,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         }
     }
-    else if (flags & VP8D_DEBUG_LEVEL4)
+
+    if (flags & VP8D_DEBUG_LEVEL4)
     {
         sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
         vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
@@ -719,7 +722,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 #endif
 
     }
-    else if (flags & VP8D_DEBUG_LEVEL5)
+
+    // Draw motion vectors
+    if (flags & VP8D_DEBUG_LEVEL5)
     {
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
         int width  = post->y_width;
@@ -752,7 +757,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     // Color in block modes
-    else if (flags & VP8D_DEBUG_LEVEL6)
+    if (flags & VP8D_DEBUG_LEVEL6)
     {
         int i, j;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
@@ -787,7 +792,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     // Color in frame reference blocks
-    else if (flags & VP8D_DEBUG_LEVEL7)
+    if (flags & VP8D_DEBUG_LEVEL7)
     {
         int i, j;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
diff --git a/vpxdec.c b/vpxdec.c
index ee8419784..79b8c6292 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -783,7 +783,7 @@ int main(int argc, const char **argv_)
             vp8_pp_cfg.post_proc_flag &= ~0x7;
 
             if (level)
-                vp8_pp_cfg.post_proc_flag |= 8 << (level - 1);
+                vp8_pp_cfg.post_proc_flag |= level;
         }
 
 #endif

From c7c0c05608a883830e2e1a77796b2b66656d75d1 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 27 Oct 2010 10:05:55 -0400
Subject: [PATCH 270/307] vpxenc: add unique track id

MKV requires a unique(ish) TrackID element in the track info header.
Instead of the current hard-coded ID, take a hash of the video track
and use that. This value is not written in the deterministic output
mode, despite being a deterministic value itself, to give flexibility
to change the hash algorithm and not affect bisecting across the
change.

Change-Id: I807fc3ea6d1427a151c3ef703269b67e80aef860
---
 vpxenc.c | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 79 insertions(+), 3 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index bc69f5d4b..a8632b887 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -444,6 +444,9 @@ struct EbmlGlobal
     off_t    cue_pos;
     off_t    cluster_pos;
 
+    /* This pointer is to a specific element to be serialized */
+    off_t    track_id_pos;
+
     /* These pointers are to the size field of the element */
     EbmlLoc  startSegment;
     EbmlLoc  startCluster;
@@ -472,6 +475,18 @@ void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
 }
 
 
+/* Need a fixed size serializer for the track ID. libmkv provdes a 64 bit
+ * one, but not a 32 bit one.
+ */
+static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
+{
+    unsigned char sizeSerialized = 4 | 0x80;
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    Ebml_Serialize(glob, &ui, 4);
+}
+
+
 static void
 Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc,
                           unsigned long class_id)
@@ -597,7 +612,8 @@ write_webm_file_header(EbmlGlobal                *glob,
                 EbmlLoc start;
                 Ebml_StartSubElement(glob, &start, TrackEntry);
                 Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
-                Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+                glob->track_id_pos = ftello(glob->stream);
+                Ebml_SerializeUnsigned32(glob, TrackUID, trackID);
                 Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
                 Ebml_SerializeString(glob, CodecID, "V_VP8");
                 {
@@ -699,7 +715,7 @@ write_webm_block(EbmlGlobal                *glob,
 
 
 static void
-write_webm_file_footer(EbmlGlobal *glob)
+write_webm_file_footer(EbmlGlobal *glob, long hash)
 {
 
     if(glob->cluster_open)
@@ -738,10 +754,63 @@ write_webm_file_footer(EbmlGlobal *glob)
 
     /* Patch up the seek info block */
     write_webm_seek_info(glob);
+
+    /* Patch up the track id */
+    fseeko(glob->stream, glob->track_id_pos, SEEK_SET);
+    Ebml_SerializeUnsigned32(glob, TrackUID, glob->debug ? 0xDEADBEEF : hash);
+
     fseeko(glob->stream, 0, SEEK_END);
 }
 
 
+/* Murmur hash derived from public domain reference implementation at
+ *   http://sites.google.com/site/murmurhash/
+ */
+static unsigned int murmur ( const void * key, int len, unsigned int seed )
+{
+    const unsigned int m = 0x5bd1e995;
+    const int r = 24;
+
+    unsigned int h = seed ^ len;
+
+    const unsigned char * data = (const unsigned char *)key;
+
+    while(len >= 4)
+    {
+        unsigned int k;
+
+        k  = data[0];
+        k |= data[1] << 8;
+        k |= data[2] << 16;
+        k |= data[3] << 24;
+
+        k *= m;
+        k ^= k >> r;
+        k *= m;
+
+        h *= m;
+        h ^= k;
+
+        data += 4;
+        len -= 4;
+    }
+
+    switch(len)
+    {
+    case 3: h ^= data[2] << 16;
+    case 2: h ^= data[1] << 8;
+    case 1: h ^= data[0];
+            h *= m;
+    };
+
+    h ^= h >> 13;
+    h *= m;
+    h ^= h >> 15;
+
+    return h;
+}
+
+
 #include "args.h"
 
 static const arg_def_t debugmode = ARG_DEF("D", "debug", 0,
@@ -979,6 +1048,7 @@ int main(int argc, const char **argv_)
     int                      arg_have_framerate = 0;
     int                      write_webm = 1;
     EbmlGlobal               ebml = {0};
+    uint32_t                 hash = 0;
 
     exec_name = argv_[0];
 
@@ -1468,8 +1538,14 @@ int main(int argc, const char **argv_)
                     frames_out++;
                     fprintf(stderr, " %6luF",
                             (unsigned long)pkt->data.frame.sz);
+
                     if(write_webm)
                     {
+                        /* Update the hash */
+                        if(!ebml.debug)
+                            hash = murmur(pkt->data.frame.buf,
+                                          pkt->data.frame.sz, hash);
+
                         write_webm_block(&ebml, &cfg, pkt);
                     }
                     else
@@ -1522,7 +1598,7 @@ int main(int argc, const char **argv_)
 
         if(write_webm)
         {
-            write_webm_file_footer(&ebml);
+            write_webm_file_footer(&ebml, hash);
         }
         else
         {

From 3427e1dd431f7450c141a8560a37b0b615e8059f Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 27 Oct 2010 10:06:45 -0400
Subject: [PATCH 271/307] makefile: remove ivf{enc,dec} on make clean

Prior clean-up removed the object files, but not the binaries
themselves.

Change-Id: Ic2332188cea88094c14457ebb8b77680a60d581b
---
 examples.mk | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples.mk b/examples.mk
index 28fc6eaa9..9340e23bb 100644
--- a/examples.mk
+++ b/examples.mk
@@ -41,8 +41,8 @@ ifeq ($(CONFIG_MSVS),yes)
 CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe)
 CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe)
 else
-CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe}
-CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe}
+CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe,}
+CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe,}
 endif
 
 # XMA example disabled for now, not used in VP8

From 24c86055c386a90158cc24cbaed2f72773120acb Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 27 Oct 2010 10:08:17 -0400
Subject: [PATCH 272/307] vpxdec: don't require -o with --noblit

Specifiying the output file is meaningless when we're not writing to
it.

Change-Id: I271e1d3ae1994d79f0773747477124600f98ca58
---
 vpxdec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpxdec.c b/vpxdec.c
index 79b8c6292..48a36cd65 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -813,7 +813,7 @@ int main(int argc, const char **argv_)
     }
 
     /* Make sure we don't dump to the terminal, unless forced to with -o - */
-    if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5)
+    if(!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit)
     {
         fprintf(stderr,
                 "Not dumping raw video to your terminal. Use '-o -' to "

From a0ae3682aa67f882006c604196f7ee83eff88d84 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 27 Oct 2010 11:28:43 -0400
Subject: [PATCH 273/307] Fix half-pixel variance RTCD functions

This patch fixes the system dependent entries for the half-pixel
variance functions in both the RTCD and non-RTCD cases:

  - The generic C versions of these functions are now correct.
    Before all three cases called the hv code.

  - Wire up the ARM functions in RTCD mode

  - Created stubs for x86 to call the optimized subpixel functions
    with the correct parameters, rather than falling back to C
    code.

Change-Id: I1d937d074d929e0eb93aacb1232cc5e0ad1c6184
---
 vp8/encoder/arm/arm_csystemdependent.c |  3 +
 vp8/encoder/generic/csystemdependent.c |  3 +
 vp8/encoder/variance.h                 |  6 +-
 vp8/encoder/variance_c.c               | 26 ++++++++-
 vp8/encoder/x86/variance_mmx.c         | 36 ++++++++++++
 vp8/encoder/x86/variance_sse2.c        | 81 ++++++++++++++++++++++++++
 vp8/encoder/x86/variance_x86.h         | 24 ++++++++
 vp8/encoder/x86/x86_csystemdependent.c |  6 ++
 8 files changed, 181 insertions(+), 4 deletions(-)

diff --git a/vp8/encoder/arm/arm_csystemdependent.c b/vp8/encoder/arm/arm_csystemdependent.c
index 8736fcf1d..a1f110260 100644
--- a/vp8/encoder/arm/arm_csystemdependent.c
+++ b/vp8/encoder/arm/arm_csystemdependent.c
@@ -93,6 +93,9 @@ void vp8_arch_arm_encoder_init(VP8_COMP *cpi)
         /*cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;*/
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_neon;
+        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_neon;
+        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_neon;
+        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_neon;
 
         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_neon;
         /*cpi->rtcd.variance.getmbss               = vp8_get_mb_ss_c;*/
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index 520b08f51..ae22b274e 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -57,6 +57,9 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_c;
     cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_c;
     cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_c;
+    cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_c;
+    cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_c;
+    cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_c;
     cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_c;
 
     cpi->rtcd.variance.mse16x16              = vp8_mse16x16_c;
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index f60038f02..5b70fe54b 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -220,17 +220,17 @@ extern prototype_subpixvariance(vp8_variance_subpixvar16x8);
 extern prototype_subpixvariance(vp8_variance_subpixvar16x16);
 
 #ifndef vp8_variance_halfpixvar16x16_h
-#define vp8_variance_halfpixvar16x16_h vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_h);
 
 #ifndef vp8_variance_halfpixvar16x16_v
-#define vp8_variance_halfpixvar16x16_v vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_v);
 
 #ifndef vp8_variance_halfpixvar16x16_hv
-#define vp8_variance_halfpixvar16x16_hv vp8_half_pixel_variance16x16_c
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_c
 #endif
 extern prototype_variance(vp8_variance_halfpixvar16x16_hv);
 
diff --git a/vp8/encoder/variance_c.c b/vp8/encoder/variance_c.c
index 48d5bb501..95ec96cec 100644
--- a/vp8/encoder/variance_c.c
+++ b/vp8/encoder/variance_c.c
@@ -461,7 +461,31 @@ unsigned int vp8_sub_pixel_variance16x16_c
 }
 
 
-unsigned int vp8_half_pixel_variance16x16_c(
+unsigned int vp8_variance_halfpixvar16x16_h_c(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 4, 0,
+                                         ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_c(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_c(src_ptr, source_stride, 0, 4,
+                                         ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_c(
     const unsigned char *src_ptr,
     int  source_stride,
     const unsigned char *ref_ptr,
diff --git a/vp8/encoder/x86/variance_mmx.c b/vp8/encoder/x86/variance_mmx.c
index a5a89d6de..2df73a635 100644
--- a/vp8/encoder/x86/variance_mmx.c
+++ b/vp8/encoder/x86/variance_mmx.c
@@ -595,3 +595,39 @@ unsigned int vp8_i_sub_pixel_variance8x16_mmx
     *sse = xxsum0;
     return (xxsum0 - ((xsum0 * xsum0) >> 7));
 }
+
+
+unsigned int vp8_variance_halfpixvar16x16_h_mmx(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 0,
+                                           ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_mmx(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 0, 4,
+                                           ref_ptr, recon_stride, sse);
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_mmx(
+    const unsigned char *src_ptr,
+    int  source_stride,
+    const unsigned char *ref_ptr,
+    int  recon_stride,
+    unsigned int *sse)
+{
+    return vp8_sub_pixel_variance16x16_mmx(src_ptr, source_stride, 4, 4,
+                                           ref_ptr, recon_stride, sse);
+}
diff --git a/vp8/encoder/x86/variance_sse2.c b/vp8/encoder/x86/variance_sse2.c
index fb0bac1cb..006e0a24a 100644
--- a/vp8/encoder/x86/variance_sse2.c
+++ b/vp8/encoder/x86/variance_sse2.c
@@ -513,3 +513,84 @@ unsigned int vp8_i_sub_pixel_variance8x16_wmt
 
     return vp8_sub_pixel_variance8x16_wmt(src_ptr, (src_pixels_per_line >> 1), xoffset, yoffset, dst_ptr, (dst_pixels_per_line >> 1), sse);
 }
+
+
+unsigned int vp8_variance_halfpixvar16x16_h_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_horiz_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_v_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
+
+
+unsigned int vp8_variance_halfpixvar16x16_hv_wmt(
+    const unsigned char *src_ptr,
+    int  src_pixels_per_line,
+    const unsigned char *dst_ptr,
+    int  dst_pixels_per_line,
+    unsigned int *sse)
+{
+    int xsum0, xsum1;
+    unsigned int xxsum0, xxsum1;
+
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr, src_pixels_per_line,
+        dst_ptr, dst_pixels_per_line, 16,
+        &xsum0, &xxsum0);
+
+    vp8_half_horiz_vert_variance16x_h_sse2(
+        src_ptr + 8, src_pixels_per_line,
+        dst_ptr + 8, dst_pixels_per_line, 16,
+        &xsum1, &xxsum1);
+
+    xsum0 += xsum1;
+    xxsum0 += xxsum1;
+    *sse = xxsum0;
+    return (xxsum0 - ((xsum0 * xsum0) >> 8));
+}
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 3c9f9c790..2b62b5f7e 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -35,6 +35,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_mmx);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_mmx);
 extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_mmx);
 extern prototype_getmbss(vp8_get_mb_ss_mmx);
 extern prototype_variance(vp8_mse16x16_mmx);
@@ -89,6 +92,15 @@ extern prototype_sad(vp8_get4x4sse_cs_mmx);
 #undef  vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_mmx
 
+#undef  vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_mmx
+
+#undef  vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_mmx
+
+#undef  vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_mmx
+
 #undef  vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_mmx
 
@@ -130,6 +142,9 @@ extern prototype_subpixvariance(vp8_sub_pixel_variance8x8_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance8x16_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x8_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_variance16x16_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_h_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_v_wmt);
+extern prototype_variance(vp8_variance_halfpixvar16x16_hv_wmt);
 extern prototype_subpixvariance(vp8_sub_pixel_mse16x16_wmt);
 extern prototype_getmbss(vp8_get_mb_ss_sse2);
 extern prototype_variance(vp8_mse16x16_wmt);
@@ -183,6 +198,15 @@ extern prototype_variance2(vp8_get16x16var_sse2);
 #undef  vp8_variance_subpixvar16x16
 #define vp8_variance_subpixvar16x16 vp8_sub_pixel_variance16x16_wmt
 
+#undef  vp8_variance_halfpixvar16x16_h
+#define vp8_variance_halfpixvar16x16_h vp8_variance_halfpixvar16x16_h_wmt
+
+#undef  vp8_variance_halfpixvar16x16_v
+#define vp8_variance_halfpixvar16x16_v vp8_variance_halfpixvar16x16_v_wmt
+
+#undef  vp8_variance_halfpixvar16x16_hv
+#define vp8_variance_halfpixvar16x16_hv vp8_variance_halfpixvar16x16_hv_wmt
+
 #undef  vp8_variance_subpixmse16x16
 #define vp8_variance_subpixmse16x16 vp8_sub_pixel_mse16x16_wmt
 
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 3e5a8abf9..2581c337f 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -218,6 +218,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_mmx;
         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_mmx;
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_mmx;
+        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_mmx;
         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_mmx;
 
         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_mmx;
@@ -274,6 +277,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.subpixvar8x16         = vp8_sub_pixel_variance8x16_wmt;
         cpi->rtcd.variance.subpixvar16x8         = vp8_sub_pixel_variance16x8_wmt;
         cpi->rtcd.variance.subpixvar16x16        = vp8_sub_pixel_variance16x16_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_h     = vp8_variance_halfpixvar16x16_h_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_v     = vp8_variance_halfpixvar16x16_v_wmt;
+        cpi->rtcd.variance.halfpixvar16x16_hv    = vp8_variance_halfpixvar16x16_hv_wmt;
         cpi->rtcd.variance.subpixmse16x16        = vp8_sub_pixel_mse16x16_wmt;
 
         cpi->rtcd.variance.mse16x16              = vp8_mse16x16_wmt;

From 71ecb5d7d905d1f1771b6c5e130e873dcf458b73 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Wed, 27 Oct 2010 08:45:24 -0400
Subject: [PATCH 274/307] Full search SAD function optimization in SSE4.1

Use mpsadbw, and calculate 8 sad at once. Function list:
vp8_sad16x16x8_sse4
vp8_sad16x8x8_sse4
vp8_sad8x16x8_sse4
vp8_sad8x8x8_sse4
vp8_sad4x4x8_sse4

(test clip: tulip)
For best quality mode, this gave encoder a 5% performance boost.
For good quality mode with speed=1, this gave encoder a 3%
performance boost.

Change-Id: I083b5a39d39144f88dcbccbef95da6498e490134
---
 build/make/configure.sh                |   1 +
 configure                              |   1 +
 vp8/encoder/generic/csystemdependent.c |   6 +
 vp8/encoder/mcomp.c                    | 154 ++++++++++-
 vp8/encoder/mcomp.h                    |   1 +
 vp8/encoder/onyx_if.c                  |   5 +
 vp8/encoder/sad_c.c                    |  90 +++++++
 vp8/encoder/variance.h                 |  43 +++
 vp8/encoder/x86/mcomp_x86.h            |   9 +
 vp8/encoder/x86/sad_sse4.asm           | 353 +++++++++++++++++++++++++
 vp8/encoder/x86/variance_x86.h         |  27 ++
 vp8/encoder/x86/x86_csystemdependent.c |  25 +-
 vp8/vp8cx.mk                           |   1 +
 vpx_ports/x86.h                        |   3 +
 14 files changed, 710 insertions(+), 9 deletions(-)
 create mode 100644 vp8/encoder/x86/sad_sse4.asm

diff --git a/build/make/configure.sh b/build/make/configure.sh
index cdd55ebd8..d25d6400e 100755
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -824,6 +824,7 @@ process_common_toolchain() {
         soft_enable sse2
         soft_enable sse3
         soft_enable ssse3
+        soft_enable sse4_1
 
         case  ${tgt_os} in
             win*)
diff --git a/configure b/configure
index 39ef83ffa..11e086e9c 100755
--- a/configure
+++ b/configure
@@ -199,6 +199,7 @@ ARCH_EXT_LIST="
     sse2
     sse3
     ssse3
+    sse4_1
 
     altivec
 "
diff --git a/vp8/encoder/generic/csystemdependent.c b/vp8/encoder/generic/csystemdependent.c
index ae22b274e..824af5e46 100644
--- a/vp8/encoder/generic/csystemdependent.c
+++ b/vp8/encoder/generic/csystemdependent.c
@@ -40,6 +40,12 @@ void vp8_cmachine_specific_config(VP8_COMP *cpi)
     cpi->rtcd.variance.sad8x8x3              = vp8_sad8x8x3_c;
     cpi->rtcd.variance.sad4x4x3              = vp8_sad4x4x3_c;
 
+    cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_c;
+    cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_c;
+    cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_c;
+    cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_c;
+    cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_c;
+
     cpi->rtcd.variance.sad16x16x4d           = vp8_sad16x16x4d_c;
     cpi->rtcd.variance.sad16x8x4d            = vp8_sad16x8x4d_c;
     cpi->rtcd.variance.sad8x16x4d            = vp8_sad8x16x4d_c;
diff --git a/vp8/encoder/mcomp.c b/vp8/encoder/mcomp.c
index 8cc63f83d..bb85afa6f 100644
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -1323,7 +1323,7 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
         check_here = r * mv_stride + in_what + col_min;
         c = col_min;
 
-        while ((c + 3) < col_max)
+        while ((c + 2) < col_max)
         {
             int i;
 
@@ -1388,6 +1388,158 @@ int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int er
 #endif
 
 
+int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, MV *ref_mv, int error_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int *mvsadcost[2])
+{
+    unsigned char *what = (*(b->base_src) + b->src);
+    int what_stride = b->src_stride;
+    unsigned char *in_what;
+    int in_what_stride = d->pre_stride;
+    int mv_stride = d->pre_stride;
+    unsigned char *bestaddress;
+    MV *best_mv = &d->bmi.mv.as_mv;
+    MV this_mv;
+    int bestsad = INT_MAX;
+    int r, c;
+
+    unsigned char *check_here;
+    unsigned int thissad;
+
+    int ref_row = ref_mv->row >> 3;
+    int ref_col = ref_mv->col >> 3;
+
+    int row_min = ref_row - distance;
+    int row_max = ref_row + distance;
+    int col_min = ref_col - distance;
+    int col_max = ref_col + distance;
+
+    unsigned short sad_array8[8];
+    unsigned int sad_array[3];
+
+    // Work out the mid point for the search
+    in_what = *(d->base_pre) + d->pre;
+    bestaddress = in_what + (ref_row * d->pre_stride) + ref_col;
+
+    best_mv->row = ref_row;
+    best_mv->col = ref_col;
+
+    // We need to check that the starting point for the search (as indicated by ref_mv) is within the buffer limits
+    if ((ref_col > x->mv_col_min) && (ref_col < x->mv_col_max) &&
+    (ref_row > x->mv_row_min) && (ref_row < x->mv_row_max))
+    {
+        // Baseline value at the centre
+        bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride, 0x7fffffff) + vp8_mv_err_cost(ref_mv, ref_mv, mvsadcost, error_per_bit);
+    }
+
+    // Apply further limits to prevent us looking using vectors that stretch beyiond the UMV border
+    if (col_min < x->mv_col_min)
+        col_min = x->mv_col_min;
+
+    if (col_max > x->mv_col_max)
+        col_max = x->mv_col_max;
+
+    if (row_min < x->mv_row_min)
+        row_min = x->mv_row_min;
+
+    if (row_max > x->mv_row_max)
+        row_max = x->mv_row_max;
+
+    for (r = row_min; r < row_max ; r++)
+    {
+        this_mv.row = r << 3;
+        check_here = r * mv_stride + in_what + col_min;
+        c = col_min;
+
+        while ((c + 7) < col_max)
+        {
+            int i;
+
+            fn_ptr->sdx8f(what, what_stride, check_here , in_what_stride, sad_array8);
+
+            for (i = 0; i < 8; i++)
+            {
+                thissad = (unsigned int)sad_array8[i];
+
+                if (thissad < bestsad)
+                {
+                    this_mv.col = c << 3;
+                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+                    if (thissad < bestsad)
+                    {
+                        bestsad = thissad;
+                        best_mv->row = r;
+                        best_mv->col = c;
+                        bestaddress = check_here;
+                    }
+                }
+
+                check_here++;
+                c++;
+            }
+        }
+
+        while ((c + 2) < col_max)
+        {
+            int i;
+
+            fn_ptr->sdx3f(what, what_stride, check_here , in_what_stride, sad_array);
+
+            for (i = 0; i < 3; i++)
+            {
+                thissad = sad_array[i];
+
+                if (thissad < bestsad)
+                {
+                    this_mv.col = c << 3;
+                    thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+                    if (thissad < bestsad)
+                    {
+                        bestsad = thissad;
+                        best_mv->row = r;
+                        best_mv->col = c;
+                        bestaddress = check_here;
+                    }
+                }
+
+                check_here++;
+                c++;
+            }
+        }
+
+        while (c < col_max)
+        {
+            thissad = fn_ptr->sdf(what, what_stride, check_here , in_what_stride, bestsad);
+
+            if (thissad < bestsad)
+            {
+                this_mv.col = c << 3;
+                thissad  += vp8_mv_err_cost(&this_mv, ref_mv, mvsadcost, error_per_bit);
+
+                if (thissad < bestsad)
+                {
+                    bestsad = thissad;
+                    best_mv->row = r;
+                    best_mv->col = c;
+                    bestaddress = check_here;
+                }
+            }
+
+            check_here ++;
+            c ++;
+        }
+    }
+
+    this_mv.row = best_mv->row << 3;
+    this_mv.col = best_mv->col << 3;
+
+    if (bestsad < INT_MAX)
+        return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, (unsigned int *)(&thissad))
+        + vp8_mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit);
+    else
+        return INT_MAX;
+}
+
 #ifdef ENTROPY_STATS
 void print_mode_context(void)
 {
diff --git a/vp8/encoder/mcomp.h b/vp8/encoder/mcomp.h
index 181e95822..7d6036248 100644
--- a/vp8/encoder/mcomp.h
+++ b/vp8/encoder/mcomp.h
@@ -93,6 +93,7 @@ extern fractional_mv_step_fp vp8_skip_fractional_mv_step;
 typedef prototype_full_search_sad(*vp8_full_search_fn_t);
 extern prototype_full_search_sad(vp8_full_search_sad);
 extern prototype_full_search_sad(vp8_full_search_sadx3);
+extern prototype_full_search_sad(vp8_full_search_sadx8);
 
 typedef prototype_diamond_search_sad(*vp8_diamond_search_fn_t);
 extern prototype_diamond_search_sad(vp8_diamond_search_sad);
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 7a78b2901..5f02a5a02 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -2341,6 +2341,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_v  = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_v);
     cpi->fn_ptr[BLOCK_16X16].svf_halfpix_hv = VARIANCE_INVOKE(&cpi->rtcd.variance, halfpixvar16x16_hv);
     cpi->fn_ptr[BLOCK_16X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x3);
+    cpi->fn_ptr[BLOCK_16X16].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x8);
     cpi->fn_ptr[BLOCK_16X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x16x4d);
 
     cpi->fn_ptr[BLOCK_16X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8);
@@ -2350,6 +2351,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_v  = NULL;
     cpi->fn_ptr[BLOCK_16X8].svf_halfpix_hv = NULL;
     cpi->fn_ptr[BLOCK_16X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x3);
+    cpi->fn_ptr[BLOCK_16X8].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x8);
     cpi->fn_ptr[BLOCK_16X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad16x8x4d);
 
     cpi->fn_ptr[BLOCK_8X16].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16);
@@ -2359,6 +2361,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_v  = NULL;
     cpi->fn_ptr[BLOCK_8X16].svf_halfpix_hv = NULL;
     cpi->fn_ptr[BLOCK_8X16].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x3);
+    cpi->fn_ptr[BLOCK_8X16].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x8);
     cpi->fn_ptr[BLOCK_8X16].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x16x4d);
 
     cpi->fn_ptr[BLOCK_8X8].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8);
@@ -2368,6 +2371,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_v  = NULL;
     cpi->fn_ptr[BLOCK_8X8].svf_halfpix_hv = NULL;
     cpi->fn_ptr[BLOCK_8X8].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x3);
+    cpi->fn_ptr[BLOCK_8X8].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x8);
     cpi->fn_ptr[BLOCK_8X8].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad8x8x4d);
 
     cpi->fn_ptr[BLOCK_4X4].sdf            = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4);
@@ -2377,6 +2381,7 @@ VP8_PTR vp8_create_compressor(VP8_CONFIG *oxcf)
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_v  = NULL;
     cpi->fn_ptr[BLOCK_4X4].svf_halfpix_hv = NULL;
     cpi->fn_ptr[BLOCK_4X4].sdx3f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x3);
+    cpi->fn_ptr[BLOCK_4X4].sdx8f          = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x8);
     cpi->fn_ptr[BLOCK_4X4].sdx4df         = VARIANCE_INVOKE(&cpi->rtcd.variance, sad4x4x4d);
 
 #if !(CONFIG_REALTIME_ONLY)
diff --git a/vp8/encoder/sad_c.c b/vp8/encoder/sad_c.c
index 2ff122f68..5eaca5935 100644
--- a/vp8/encoder/sad_c.c
+++ b/vp8/encoder/sad_c.c
@@ -126,6 +126,24 @@ void vp8_sad16x16x3_c(
     sad_array[2] = vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad16x16x8_c(
+    const unsigned char *src_ptr,
+    int  src_stride,
+    const unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned short *sad_array
+)
+{
+    sad_array[0] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr  , ref_stride, 0x7fffffff);
+    sad_array[1] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+    sad_array[2] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+    sad_array[3] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
+    sad_array[4] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+    sad_array[5] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+    sad_array[6] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
+    sad_array[7] = (unsigned short)vp8_sad16x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad16x8x3_c(
     const unsigned char *src_ptr,
     int  src_stride,
@@ -139,6 +157,24 @@ void vp8_sad16x8x3_c(
     sad_array[2] = vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad16x8x8_c(
+    const unsigned char *src_ptr,
+    int  src_stride,
+    const unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned short *sad_array
+)
+{
+    sad_array[0] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr  , ref_stride, 0x7fffffff);
+    sad_array[1] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+    sad_array[2] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+    sad_array[3] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
+    sad_array[4] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+    sad_array[5] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+    sad_array[6] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
+    sad_array[7] = (unsigned short)vp8_sad16x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad8x8x3_c(
     const unsigned char *src_ptr,
     int  src_stride,
@@ -152,6 +188,24 @@ void vp8_sad8x8x3_c(
     sad_array[2] = vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad8x8x8_c(
+    const unsigned char *src_ptr,
+    int  src_stride,
+    const unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned short *sad_array
+)
+{
+    sad_array[0] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr  , ref_stride, 0x7fffffff);
+    sad_array[1] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+    sad_array[2] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+    sad_array[3] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
+    sad_array[4] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+    sad_array[5] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+    sad_array[6] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
+    sad_array[7] = (unsigned short)vp8_sad8x8_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad8x16x3_c(
     const unsigned char *src_ptr,
     int  src_stride,
@@ -165,6 +219,24 @@ void vp8_sad8x16x3_c(
     sad_array[2] = vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad8x16x8_c(
+    const unsigned char *src_ptr,
+    int  src_stride,
+    const unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned short *sad_array
+)
+{
+    sad_array[0] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr  , ref_stride, 0x7fffffff);
+    sad_array[1] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+    sad_array[2] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+    sad_array[3] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
+    sad_array[4] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+    sad_array[5] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+    sad_array[6] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
+    sad_array[7] = (unsigned short)vp8_sad8x16_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad4x4x3_c(
     const unsigned char *src_ptr,
     int  src_stride,
@@ -178,6 +250,24 @@ void vp8_sad4x4x3_c(
     sad_array[2] = vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
 }
 
+void vp8_sad4x4x8_c(
+    const unsigned char *src_ptr,
+    int  src_stride,
+    const unsigned char *ref_ptr,
+    int  ref_stride,
+    unsigned short *sad_array
+)
+{
+    sad_array[0] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr  , ref_stride, 0x7fffffff);
+    sad_array[1] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 1, ref_stride, 0x7fffffff);
+    sad_array[2] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 2, ref_stride, 0x7fffffff);
+    sad_array[3] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 3 , ref_stride, 0x7fffffff);
+    sad_array[4] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 4, ref_stride, 0x7fffffff);
+    sad_array[5] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 5, ref_stride, 0x7fffffff);
+    sad_array[6] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 6 , ref_stride, 0x7fffffff);
+    sad_array[7] = (unsigned short)vp8_sad4x4_c(src_ptr, src_stride, ref_ptr + 7, ref_stride, 0x7fffffff);
+}
+
 void vp8_sad16x16x4d_c(
     const unsigned char *src_ptr,
     int  src_stride,
diff --git a/vp8/encoder/variance.h b/vp8/encoder/variance.h
index 5b70fe54b..5befd3b86 100644
--- a/vp8/encoder/variance.h
+++ b/vp8/encoder/variance.h
@@ -32,6 +32,16 @@
      unsigned int *sad_array\
     )
 
+#define prototype_sad_multi_same_address_1(sym)\
+    void (sym)\
+    (\
+     const unsigned char *src_ptr, \
+     int source_stride, \
+     const unsigned char *ref_ptr, \
+     int  ref_stride, \
+     unsigned short *sad_array\
+    )
+
 #define prototype_sad_multi_dif_address(sym)\
     void (sym)\
     (\
@@ -138,6 +148,31 @@ extern prototype_sad_multi_same_address(vp8_variance_sad8x16x3);
 #endif
 extern prototype_sad_multi_same_address(vp8_variance_sad4x4x3);
 
+#ifndef vp8_variance_sad16x16x8
+#define vp8_variance_sad16x16x8 vp8_sad16x16x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad16x16x8);
+
+#ifndef vp8_variance_sad16x8x8
+#define vp8_variance_sad16x8x8 vp8_sad16x8x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad16x8x8);
+
+#ifndef vp8_variance_sad8x8x8
+#define vp8_variance_sad8x8x8 vp8_sad8x8x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad8x8x8);
+
+#ifndef vp8_variance_sad8x16x8
+#define vp8_variance_sad8x16x8 vp8_sad8x16x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad8x16x8);
+
+#ifndef vp8_variance_sad4x4x8
+#define vp8_variance_sad4x4x8 vp8_sad4x4x8_c
+#endif
+extern prototype_sad_multi_same_address_1(vp8_variance_sad4x4x8);
+
 //-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-
 
 #ifndef vp8_variance_sad16x16x4d
@@ -274,6 +309,7 @@ extern prototype_sad(vp8_variance_get4x4sse_cs);
 
 typedef prototype_sad(*vp8_sad_fn_t);
 typedef prototype_sad_multi_same_address(*vp8_sad_multi_fn_t);
+typedef prototype_sad_multi_same_address_1(*vp8_sad_multi1_fn_t);
 typedef prototype_sad_multi_dif_address(*vp8_sad_multi_d_fn_t);
 typedef prototype_variance(*vp8_variance_fn_t);
 typedef prototype_variance2(*vp8_variance2_fn_t);
@@ -317,6 +353,12 @@ typedef struct
     vp8_sad_multi_fn_t       sad8x8x3;
     vp8_sad_multi_fn_t       sad4x4x3;
 
+    vp8_sad_multi1_fn_t      sad16x16x8;
+    vp8_sad_multi1_fn_t      sad16x8x8;
+    vp8_sad_multi1_fn_t      sad8x16x8;
+    vp8_sad_multi1_fn_t      sad8x8x8;
+    vp8_sad_multi1_fn_t      sad4x4x8;
+
     vp8_sad_multi_d_fn_t     sad16x16x4d;
     vp8_sad_multi_d_fn_t     sad16x8x4d;
     vp8_sad_multi_d_fn_t     sad8x16x4d;
@@ -334,6 +376,7 @@ typedef struct
     vp8_variance_fn_t       svf_halfpix_v;
     vp8_variance_fn_t       svf_halfpix_hv;
     vp8_sad_multi_fn_t      sdx3f;
+    vp8_sad_multi1_fn_t     sdx8f;
     vp8_sad_multi_d_fn_t    sdx4df;
 } vp8_variance_fn_ptr_t;
 
diff --git a/vp8/encoder/x86/mcomp_x86.h b/vp8/encoder/x86/mcomp_x86.h
index e8d658b39..3b7b29c21 100644
--- a/vp8/encoder/x86/mcomp_x86.h
+++ b/vp8/encoder/x86/mcomp_x86.h
@@ -24,5 +24,14 @@
 #endif
 #endif
 
+#if HAVE_SSE4_1
+#if !CONFIG_RUNTIME_CPU_DETECT
+
+#undef  vp8_search_full_search
+#define vp8_search_full_search vp8_full_search_sadx8
+
+#endif
+#endif
+
 #endif
 
diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm
new file mode 100644
index 000000000..03ecec4b3
--- /dev/null
+++ b/vp8/encoder/x86/sad_sse4.asm
@@ -0,0 +1,353 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+%macro PROCESS_16X2X8 1
+%if %1
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movq            xmm1,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        movq            xmm2,       MMWORD PTR [rdi+16]
+        punpcklqdq      xmm1,       xmm3
+        punpcklqdq      xmm3,       xmm2
+
+        movdqa          xmm2,       xmm1
+        mpsadbw         xmm1,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+
+        psrldq          xmm0,       8
+
+        movdqa          xmm4,       xmm3
+        mpsadbw         xmm3,       xmm0,  0x0
+        mpsadbw         xmm4,       xmm0,  0x5
+
+        paddw           xmm1,       xmm2
+        paddw           xmm1,       xmm3
+        paddw           xmm1,       xmm4
+%else
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        movq            xmm5,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        movq            xmm2,       MMWORD PTR [rdi+16]
+        punpcklqdq      xmm5,       xmm3
+        punpcklqdq      xmm3,       xmm2
+
+        movdqa          xmm2,       xmm5
+        mpsadbw         xmm5,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+
+        psrldq          xmm0,       8
+
+        movdqa          xmm4,       xmm3
+        mpsadbw         xmm3,       xmm0,  0x0
+        mpsadbw         xmm4,       xmm0,  0x5
+
+        paddw           xmm5,       xmm2
+        paddw           xmm5,       xmm3
+        paddw           xmm5,       xmm4
+
+        paddw           xmm1,       xmm5
+%endif
+        movdqa          xmm0,       XMMWORD PTR [rsi + rax]
+        movq            xmm5,       MMWORD PTR [rdi+ rdx]
+        movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
+        movq            xmm2,       MMWORD PTR [rdi+ rdx+16]
+        punpcklqdq      xmm5,       xmm3
+        punpcklqdq      xmm3,       xmm2
+
+        lea             rsi,        [rsi+rax*2]
+        lea             rdi,        [rdi+rdx*2]
+
+        movdqa          xmm2,       xmm5
+        mpsadbw         xmm5,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+
+        psrldq          xmm0,       8
+        movdqa          xmm4,       xmm3
+        mpsadbw         xmm3,       xmm0,  0x0
+        mpsadbw         xmm4,       xmm0,  0x5
+
+        paddw           xmm5,       xmm2
+        paddw           xmm5,       xmm3
+        paddw           xmm5,       xmm4
+
+        paddw           xmm1,       xmm5
+%endmacro
+
+%macro PROCESS_8X2X8 1
+%if %1
+        movq            xmm0,       MMWORD PTR [rsi]
+        movq            xmm1,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        punpcklqdq      xmm1,       xmm3
+
+        movdqa          xmm2,       xmm1
+        mpsadbw         xmm1,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+        paddw           xmm1,       xmm2
+%else
+        movq            xmm0,       MMWORD PTR [rsi]
+        movq            xmm5,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        punpcklqdq      xmm5,       xmm3
+
+        movdqa          xmm2,       xmm5
+        mpsadbw         xmm5,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+        paddw           xmm5,       xmm2
+
+        paddw           xmm1,       xmm5
+%endif
+        movq            xmm0,       MMWORD PTR [rsi + rax]
+        movq            xmm5,       MMWORD PTR [rdi+ rdx]
+        movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
+        punpcklqdq      xmm5,       xmm3
+
+        lea             rsi,        [rsi+rax*2]
+        lea             rdi,        [rdi+rdx*2]
+
+        movdqa          xmm2,       xmm5
+        mpsadbw         xmm5,       xmm0,  0x0
+        mpsadbw         xmm2,       xmm0,  0x5
+        paddw           xmm5,       xmm2
+
+        paddw           xmm1,       xmm5
+%endmacro
+
+%macro PROCESS_4X2X8 1
+%if %1
+        movd            xmm0,       [rsi]
+        movq            xmm1,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        punpcklqdq      xmm1,       xmm3
+
+        mpsadbw         xmm1,       xmm0,  0x0
+%else
+        movd            xmm0,       [rsi]
+        movq            xmm5,       MMWORD PTR [rdi]
+        movq            xmm3,       MMWORD PTR [rdi+8]
+        punpcklqdq      xmm5,       xmm3
+
+        mpsadbw         xmm5,       xmm0,  0x0
+
+        paddw           xmm1,       xmm5
+%endif
+        movd            xmm0,       [rsi + rax]
+        movq            xmm5,       MMWORD PTR [rdi+ rdx]
+        movq            xmm3,       MMWORD PTR [rdi+ rdx+8]
+        punpcklqdq      xmm5,       xmm3
+
+        lea             rsi,        [rsi+rax*2]
+        lea             rdi,        [rdi+rdx*2]
+
+        mpsadbw         xmm5,       xmm0,  0x0
+
+        paddw           xmm1,       xmm5
+%endmacro
+
+
+;void vp8_sad16x16x8_sse4(
+;    const unsigned char *src_ptr,
+;    int  src_stride,
+;    const unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned short *sad_array);
+global sym(vp8_sad16x16x8_sse4)
+sym(vp8_sad16x16x8_sse4):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0)           ;src_ptr
+        mov             rdi,        arg(2)           ;ref_ptr
+
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+        PROCESS_16X2X8 1
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+
+        mov             rdi,        arg(4)           ;Results
+        movdqa          XMMWORD PTR [rdi],    xmm1
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_sad16x8x8_sse4(
+;    const unsigned char *src_ptr,
+;    int  src_stride,
+;    const unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned short *sad_array
+;);
+global sym(vp8_sad16x8x8_sse4)
+sym(vp8_sad16x8x8_sse4):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0)           ;src_ptr
+        mov             rdi,        arg(2)           ;ref_ptr
+
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+        PROCESS_16X2X8 1
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+        PROCESS_16X2X8 0
+
+        mov             rdi,        arg(4)           ;Results
+        movdqa          XMMWORD PTR [rdi],    xmm1
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_sad8x8x8_sse4(
+;    const unsigned char *src_ptr,
+;    int  src_stride,
+;    const unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned short *sad_array
+;);
+global sym(vp8_sad8x8x8_sse4)
+sym(vp8_sad8x8x8_sse4):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0)           ;src_ptr
+        mov             rdi,        arg(2)           ;ref_ptr
+
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+        PROCESS_8X2X8 1
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+
+        mov             rdi,        arg(4)           ;Results
+        movdqa          XMMWORD PTR [rdi],    xmm1
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_sad8x16x8_sse4(
+;    const unsigned char *src_ptr,
+;    int  src_stride,
+;    const unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned short *sad_array
+;);
+global sym(vp8_sad8x16x8_sse4)
+sym(vp8_sad8x16x8_sse4):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0)           ;src_ptr
+        mov             rdi,        arg(2)           ;ref_ptr
+
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+        PROCESS_8X2X8 1
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        PROCESS_8X2X8 0
+        mov             rdi,        arg(4)           ;Results
+        movdqa          XMMWORD PTR [rdi],    xmm1
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+;void vp8_sad4x4x8_c(
+;    const unsigned char *src_ptr,
+;    int  src_stride,
+;    const unsigned char *ref_ptr,
+;    int  ref_stride,
+;    unsigned short *sad_array
+;);
+global sym(vp8_sad4x4x8_sse4)
+sym(vp8_sad4x4x8_sse4):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
+
+        mov             rsi,        arg(0)           ;src_ptr
+        mov             rdi,        arg(2)           ;ref_ptr
+
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+
+        PROCESS_4X2X8 1
+        PROCESS_4X2X8 0
+
+        mov             rdi,        arg(4)           ;Results
+        movdqa          XMMWORD PTR [rdi],    xmm1
+
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+
+
+
diff --git a/vp8/encoder/x86/variance_x86.h b/vp8/encoder/x86/variance_x86.h
index 2b62b5f7e..6bea15ebc 100644
--- a/vp8/encoder/x86/variance_x86.h
+++ b/vp8/encoder/x86/variance_x86.h
@@ -297,4 +297,31 @@ extern prototype_sad_multi_same_address(vp8_sad16x8x3_ssse3);
 #endif
 #endif
 
+
+#if HAVE_SSE4_1
+extern prototype_sad_multi_same_address_1(vp8_sad16x16x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad16x8x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad8x16x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad8x8x8_sse4);
+extern prototype_sad_multi_same_address_1(vp8_sad4x4x8_sse4);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
+#undef  vp8_variance_sad16x16x8
+#define vp8_variance_sad16x16x8 vp8_sad16x16x8_sse4
+
+#undef  vp8_variance_sad16x8x8
+#define vp8_variance_sad16x8x8 vp8_sad16x8x8_sse4
+
+#undef  vp8_variance_sad8x16x8
+#define vp8_variance_sad8x16x8 vp8_sad8x16x8_sse4
+
+#undef  vp8_variance_sad8x8x8
+#define vp8_variance_sad8x8x8 vp8_sad8x8x8_sse4
+
+#undef  vp8_variance_sad4x4x8
+#define vp8_variance_sad4x4x8 vp8_sad4x4x8_sse4
+
+#endif
+#endif
+
 #endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index 2581c337f..ed0e71ed0 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -188,6 +188,7 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
     int wmt_enabled = flags & HAS_SSE2;
     int SSE3Enabled = flags & HAS_SSE3;
     int SSSE3Enabled = flags & HAS_SSSE3;
+    int SSE4_1Enabled = flags & HAS_SSE4_1;
 
     /* Note:
      *
@@ -198,7 +199,6 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 
     /* Override default functions with fastest ones for this CPU. */
 #if HAVE_MMX
-
     if (mmx_enabled)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_mmx;
@@ -254,10 +254,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 
         /*cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_mmx;*/
     }
-
 #endif
-#if HAVE_SSE2
 
+#if HAVE_SSE2
     if (wmt_enabled)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_wmt;
@@ -307,10 +306,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         /*cpi->rtcd.quantize.quantb            = vp8_regular_quantize_b_sse2;*/
         cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_sse2;
     }
-
 #endif
-#if HAVE_SSE3
 
+#if HAVE_SSE3
     if (SSE3Enabled)
     {
         cpi->rtcd.variance.sad16x16              = vp8_sad16x16_sse3;
@@ -328,16 +326,27 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.sad4x4x4d             = vp8_sad4x4x4d_sse3;
         cpi->rtcd.search.diamond_search          = vp8_diamond_search_sadx4;
     }
-
 #endif
-#if HAVE_SSSE3
 
+#if HAVE_SSSE3
     if (SSSE3Enabled)
     {
         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
     }
+#endif
+
+#if HAVE_SSE4_1
+    if (SSE4_1Enabled)
+    {
+        cpi->rtcd.variance.sad16x16x8            = vp8_sad16x16x8_sse4;
+        cpi->rtcd.variance.sad16x8x8             = vp8_sad16x8x8_sse4;
+        cpi->rtcd.variance.sad8x16x8             = vp8_sad8x16x8_sse4;
+        cpi->rtcd.variance.sad8x8x8              = vp8_sad8x8x8_sse4;
+        cpi->rtcd.variance.sad4x4x8              = vp8_sad4x4x8_sse4;
+        cpi->rtcd.search.full_search             = vp8_full_search_sadx8;
+    }
+#endif
 
-#endif
 #endif
 }
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index 98288f2b6..f693f865b 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -109,6 +109,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
+VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm
 
diff --git a/vpx_ports/x86.h b/vpx_ports/x86.h
index feffc3e69..190c8643a 100644
--- a/vpx_ports/x86.h
+++ b/vpx_ports/x86.h
@@ -74,6 +74,7 @@ void __cpuid(int CPUInfo[4], int info_type);
 #define HAS_SSE2  0x04
 #define HAS_SSE3  0x08
 #define HAS_SSSE3 0x10
+#define HAS_SSE4_1 0x20
 #ifndef BIT
 #define BIT(n) (1<<n)
 #endif
@@ -117,6 +118,8 @@ x86_simd_caps(void)
 
     if (reg_ecx & BIT(9))  flags |= HAS_SSSE3;
 
+    if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1;
+
     return flags & mask;
 }
 

From 3d84da6b8d9e80d477b79726f27efe1b98a35302 Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 27 Oct 2010 11:28:56 -0400
Subject: [PATCH 275/307] Output the PSNR for the entire file.

If --psnr option is enabled vpxenc will output PSNR values for the
entire file. Added a \n before final output to make sure the output
is on its own line. Overall and Avg psnr matches the values written
to opsnr.stt file.

Change-Id: I869268b704fe8b0c8389d318cceb6072fea102f8
---
 vpxenc.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/vpxenc.c b/vpxenc.c
index a8632b887..fc0b779ad 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -810,6 +810,23 @@ static unsigned int murmur ( const void * key, int len, unsigned int seed )
     return h;
 }
 
+#include "math.h"
+
+static double vp8_mse2psnr(double Samples, double Peak, double Mse)
+{
+    double psnr;
+
+    if ((double)Mse > 0.0)
+        psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
+    else
+        psnr = 60;      // Limit to prevent / 0
+
+    if (psnr > 60)
+        psnr = 60;
+
+    return psnr;
+}
+
 
 #include "args.h"
 
@@ -1049,6 +1066,10 @@ int main(int argc, const char **argv_)
     int                      write_webm = 1;
     EbmlGlobal               ebml = {0};
     uint32_t                 hash = 0;
+    uint64_t                 psnr_sse_total = 0;
+    uint64_t                 psnr_samples_total = 0;
+    double                   psnr_totals[4] = {0, 0, 0, 0};
+    int                      psnr_count = 0;
 
     exec_name = argv_[0];
 
@@ -1570,8 +1591,14 @@ int main(int argc, const char **argv_)
                     {
                         int i;
 
+                        psnr_sse_total += pkt->data.psnr.sse[0];
+                        psnr_samples_total += pkt->data.psnr.samples[0];
                         for (i = 0; i < 4; i++)
+                        {
                             fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]);
+                            psnr_totals[i] += pkt->data.psnr.psnr[i];
+                        }
+                        psnr_count++;
                     }
 
                     break;
@@ -1592,6 +1619,21 @@ int main(int argc, const char **argv_)
                cx_time > 9999999 ? "ms" : "us",
                (float)frames_in * 1000000.0 / (float)cx_time);
 
+        if ( (show_psnr) && (psnr_count>0) )
+        {
+            int i;
+            double ovpsnr = vp8_mse2psnr(psnr_samples_total, 255.0,
+                                         psnr_sse_total);
+
+            fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
+
+            fprintf(stderr, " %.3lf", ovpsnr);
+            for (i = 0; i < 4; i++)
+            {
+                fprintf(stderr, " %.3lf", psnr_totals[i]/psnr_count);
+            }
+        }
+
         vpx_codec_destroy(&encoder);
 
         fclose(infile);

From a097e189641fcbbecc92661e2c87019845dc7b00 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 27 Oct 2010 12:50:16 -0700
Subject: [PATCH 276/307] postproc: Tweaks to line drawing and blending.

Turned down the blending level to make colored blocks obscure
the video less.
Not blending the entire block to give distinction to macro
block edges.
Added configuration so that macro block blending function can
be optimized.
Change to constrain line as to when dx and dy are computed.
Now draw two lines to form an arrow.

Change-Id: Id3ef0fdeeab2949a6664b2c63e2a3e1a89503f6c
---
 vp8/common/generic/systemdependent.c |  1 +
 vp8/common/postproc.c                | 65 +++++++++++++++++++---------
 vp8/common/postproc.h                | 10 +++++
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0c9b77e76..c624242a5 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -69,6 +69,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
     rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
+    rtcd->postproc.blend_mb    = vp8_blend_mb_c;
 #endif
 
 #endif
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 21e08638d..42e37da30 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -472,7 +472,10 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
     }
 }
 
-void vp8_blend_block_c (unsigned char *y, unsigned char *u, unsigned char *v,
+// Blend the macro block with a solid colored square.  Leave the
+// edges unblended to give distinction to macro blocks in areas
+// filled with the same color block.
+void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
                         int y1, int u1, int v1, int alpha, int stride)
 {
     int i, j;
@@ -480,63 +483,73 @@ void vp8_blend_block_c (unsigned char *y, unsigned char *u, unsigned char *v,
     int u1_const = u1*((1<<16)-alpha);
     int v1_const = v1*((1<<16)-alpha);
 
-    for (i = 0; i < 16; i++)
+    y += stride + 2;
+    for (i = 0; i < 14; i++)
     {
-        for (j = 0; j < 16; j++)
+        for (j = 0; j < 14; j++)
         {
             y[j] = (y[j]*alpha + y1_const)>>16;
         }
         y += stride;
     }
 
-    for (i = 0; i < 8; i++)
+    stride >>= 1;
+
+    u += stride + 1;
+    v += stride + 1;
+
+    for (i = 0; i < 6; i++)
     {
-        for (j = 0; j < 8; j++)
+        for (j = 0; j < 6; j++)
         {
             u[j] = (u[j]*alpha + u1_const)>>16;
             v[j] = (v[j]*alpha + v1_const)>>16;
         }
-        u += stride/2;
-        v += stride/2;
+        u += stride;
+        v += stride;
     }
 }
 
 static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
 {
-    int dx = *x1 - x0;
-    int dy = *y1 - y0;
+    int dx;
+    int dy;
 
     if (*x1 > width)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *x1 = width;
         if (dy)
             *y1 = ((width-x0)*dy)/dx + y0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*x1 < 0)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *x1 = 0;
         if (dy)
             *y1 = ((0-x0)*dy)/dx + y0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*y1 > height)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *y1 = height;
         if (dx)
             *x1 = ((height-y0)*dx)/dy + x0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*y1 < 0)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *y1 = 0;
         if (dx)
             *x1 = ((0-y0)*dx)/dy + x0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
 }
 
@@ -747,8 +760,16 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                     x1 = x0 + (mv->col >> 3);
                     y1 = y0 + (mv->row >> 3);
 
-                    constrain_line (x0, &x1, y0, &y1, width, height);
-                    vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
+                    if (x1 != x0 && y1 != y0)
+                    {
+                        constrain_line (x0, &x1, y0-1, &y1, width, height);
+                        vp8_blit_line  (x0,  x1, y0-1,  y1, y_buffer, y_stride);
+
+                        constrain_line (x0, &x1, y0+1, &y1, width, height);
+                        vp8_blit_line  (x0,  x1, y0+1,  y1, y_buffer, y_stride);
+                    }
+                    else
+                        vp8_blit_line  (x0,  x1, y0,  y1, y_buffer, y_stride);
                 }
                 mi++;
             }
@@ -779,7 +800,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
                 V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
 
-                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
+                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
 
                 mi++;
             }
@@ -814,7 +836,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
                 V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
 
-                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
+                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
 
                 mi++;
             }
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 80337fc68..4a4493802 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -24,6 +24,10 @@
               char whiteclamp[16], char bothclamp[16],\
               unsigned int w, unsigned int h, int pitch)
 
+#define prototype_postproc_blend_mb(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/postproc_x86.h"
 #endif
@@ -48,16 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
 #endif
 extern prototype_postproc_addnoise(vp8_postproc_addnoise);
 
+#ifndef vp8_postproc_blend_mb
+#define vp8_postproc_blend_mb vp8_blend_mb_c
+#endif
+extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
 
 typedef prototype_postproc((*vp8_postproc_fn_t));
 typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
 typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
+typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
 typedef struct
 {
     vp8_postproc_inplace_fn_t   down;
     vp8_postproc_inplace_fn_t   across;
     vp8_postproc_fn_t           downacross;
     vp8_postproc_addnoise_fn_t  addnoise;
+    vp8_postproc_blend_mb_fn_t  blend_mb;
 } vp8_postproc_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT

From c4d7e5e67ecfcd9730bd512483390369fc9982db Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 27 Oct 2010 16:04:02 -0700
Subject: [PATCH 277/307] Eliminate more warnings.

This eliminates a large set of warnings exposed by the Mozilla build
 system (Use of C++ comments in ISO C90 source, commas at the end of
 enum lists, a couple incomplete initializers, and signed/unsigned
 comparisons).
It also eliminates many (but not all) of the warnings expose by newer
 GCC versions and _FORTIFY_SOURCE (e.g., calling fread and fwrite
 without checking the return values).
There are a few spurious warnings left on my system:

../vp8/encoder/encodemb.c:274:9: warning: 'sz' may be used
 uninitialized in this function
gcc seems to be unable to figure out that the value shortcut doesn't
 change between the two if blocks that test it here.

../vp8/encoder/onyx_if.c:5314:5: warning: comparison of unsigned
 expression >= 0 is always true
../vp8/encoder/onyx_if.c:5319:5: warning: comparison of unsigned
 expression >= 0 is always true
This is true, so far as it goes, but it's comparing against an enum, and the C
 standard does not mandate that enums be unsigned, so the checks can't be
 removed.

Change-Id: Iaf689ae3e3d0ddc5ade00faa474debe73b8d3395
---
 examples/decoder_tmpl.c                |   4 +-
 examples/decoder_tmpl.txt              |   2 +-
 examples/encoder_tmpl.c                |   4 +-
 examples/encoder_tmpl.txt              |   4 +-
 libmkv/EbmlIDs.h                       |   2 +-
 vp8/common/alloccommon.c               |   8 +-
 vp8/common/arm/bilinearfilter_arm.c    |  22 +--
 vp8/common/arm/filter_arm.c            |  82 ++++-----
 vp8/common/arm/loopfilter_arm.c        |  20 +--
 vp8/common/arm/neon/recon_neon.c       |   2 +-
 vp8/common/blockd.h                    |  78 ++++-----
 vp8/common/debugmodes.c                |  10 +-
 vp8/common/defaultcoefcounts.h         |  72 ++++----
 vp8/common/entropy.h                   |  26 +--
 vp8/common/entropymv.c                 |  16 +-
 vp8/common/extend.c                    |  31 ++--
 vp8/common/filter_c.c                  |  76 ++++-----
 vp8/common/findnearmv.c                |   4 +-
 vp8/common/generic/systemdependent.c   |   2 +-
 vp8/common/invtrans.c                  |   4 +-
 vp8/common/loopfilter.c                | 132 +++++++--------
 vp8/common/loopfilter.h                |  12 +-
 vp8/common/loopfilter_filters.c        |  69 ++++----
 vp8/common/mbpitch.c                   |   8 +-
 vp8/common/modecont.c                  |  12 +-
 vp8/common/modecontext.c               | 220 ++++++++++++-------------
 vp8/common/onyxc_int.h                 |  38 ++---
 vp8/common/postproc.c                  |  73 ++++----
 vp8/common/ppflags.h                   |   2 +-
 vp8/common/recon.c                     |   8 +-
 vp8/common/recon.h                     |   4 +-
 vp8/common/reconinter.c                |  49 +++---
 vp8/common/reconintra.c                |  46 +++---
 vp8/common/reconintra4x4.c             |   7 +-
 vp8/common/setupintrarecon.c           |   2 +-
 vp8/common/textblit.c                  |   2 +-
 vp8/common/threading.h                 |   6 +-
 vp8/common/treecoder.h                 |   2 +-
 vp8/common/type_aliases.h              |  14 +-
 vp8/common/x86/loopfilter_x86.c        |  16 +-
 vp8/common/x86/vp8_asm_stubs.c         |  36 ++--
 vp8/common/x86/x86_systemdependent.c   |   2 +-
 vp8/decoder/arm/arm_dsystemdependent.c |   4 +-
 vp8/decoder/arm/dboolhuff_arm.h        |  10 +-
 vp8/decoder/dboolhuff.h                |  19 ++-
 vp8/decoder/decodemv.c                 |  44 ++---
 vp8/decoder/decodframe.c               | 122 +++++++-------
 vp8/decoder/dequantize.c               |   4 +-
 vp8/decoder/detokenize.c               |  39 ++---
 vp8/decoder/generic/dsystemdependent.c |   4 +-
 vp8/decoder/onyxd_if.c                 |  38 +++--
 vp8/decoder/onyxd_int.h                |  14 +-
 vp8/decoder/reconintra_mt.c            |  98 +++++------
 vp8/decoder/reconintra_mt.h            |   2 +-
 vp8/decoder/threading.c                | 132 ++++++++-------
 vp8/encoder/firstpass.c                |   8 +-
 vp8/vp8_dx_iface.c                     |  26 ++-
 vpx/src/vpx_encoder.c                  |   2 +-
 vpx/vp8.h                              |   2 +-
 vpx/vpx_codec.h                        |   4 +-
 vpx/vpx_decoder_compat.h               |   2 +-
 vpx/vpx_encoder.h                      |   2 +-
 vpx/vpx_image.h                        |   2 +-
 vpx_mem/include/vpx_mem_intrnl.h       |  46 +++---
 vpx_mem/vpx_mem.c                      |  32 ++--
 vpx_mem/vpx_mem.h                      |  12 +-
 vpx_scale/generic/gen_scalers.c        |  12 +-
 vpx_scale/generic/vpxscale.c           | 148 ++++++++---------
 vpx_scale/generic/yv12config.c         |   9 +-
 vpx_scale/generic/yv12extend.c         |  16 +-
 vpx_scale/yv12config.h                 |  10 +-
 vpxdec.c                               |  14 +-
 vpxenc.c                               |  35 ++--
 y4minput.c                             |   2 +-
 74 files changed, 1100 insertions(+), 1043 deletions(-)

diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index ba3ac987f..26b745d34 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -61,8 +61,8 @@ int main(int argc, char **argv) {
         die("Failed to open %s for writing", argv[2]);
 
     /* Read file header */
-    fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile);
-    if(!(file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
+    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
+         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
          && file_hdr[3]=='F'))
         die("%s is not an IVF file.", argv[1]);
 
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index 6da38c2a8..310c66d54 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -48,7 +48,7 @@ for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
     for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        fwrite(buf, 1, img->d_w >> (plane?1:0), outfile);
+        if(fwrite(buf, 1, img->d_w >> (plane?1:0), outfile));
         buf += img->stride[plane];
     }
 }
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index fdfc3af8f..d9e4d0317 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile,
     mem_put_le32(header+24, frame_cnt);           /* length */
     mem_put_le32(header+28, 0);                   /* unused */
 
-    fwrite(header, 1, 32, outfile);
+    if(fwrite(header, 1, 32, outfile));
 }
 
 
@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile,
     mem_put_le32(header+4, pts&0xFFFFFFFF);
     mem_put_le32(header+8, pts >> 32);
 
-    fwrite(header, 1, 12, outfile);
+    if(fwrite(header, 1, 12, outfile));
 }
 
 int main(int argc, char **argv) {
diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt
index 87055ca13..3273164da 100644
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -61,8 +61,8 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
 case VPX_CODEC_CX_FRAME_PKT:
     write_ivf_frame_header(outfile, pkt);
-    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-           outfile);
+    if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+              outfile));
     break;
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
 
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
index 271990827..429747063 100644
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -228,4 +228,4 @@ enum mkv
 //  TagString = 0x4487,
 //  TagBinary = 0x4485,
 };
-#endif
\ No newline at end of file
+#endif
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 408c25306..9dce8c8f6 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -56,7 +56,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
 
     vp8_de_alloc_frame_buffers(oci);
 
-    // our internal buffers are always multiples of 16
+    /* our internal buffers are always multiples of 16 */
     if ((width & 0xf) != 0)
         width += 16 - (width & 0xf);
 
@@ -153,7 +153,7 @@ void vp8_setup_version(VP8_COMMON *cm)
         cm->full_pixel = 1;
         break;
     default:
-        //4,5,6,7 are reserved for future use
+        /*4,5,6,7 are reserved for future use*/
         cm->no_lpf = 0;
         cm->simpler_lpf = 0;
         cm->use_bilinear_mc_filter = 0;
@@ -177,10 +177,10 @@ void vp8_create_common(VP8_COMMON *oci)
     oci->clr_type = REG_YUV;
     oci->clamp_type = RECON_CLAMP_REQUIRED;
 
-    // Initialise reference frame sign bias structure to defaults
+    /* Initialise reference frame sign bias structure to defaults */
     vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
 
-    // Default disable buffer to buffer copying
+    /* Default disable buffer to buffer copying */
     oci->copy_buffer_to_gf = 0;
     oci->copy_buffer_to_arf = 0;
 }
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index 247f95b6c..65afb41a1 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -49,7 +49,7 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
     const short *vp8_filter
 );
 
-/*
+#if 0
 void vp8_filter_block2d_bil_first_pass_6
 (
     unsigned char *src_ptr,
@@ -66,14 +66,14 @@ void vp8_filter_block2d_bil_first_pass_6
     {
         for ( j=0; j<output_width; j++ )
         {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
             output_ptr[j] = ( ( (int)src_ptr[0]          * vp8_filter[0]) +
                                ((int)src_ptr[1] * vp8_filter[1]) +
                                 (VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -96,7 +96,7 @@ void vp8_filter_block2d_bil_second_pass_6
     {
         for ( j=0; j<output_width; j++ )
         {
-            // Apply filter
+            /* Apply filter */
             Temp =  ((int)src_ptr[0]         * vp8_filter[0]) +
                     ((int)src_ptr[output_width] * vp8_filter[1]) +
                     (VP8_FILTER_WEIGHT/2);
@@ -104,12 +104,12 @@ void vp8_filter_block2d_bil_second_pass_6
             src_ptr++;
         }
 
-        // Next row...
-        //src_ptr    += src_pixels_per_line - output_width;
+        /* Next row... */
+        /*src_ptr    += src_pixels_per_line - output_width;*/
         output_ptr += output_pitch;
     }
 }
-*/
+#endif
 
 void vp8_filter_block2d_bil_armv6
 (
@@ -124,13 +124,13 @@ void vp8_filter_block2d_bil_armv6
 )
 {
 
-    unsigned short FData[36*16]; // Temp data bufffer used in filtering
+    unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
-    // pixel_step = 1;
+    /* First filter 1-D horizontally... */
+    /* pixel_step = 1; */
     vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
 
-    // then 1-D vertically...
+    /* then 1-D vertically... */
     vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
 }
 
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 4bfa3ab34..b4f2fe6ca 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -20,13 +20,13 @@
 
 DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
 {
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
     { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
     { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
     { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
     { 0, -1,   12,  123,  -6,  0 },
 };
 
@@ -93,34 +93,34 @@ void vp8_sixtap_predict_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
 
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
-    // Vfilter is null. First pass only
+    /* Vfilter is null. First pass only */
     if (xoffset && !yoffset)
     {
-        //vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
-        //vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );
+        /*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
+        vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
 
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
     }
     else
     {
-        // Vfilter is a 4 tap filter
+        /* Vfilter is a 4 tap filter */
         if (yoffset & 0x1)
         {
             vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
             vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
         }
-        // Vfilter is 6 tap filter
+        /* Vfilter is 6 tap filter */
         else
         {
             vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
@@ -129,7 +129,7 @@ void vp8_sixtap_predict_armv6
     }
 }
 
-/*
+#if 0
 void vp8_sixtap_predict8x4_armv6
 (
     unsigned char  *src_ptr,
@@ -142,33 +142,33 @@ void vp8_sixtap_predict8x4_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
 
-//  if (xoffset && !yoffset)
-//  {
-//      vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
-//  }
-    // Hfilter is null. Second pass only
-//  else if (!xoffset && yoffset)
-//  {
-//      vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
-//  }
-//  else
-//  {
-//      if (yoffset & 0x1)
-    //      vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
-    //  else
+    /*if (xoffset && !yoffset)
+    {
+        vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
+    }*/
+    /* Hfilter is null. Second pass only */
+    /*else if (!xoffset && yoffset)
+    {
+        vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
+    }
+    else
+    {
+        if (yoffset & 0x1)
+            vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
+        else*/
 
         vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
 
         vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
-//  }
+    /*}*/
 }
-*/
+#endif
 
 void vp8_sixtap_predict8x8_armv6
 (
@@ -182,16 +182,16 @@ void vp8_sixtap_predict8x8_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
     if (xoffset && !yoffset)
     {
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
@@ -224,16 +224,16 @@ void vp8_sixtap_predict16x16_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
     if (xoffset && !yoffset)
     {
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index f86bca1ea..a81c50588 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -35,8 +35,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
 
 
 #if HAVE_ARMV6
-//ARMV6 loopfilter functions
-// Horizontal MB filtering
+/*ARMV6 loopfilter functions*/
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -60,7 +60,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
     vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -84,7 +84,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
     vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -112,7 +112,7 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -142,8 +142,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 #endif
 
 #if HAVE_ARMV7
-// NEON loopfilter functions
-// Horizontal MB filtering
+/* NEON loopfilter functions */
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -164,7 +164,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -185,7 +185,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -210,7 +210,7 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
     vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
diff --git a/vp8/common/arm/neon/recon_neon.c b/vp8/common/arm/neon/recon_neon.c
index 3b2df4c9f..f7930ee5f 100644
--- a/vp8/common/arm/neon/recon_neon.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -23,7 +23,7 @@ void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     unsigned char *udst_ptr = x->dst.u_buffer;
     unsigned char *vdst_ptr = x->dst.v_buffer;
     int ystride = x->dst.y_stride;
-    //int uv_stride = x->dst.uv_stride;
+    /*int uv_stride = x->dst.uv_stride;*/
 
     vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
 }
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index a81bc8b95..a38f0b72b 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -24,7 +24,7 @@ void vpx_log(const char *format, ...);
 #define TRUE    1
 #define FALSE   0
 
-//#define DCPRED 1
+/*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3
 
@@ -39,7 +39,7 @@ void vpx_log(const char *format, ...);
 #define MAX_REF_LF_DELTAS       4
 #define MAX_MODE_LF_DELTAS      4
 
-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_DELTADATA   0
 #define SEGMENT_ABSDATA     1
 
@@ -75,11 +75,11 @@ typedef enum
 
 typedef enum
 {
-    DC_PRED,            // average of above and left pixels
-    V_PRED,             // vertical prediction
-    H_PRED,             // horizontal prediction
-    TM_PRED,            // Truemotion prediction
-    B_PRED,             // block based prediction, each block has its own prediction mode
+    DC_PRED,            /* average of above and left pixels */
+    V_PRED,             /* vertical prediction */
+    H_PRED,             /* horizontal prediction */
+    TM_PRED,            /* Truemotion prediction */
+    B_PRED,             /* block based prediction, each block has its own prediction mode */
 
     NEARESTMV,
     NEARMV,
@@ -90,16 +90,16 @@ typedef enum
     MB_MODE_COUNT
 } MB_PREDICTION_MODE;
 
-// Macroblock level features
+/* Macroblock level features */
 typedef enum
 {
-    MB_LVL_ALT_Q = 0,               // Use alternate Quantizer ....
-    MB_LVL_ALT_LF = 1,              // Use alternate loop filter value...
-    MB_LVL_MAX = 2,                 // Number of MB level features supported
+    MB_LVL_ALT_Q = 0,               /* Use alternate Quantizer .... */
+    MB_LVL_ALT_LF = 1,              /* Use alternate loop filter value... */
+    MB_LVL_MAX = 2                  /* Number of MB level features supported */
 
 } MB_LVL_FEATURES;
 
-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_ALTQ    0x01
 #define SEGMENT_ALT_LF  0x02
 
@@ -110,11 +110,11 @@ typedef enum
 
 typedef enum
 {
-    B_DC_PRED,          // average of above and left pixels
+    B_DC_PRED,          /* average of above and left pixels */
     B_TM_PRED,
 
-    B_VE_PRED,           // vertical prediction
-    B_HE_PRED,           // horizontal prediction
+    B_VE_PRED,           /* vertical prediction */
+    B_HE_PRED,           /* horizontal prediction */
 
     B_LD_PRED,
     B_RD_PRED,
@@ -170,13 +170,13 @@ typedef struct
     } mv;
 
     unsigned char partitioning;
-    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char mb_skip_coeff;                                /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
     unsigned char dc_diff;
     unsigned char need_to_clamp_mvs;
 
-    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+    unsigned char segment_id;                  /* Which set of segmentation parameters should be used for this MB */
 
-    unsigned char force_no_skip; //encoder only
+    unsigned char force_no_skip; /* encoder only */
 } MB_MODE_INFO;
 
 
@@ -197,7 +197,7 @@ typedef struct
 
     short *dequant;
 
-    // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+    /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
     unsigned char **base_pre;
     int pre;
     int pre_stride;
@@ -214,17 +214,17 @@ typedef struct
 
 typedef struct
 {
-    DECLARE_ALIGNED(16, short, diff[400]);      // from idct diff
+    DECLARE_ALIGNED(16, short, diff[400]);      /* from idct diff */
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-//not used    DECLARE_ALIGNED(16, short, reference[384]);
+/* not used    DECLARE_ALIGNED(16, short, reference[384]); */
     DECLARE_ALIGNED(16, short, qcoeff[400]);
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
     DECLARE_ALIGNED(16, char,  eobs[25]);
 
-    // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
+    /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
     BLOCKD block[25];
 
-    YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
+    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
     YV12_BUFFER_CONFIG dst;
 
     MODE_INFO *mode_info_context;
@@ -235,39 +235,39 @@ typedef struct
     int up_available;
     int left_available;
 
-    // Y,U,V,Y2
+    /* Y,U,V,Y2 */
     ENTROPY_CONTEXT_PLANES *above_context;
     ENTROPY_CONTEXT_PLANES *left_context;
 
-    // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
+    /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
     unsigned char segmentation_enabled;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation map.
+    /* 0 (do not update) 1 (update) the macroblock segmentation map. */
     unsigned char update_mb_segmentation_map;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
     unsigned char update_mb_segmentation_data;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
     unsigned char mb_segement_abs_delta;
 
-    // Per frame flags that define which MB level features (such as quantizer or loop filter level)
-    // are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
-    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         // Probability Tree used to code Segment number
+    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+    /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
+    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         /* Probability Tree used to code Segment number */
 
-    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            // Segment parameters
+    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            /* Segment parameters */
 
-    // mode_based Loop filter adjustment
+    /* mode_based Loop filter adjustment */
     unsigned char mode_ref_lf_delta_enabled;
     unsigned char mode_ref_lf_delta_update;
 
-    // Delta values have the range +/- MAX_LOOP_FILTER
-    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                // 0 = Intra, Last, GF, ARF
-    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     // 0 = Intra, Last, GF, ARF
-    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      // 0 = BPRED, ZERO_MV, MV, SPLIT
-    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           // 0 = BPRED, ZERO_MV, MV, SPLIT
+    /* Delta values have the range +/- MAX_LOOP_FILTER */
+    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                /* 0 = Intra, Last, GF, ARF */
+    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     /* 0 = Intra, Last, GF, ARF */
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           /* 0 = BPRED, ZERO_MV, MV, SPLIT */
 
-    // Distance of MB away from frame edges
+    /* Distance of MB away from frame edges */
     int mb_to_left_edge;
     int mb_to_right_edge;
     int mb_to_top_edge;
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index c3ac88fc8..8c03480fa 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -21,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     int mb_index = 0;
     FILE *mvs = fopen("mvs.stt", "a");
 
-    // print out the macroblock Y modes
+    /* print out the macroblock Y modes */
     mb_index = 0;
     fprintf(mvs, "Mb Modes for Frame %d\n", frame);
 
@@ -60,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
 
     fprintf(mvs, "\n");
 
-    // print out the macroblock UV modes
+    /* print out the macroblock UV modes */
     mb_index = 0;
     fprintf(mvs, "UV Modes for Frame %d\n", frame);
 
@@ -80,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
 
     fprintf(mvs, "\n");
 
-    // print out the block modes
+    /* print out the block modes */
     mb_index = 0;
     fprintf(mvs, "Mbs for Frame %d\n", frame);
     {
@@ -108,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     }
     fprintf(mvs, "\n");
 
-    // print out the macroblock mvs
+    /* print out the macroblock mvs */
     mb_index = 0;
     fprintf(mvs, "MVs for Frame %d\n", frame);
 
@@ -128,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     fprintf(mvs, "\n");
 
 
-    // print out the block modes
+    /* print out the block modes */
     mb_index = 0;
     fprintf(mvs, "MVs for Frame %d\n", frame);
     {
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index b85f59b9f..ca58d565a 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -15,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
 {
 
     {
-        // Block Type ( 0 )
+        /* Block Type ( 0 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {30190, 26544, 225,  24,   4,   0,   0,   0,   0,   0,   0, 4171593,},
             {26846, 25157, 1241, 130,  26,   6,   1,   0,   0,   0,   0, 149987,},
             {10484, 9538, 1006, 160,  36,  18,   0,   0,   0,   0,   0, 15104,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {25842, 40456, 1126,  83,  11,   2,   0,   0,   0,   0,   0,   0,},
             {9338, 8010, 512,  73,   7,   3,   2,   0,   0,   0,   0, 43294,},
             {1047, 751, 149,  31,  13,   6,   1,   0,   0,   0,   0, 879,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {26136, 9826, 252,  13,   0,   0,   0,   0,   0,   0,   0,   0,},
             {8134, 5574, 191,  14,   2,   0,   0,   0,   0,   0,   0, 35302,},
             { 605, 677, 116,   9,   1,   0,   0,   0,   0,   0,   0, 611,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {10263, 15463, 283,  17,   0,   0,   0,   0,   0,   0,   0,   0,},
             {2773, 2191, 128,   9,   2,   2,   0,   0,   0,   0,   0, 10073,},
             { 134, 125,  32,   4,   0,   2,   0,   0,   0,   0,   0,  50,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {10483, 2663,  23,   1,   0,   0,   0,   0,   0,   0,   0,   0,},
             {2137, 1251,  27,   1,   1,   0,   0,   0,   0,   0,   0, 14362,},
             { 116, 156,  14,   2,   1,   0,   0,   0,   0,   0,   0, 190,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {40977, 27614, 412,  28,   0,   0,   0,   0,   0,   0,   0,   0,},
             {6113, 5213, 261,  22,   3,   0,   0,   0,   0,   0,   0, 26164,},
             { 382, 312,  50,  14,   2,   0,   0,   0,   0,   0,   0, 345,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,  26,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,  13,   0,   0,   0,   0,   0,   0,   0,   0,   0, 319,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   8,},
         },
     },
     {
-        // Block Type ( 1 )
+        /* Block Type ( 1 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {3268, 19382, 1043, 250,  93,  82,  49,  26,  17,   8,  25, 82289,},
             {8758, 32110, 5436, 1832, 827, 668, 420, 153,  24,   0,   3, 52914,},
             {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399,  59,   0,   0, 18620,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {12419, 8420, 452,  62,   9,   1,   0,   0,   0,   0,   0,   0,},
             {11715, 8705, 693,  92,  15,   7,   2,   0,   0,   0,   0, 53988,},
             {7603, 8585, 2306, 778, 270, 145,  39,   5,   0,   0,   0, 9136,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {15938, 14335, 1207, 184,  55,  13,   4,   1,   0,   0,   0,   0,},
             {7415, 6829, 1138, 244,  71,  26,   7,   0,   0,   0,   0, 9980,},
             {1580, 1824, 655, 241,  89,  46,  10,   2,   0,   0,   0, 429,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {19453, 5260, 201,  19,   0,   0,   0,   0,   0,   0,   0,   0,},
             {9173, 3758, 213,  22,   1,   1,   0,   0,   0,   0,   0, 9820,},
             {1689, 1277, 276,  51,  17,   4,   0,   0,   0,   0,   0, 679,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {12076, 10667, 620,  85,  19,   9,   5,   0,   0,   0,   0,   0,},
             {4665, 3625, 423,  55,  19,   9,   0,   0,   0,   0,   0, 5127,},
             { 415, 440, 143,  34,  20,   7,   2,   0,   0,   0,   0, 101,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {12183, 4846, 115,  11,   1,   0,   0,   0,   0,   0,   0,   0,},
             {4226, 3149, 177,  21,   2,   0,   0,   0,   0,   0,   0, 7157,},
             { 375, 621, 189,  51,  11,   4,   1,   0,   0,   0,   0, 198,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {61658, 37743, 1203,  94,  10,   3,   0,   0,   0,   0,   0,   0,},
             {15514, 11563, 903, 111,  14,   5,   0,   0,   0,   0,   0, 25195,},
             { 929, 1077, 291,  78,  14,   7,   1,   0,   0,   0,   0, 507,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0, 990,  15,   3,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0, 412,  13,   0,   0,   0,   0,   0,   0,   0,   0, 1641,},
             {   0,  18,   7,   1,   0,   0,   0,   0,   0,   0,   0,  30,},
         },
     },
     {
-        // Block Type ( 2 )
+        /* Block Type ( 2 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             { 953, 24519, 628, 120,  28,  12,   4,   0,   0,   0,   0, 2248798,},
             {1525, 25654, 2647, 617, 239, 143,  42,   5,   0,   0,   0, 66837,},
             {1180, 11011, 3001, 1237, 532, 448, 239,  54,   5,   0,   0, 7122,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {1356, 2220,  67,  10,   4,   1,   0,   0,   0,   0,   0,   0,},
             {1450, 2544, 102,  18,   4,   3,   0,   0,   0,   0,   0, 57063,},
             {1182, 2110, 470, 130,  41,  21,   0,   0,   0,   0,   0, 6047,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             { 370, 3378, 200,  30,   5,   4,   1,   0,   0,   0,   0,   0,},
             { 293, 1006, 131,  29,  11,   0,   0,   0,   0,   0,   0, 5404,},
             { 114, 387,  98,  23,   4,   8,   1,   0,   0,   0,   0, 236,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             { 579, 194,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             { 395, 213,   5,   1,   0,   0,   0,   0,   0,   0,   0, 4157,},
             { 119, 122,   4,   0,   0,   0,   0,   0,   0,   0,   0, 300,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {  38, 557,  19,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  21, 114,  12,   1,   0,   0,   0,   0,   0,   0,   0, 427,},
             {   0,   5,   0,   0,   0,   0,   0,   0,   0,   0,   0,   7,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {  52,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  18,   6,   0,   0,   0,   0,   0,   0,   0,   0,   0, 652,},
             {   1,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  30,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             { 640, 569,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  25,  77,   2,   0,   0,   0,   0,   0,   0,   0,   0, 517,},
             {   4,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
         },
     },
     {
-        // Block Type ( 3 )
+        /* Block Type ( 3 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {2506, 20161, 2707, 767, 261, 178, 107,  30,  14,   3,   0, 100694,},
             {8806, 36478, 8817, 3268, 1280, 850, 401, 114,  42,   0,   0, 58572,},
             {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175,  32,   0,   0, 19284,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {9738, 11313, 959, 205,  70,  18,  11,   1,   0,   0,   0,   0,},
             {12628, 15085, 1507, 273,  52,  19,   9,   0,   0,   0,   0, 54280,},
             {10701, 15846, 5561, 1926, 813, 570, 249,  36,   0,   0,   0, 6460,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {6781, 22539, 2784, 634, 182, 123,  20,   4,   0,   0,   0,   0,},
             {6263, 11544, 2649, 790, 259, 168,  27,   5,   0,   0,   0, 20539,},
             {3109, 4075, 2031, 896, 457, 386, 158,  29,   0,   0,   0, 1138,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {11515, 4079, 465,  73,   5,  14,   2,   0,   0,   0,   0,   0,},
             {9361, 5834, 650,  96,  24,   8,   4,   0,   0,   0,   0, 22181,},
             {4343, 3974, 1360, 415, 132,  96,  14,   1,   0,   0,   0, 1267,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {4787, 9297, 823, 168,  44,  12,   4,   0,   0,   0,   0,   0,},
             {3619, 4472, 719, 198,  60,  31,   3,   0,   0,   0,   0, 8401,},
             {1157, 1175, 483, 182,  88,  31,   8,   0,   0,   0,   0, 268,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {8299, 1226,  32,   5,   1,   0,   0,   0,   0,   0,   0,   0,},
             {3502, 1568,  57,   4,   1,   1,   0,   0,   0,   0,   0, 9811,},
             {1055, 1070, 166,  29,   6,   1,   0,   0,   0,   0,   0, 527,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {27414, 27927, 1989, 347,  69,  26,   0,   0,   0,   0,   0,   0,},
             {5876, 10074, 1574, 341,  91,  24,   4,   0,   0,   0,   0, 21954,},
             {1571, 2171, 778, 324, 124,  65,  16,   0,   0,   0,   0, 979,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,  29,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,  23,   0,   0,   0,   0,   0,   0,   0,   0,   0, 459,},
             {   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  13,},
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 80b481700..0685cd0ae 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -17,18 +17,18 @@
 
 /* Coefficient token alphabet */
 
-#define ZERO_TOKEN              0       //0         Extra Bits 0+0
-#define ONE_TOKEN               1       //1         Extra Bits 0+1
-#define TWO_TOKEN               2       //2         Extra Bits 0+1
-#define THREE_TOKEN             3       //3         Extra Bits 0+1
-#define FOUR_TOKEN              4       //4         Extra Bits 0+1
-#define DCT_VAL_CATEGORY1       5       //5-6       Extra Bits 1+1
-#define DCT_VAL_CATEGORY2       6       //7-10      Extra Bits 2+1
-#define DCT_VAL_CATEGORY3       7       //11-26     Extra Bits 4+1
-#define DCT_VAL_CATEGORY4       8       //11-26     Extra Bits 5+1
-#define DCT_VAL_CATEGORY5       9       //27-58     Extra Bits 5+1
-#define DCT_VAL_CATEGORY6       10      //59+       Extra Bits 11+1
-#define DCT_EOB_TOKEN           11      //EOB       Extra Bits 0+0
+#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
+#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
+#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
+#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
+#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3       7       /* 11-26     Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY4       8       /* 11-26     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY5       9       /* 27-58     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6       10      /* 59+       Extra Bits 11+1 */
+#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */
 
 #define vp8_coef_tokens 12
 #define MAX_ENTROPY_TOKENS vp8_coef_tokens
@@ -83,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
    coefficient band (and since zigzag positions 0, 1, and 2 are in
    distinct bands). */
 
-/*# define DC_TOKEN_CONTEXTS        3 // 00, 0!0, !0!0 */
+/*# define DC_TOKEN_CONTEXTS        3*/ /* 00, 0!0, !0!0 */
 #   define PREV_COEF_CONTEXTS       3
 
 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 8e72881e9..e5df1f095 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -29,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
 const MV_CONTEXT vp8_default_mv_context[2] =
 {
     {{
-        // row
-        162,                                        // is short
-        128,                                        // sign
-        225, 146, 172, 147, 214,  39, 156,          // short tree
-        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 // long bits
+        /* row */
+        162,                                        /* is short */
+        128,                                        /* sign */
+        225, 146, 172, 147, 214,  39, 156,          /* short tree */
+        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 /* long bits */
     }},
 
 
 
     {{
-        // same for column
-        164,                                        // is short
+        /* same for column */
+        164,                                        /* is short */
         128,
         204, 170, 119, 235, 140, 230, 228,
-        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 // long bits
+        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 /* long bits */
 
     }}
 };
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 7e06ac30c..47207fa79 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -15,14 +15,14 @@
 
 static void extend_plane_borders
 (
-    unsigned char *s, // source
-    int sp,           // pitch
-    int h,            // height
-    int w,            // width
-    int et,           // extend top border
-    int el,           // extend left border
-    int eb,           // extend bottom border
-    int er            // extend right border
+    unsigned char *s, /* source */
+    int sp,           /* pitch */
+    int h,            /* height */
+    int w,            /* width */
+    int et,           /* extend top border */
+    int el,           /* extend left border */
+    int eb,           /* extend bottom border */
+    int er            /* extend right border */
 )
 {
 
@@ -31,7 +31,7 @@ static void extend_plane_borders
     unsigned char *dest_ptr1, *dest_ptr2;
     int linesize;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = s;
     src_ptr2 = s + w - 1;
     dest_ptr1 = s - el;
@@ -39,8 +39,9 @@ static void extend_plane_borders
 
     for (i = 0; i < h - 0 + 1; i++)
     {
-        // Some linkers will complain if we call vpx_memset with el set to a
-        // constant 0.
+        /* Some linkers will complain if we call vpx_memset with el set to a
+         * constant 0.
+         */
         if (el)
             vpx_memset(dest_ptr1, src_ptr1[0], el);
         vpx_memset(dest_ptr2, src_ptr2[0], er);
@@ -50,7 +51,7 @@ static void extend_plane_borders
         dest_ptr2 += sp;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = s - el;
     src_ptr2 = s + sp * (h - 1) - el;
     dest_ptr1 = s + sp * (-et) - el;
@@ -76,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
     int er = 0xf & (16 - (width & 0xf));
     int eb = 0xf & (16 - (height & 0xf));
 
-    // check for non multiples of 16
+    /* check for non multiples of 16 */
     if (er != 0 || eb != 0)
     {
         extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
 
-        //adjust for uv
+        /* adjust for uv */
         height = (height + 1) >> 1;
         width  = (width  + 1) >> 1;
         er = 0x7 & (8 - (width  & 0x7));
@@ -95,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
     }
 }
 
-// note the extension is only for the last row, for intra prediction purpose
+/* note the extension is only for the last row, for intra prediction purpose */
 void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
 {
     int i;
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 3d18d8191..399a847d5 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -32,13 +32,13 @@ static const int bilinear_filters[8][2] =
 static const short sub_pel_filters[8][6] =
 {
 
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
     { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
     { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
     { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
     { 0, -1,   12,  123,  -6,  0 },
 
 
@@ -69,9 +69,9 @@ void vp8_filter_block2d_first_pass
                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);      // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
 
-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
             Temp = Temp >> VP8_FILTER_SHIFT;
 
             if (Temp < 0)
@@ -83,7 +83,7 @@ void vp8_filter_block2d_first_pass
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -108,16 +108,16 @@ void vp8_filter_block2d_second_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply filter
+            /* Apply filter */
             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
                    ((int)src_ptr[0]                 * vp8_filter[2]) +
                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);   // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
 
-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
             Temp = Temp >> VP8_FILTER_SHIFT;
 
             if (Temp < 0)
@@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
             src_ptr++;
         }
 
-        // Start next row
+        /* Start next row */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_pitch;
     }
@@ -146,12 +146,12 @@ void vp8_filter_block2d
     const short  *VFilter
 )
 {
-    int FData[9*4]; // Temp data bufffer used in filtering
+    int FData[9*4]; /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 }
 
@@ -195,8 +195,8 @@ void vp8_sixtap_predict_c
     const short  *HFilter;
     const short  *VFilter;
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
     vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
 }
@@ -212,16 +212,16 @@ void vp8_sixtap_predict8x8_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
 
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
 
 }
@@ -238,16 +238,16 @@ void vp8_sixtap_predict8x4_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
 
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
 
 }
@@ -264,16 +264,16 @@ void vp8_sixtap_predict16x16_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[21*24];   // Temp data bufffer used in filtering
+    int FData[21*24];   /* Temp data bufffer used in filtering */
 
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
 
 }
@@ -324,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
             output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
                              ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                              (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -384,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply filter
+            /* Apply filter */
             Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
                    ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                    (VP8_FILTER_WEIGHT / 2);
@@ -392,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_pitch;
     }
@@ -432,12 +432,12 @@ void vp8_filter_block2d_bil
 )
 {
 
-    unsigned short FData[17*16];    // Temp data bufffer used in filtering
+    unsigned short FData[17*16];    /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
 
-    // then 1-D vertically...
+    /* then 1-D vertically... */
     vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
 }
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index 41037f707..e63d4ef8d 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -168,7 +168,7 @@ void vp8_find_near_mvs
 
     vp8_clamp_mv(nearest, xd);
     vp8_clamp_mv(nearby, xd);
-    vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
+    vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
 }
 
 vp8_prob *vp8_mv_ref_probs(
@@ -179,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
     p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
     p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
     p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
-    //p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
+    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
     return p;
 }
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c624242a5..7b1a84b70 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -73,7 +73,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 #endif
 
 #endif
-    // Pure C:
+    /* Pure C: */
     vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
     vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
 
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 4cb433a80..81a3f2d89 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -38,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
 {
     int i;
 
-    // do 2nd order transform on the dc block
+    /* do 2nd order transform on the dc block */
     IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);
 
     recon_dcblock(x);
@@ -68,7 +68,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
     if (x->mode_info_context->mbmi.mode != B_PRED &&
         x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
 
         IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
         recon_dcblock(x);
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index da9ca2871..f9d082304 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -23,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
 
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                            int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -47,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                            int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -71,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -99,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -140,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
     const int yhedge_boost  = 2;
     const int uvhedge_boost = 2;
 
-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
     for (i = 0; i <= MAX_LOOP_FILTER; i++)
     {
         int filt_lvl = i;
@@ -166,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
                 HEVThresh = 0;
         }
 
-        // Set loop filter paramaeters that control sharpness.
+        /* Set loop filter paramaeters that control sharpness. */
         block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
         block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
 
@@ -195,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
 
     }
 
-    // Set up the function pointers depending on the type of loop filtering selected
+    /* Set up the function pointers depending on the type of loop filtering selected */
     if (lft == NORMAL_LOOPFILTER)
     {
         cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
@@ -212,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
     }
 }
 
-// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
-// each frame. Check last_frame_type to skip the function most of times.
+/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
+ * each frame. Check last_frame_type to skip the function most of times.
+ */
 void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
 {
     int HEVThresh;
     int i, j;
 
-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
     for (i = 0; i <= MAX_LOOP_FILTER; i++)
     {
         int filt_lvl = i;
@@ -247,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
 
         for (j = 0; j < 16; j++)
         {
-            //lfi[i].lim[j] = block_inside_limit;
-            //lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
+            /*lfi[i].lim[j] = block_inside_limit;
+            lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
             lfi[i].mbthr[j] = HEVThresh;
-            //lfi[i].flim[j] = filt_lvl;
+            /*lfi[i].flim[j] = filt_lvl;*/
             lfi[i].thr[j] = HEVThresh;
-            //lfi[i].uvlim[j] = block_inside_limit;
-            //lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
+            /*lfi[i].uvlim[j] = block_inside_limit;
+            lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
             lfi[i].uvmbthr[j] = HEVThresh;
-            //lfi[i].uvflim[j] = filt_lvl;
+            /*lfi[i].uvflim[j] = filt_lvl;*/
             lfi[i].uvthr[j] = HEVThresh;
         }
     }
@@ -268,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
 
     if (mbd->mode_ref_lf_delta_enabled)
     {
-        // Aplly delta for reference frame
+        /* Apply delta for reference frame */
         *filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
 
-        // Apply delta for mode
+        /* Apply delta for mode */
         if (mbmi->ref_frame == INTRA_FRAME)
         {
-            // Only the split mode BPRED has a further special case
+            /* Only the split mode BPRED has a further special case */
             if (mbmi->mode == B_PRED)
                 *filter_level +=  mbd->mode_lf_deltas[0];
         }
         else
         {
-            // Zero motion mode
+            /* Zero motion mode */
             if (mbmi->mode == ZEROMV)
                 *filter_level +=  mbd->mode_lf_deltas[1];
 
-            // Split MB motion mode
+            /* Split MB motion mode */
             else if (mbmi->mode == SPLITMV)
                 *filter_level +=  mbd->mode_lf_deltas[3];
 
-            // All other inter motion modes (Nearest, Near, New)
+            /* All other inter motion modes (Nearest, Near, New) */
             else
                 *filter_level +=  mbd->mode_lf_deltas[2];
         }
 
-        // Range check
+        /* Range check */
         if (*filter_level > MAX_LOOP_FILTER)
             *filter_level = MAX_LOOP_FILTER;
         else if (*filter_level < 0)
@@ -311,7 +312,7 @@ void vp8_loop_filter_frame
 {
     YV12_BUFFER_CONFIG *post = cm->frame_to_show;
     loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     int mb_row;
     int mb_col;
@@ -324,21 +325,21 @@ void vp8_loop_filter_frame
     int i;
     unsigned char *y_ptr, *u_ptr, *v_ptr;
 
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -348,18 +349,18 @@ void vp8_loop_filter_frame
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer;
     u_ptr = post->u_buffer;
     v_ptr = post->v_buffer;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
     {
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -368,9 +369,10 @@ void vp8_loop_filter_frame
 
             filter_level = baseline_filter_level[Segment];
 
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-            // Apply any context driven MB level adjustment
+            /* Distance of Mb to the various image edges.
+             * These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+             * Apply any context driven MB level adjustment
+             */
             vp8_adjust_mb_lf_value(mbd, &filter_level);
 
             if (filter_level)
@@ -381,7 +383,7 @@ void vp8_loop_filter_frame
                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
                     cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
 
-                // don't apply across umv border
+                /* don't apply across umv border */
                 if (mb_row > 0)
                     cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
 
@@ -393,14 +395,14 @@ void vp8_loop_filter_frame
             u_ptr += 8;
             v_ptr += 8;
 
-            mbd->mode_info_context++;     // step to next MB
+            mbd->mode_info_context++;     /* step to next MB */
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
         u_ptr += post->uv_stride *  8 - post->uv_width;
         v_ptr += post->uv_stride *  8 - post->uv_width;
 
-        mbd->mode_info_context++;         // Skip border mb
+        mbd->mode_info_context++;         /* Skip border mb */
     }
 }
 
@@ -424,26 +426,26 @@ void vp8_loop_filter_frame_yonly
     int baseline_filter_level[MAX_MB_SEGMENTS];
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     (void) sharpness_lvl;
 
-    //MODE_INFO * this_mb_mode_info = cm->mi;  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -453,16 +455,16 @@ void vp8_loop_filter_frame_yonly
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
     {
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -470,7 +472,7 @@ void vp8_loop_filter_frame_yonly
             int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
             filter_level = baseline_filter_level[Segment];
 
-            // Apply any context driven MB level adjustment
+            /* Apply any context driven MB level adjustment */
             vp8_adjust_mb_lf_value(mbd, &filter_level);
 
             if (filter_level)
@@ -481,7 +483,7 @@ void vp8_loop_filter_frame_yonly
                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
                     cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
 
-                // don't apply across umv border
+                /* don't apply across umv border */
                 if (mb_row > 0)
                     cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
 
@@ -490,12 +492,12 @@ void vp8_loop_filter_frame_yonly
             }
 
             y_ptr += 16;
-            mbd->mode_info_context ++;        // step to next MB
+            mbd->mode_info_context ++;        /* step to next MB */
 
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context ++;            // Skip border mb
+        mbd->mode_info_context ++;            /* Skip border mb */
     }
 
 }
@@ -516,7 +518,7 @@ void vp8_loop_filter_partial_frame
     unsigned char *y_ptr;
     int mb_row;
     int mb_col;
-    //int mb_rows = post->y_height >> 4;
+    /*int mb_rows = post->y_height >> 4;*/
     int mb_cols = post->y_width  >> 4;
 
     int linestocopy;
@@ -525,12 +527,12 @@ void vp8_loop_filter_partial_frame
     int baseline_filter_level[MAX_MB_SEGMENTS];
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     (void) sharpness_lvl;
 
-    //MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        /* Point at base of Mb MODE_INFO list */
 
     linestocopy = (post->y_height >> (4 + Fraction));
 
@@ -539,19 +541,19 @@ void vp8_loop_filter_partial_frame
 
     linestocopy <<= 4;
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -561,16 +563,16 @@ void vp8_loop_filter_partial_frame
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
     {
         for (mb_col = 0; mb_col < mb_cols; mb_col++)
@@ -593,10 +595,10 @@ void vp8_loop_filter_partial_frame
             }
 
             y_ptr += 16;
-            mbd->mode_info_context += 1;      // step to next MB
+            mbd->mode_info_context += 1;      /* step to next MB */
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context += 1;          // Skip border mb
+        mbd->mode_info_context += 1;          /* Skip border mb */
     }
 }
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index a2049bf61..e45683460 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -22,10 +22,10 @@ typedef enum
     SIMPLE_LOOPFILTER = 1
 } LOOPFILTERTYPE;
 
-// FRK
-// Need to align this structure so when it is declared and
-// passed it can be loaded into vector registers.
-// FRK
+/* FRK
+ * Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
 typedef struct
 {
     DECLARE_ALIGNED(16, signed char, lim[16]);
@@ -119,8 +119,8 @@ typedef struct
 
 typedef void loop_filter_uvfunction
 (
-    unsigned char *u,   // source pointer
-    int p,              // pitch
+    unsigned char *u,   /* source pointer */
+    int p,              /* pitch */
     const signed char *flimit,
     const signed char *limit,
     const signed char *thresh,
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 1b92e00aa..694052924 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -23,7 +23,7 @@ static __inline signed char vp8_signed_char_clamp(int t)
 }
 
 
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                      uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
 {
@@ -39,7 +39,7 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
     return mask;
 }
 
-// is there high variance internal edge ( 11111111 yes, 00000000 no)
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
 {
     signed char hev = 0;
@@ -61,17 +61,18 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
     qs0 = (signed char) * oq0 ^ 0x80;
     qs1 = (signed char) * oq1 ^ 0x80;
 
-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
     vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
     vp8_filter &= hev;
 
-    // inner taps
+    /* inner taps */
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
     vp8_filter &= mask;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
-    // if it equals 4 we'll set to adjust by -1 to account for the fact
-    // we'd round 3 the other way
+    /* save bottom 3 bits so that we round one side +4 and the other +3
+     * if it equals 4 we'll set to adjust by -1 to account for the fact
+     * we'd round 3 the other way
+     */
     Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
     Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
     Filter1 >>= 3;
@@ -82,7 +83,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
     *op0 = u ^ 0x80;
     vp8_filter = Filter1;
 
-    // outer tap adjustments
+    /* outer tap adjustments */
     vp8_filter += 1;
     vp8_filter >>= 1;
     vp8_filter &= ~hev;
@@ -96,19 +97,20 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
 void vp8_loop_filter_horizontal_edge_c
 (
     unsigned char *s,
-    int p, //pitch
+    int p, /* pitch */
     const signed char *flimit,
     const signed char *limit,
     const signed char *thresh,
     int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
         mask = vp8_filter_mask(limit[i], flimit[i],
@@ -134,12 +136,13 @@ void vp8_loop_filter_vertical_edge_c
     int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
         mask = vp8_filter_mask(limit[i], flimit[i],
@@ -166,7 +169,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     signed char qs1 = (signed char) * oq1 ^ 0x80;
     signed char qs2 = (signed char) * oq2 ^ 0x80;
 
-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
     vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
     vp8_filter &= mask;
@@ -174,7 +177,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     Filter2 = vp8_filter;
     Filter2 &= hev;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
     Filter1 = vp8_signed_char_clamp(Filter2 + 4);
     Filter2 = vp8_signed_char_clamp(Filter2 + 3);
     Filter1 >>= 3;
@@ -183,25 +186,25 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     ps0 = vp8_signed_char_clamp(ps0 + Filter2);
 
 
-    // only apply wider filter if not high edge variance
+    /* only apply wider filter if not high edge variance */
     vp8_filter &= ~hev;
     Filter2 = vp8_filter;
 
-    // roughly 3/7th difference across boundary
+    /* roughly 3/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
     s = vp8_signed_char_clamp(qs0 - u);
     *oq0 = s ^ 0x80;
     s = vp8_signed_char_clamp(ps0 + u);
     *op0 = s ^ 0x80;
 
-    // roughly 2/7th difference across boundary
+    /* roughly 2/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
     s = vp8_signed_char_clamp(qs1 - u);
     *oq1 = s ^ 0x80;
     s = vp8_signed_char_clamp(ps1 + u);
     *op1 = s ^ 0x80;
 
-    // roughly 1/7th difference across boundary
+    /* roughly 1/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
     s = vp8_signed_char_clamp(qs2 - u);
     *oq2 = s ^ 0x80;
@@ -219,12 +222,13 @@ void vp8_mbloop_filter_horizontal_edge_c
     int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
 
@@ -253,7 +257,7 @@ void vp8_mbloop_filter_vertical_edge_c
     int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
@@ -273,12 +277,13 @@ void vp8_mbloop_filter_vertical_edge_c
 
 }
 
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
 {
-// Why does this cause problems for win32?
-// error C2143: syntax error : missing ';' before 'type'
-//  (void) limit;
+/* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ *  (void) limit;
+ */
     signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= flimit * 2 + limit) * -1;
     return mask;
 }
@@ -296,7 +301,7 @@ static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *o
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
     vp8_filter &= mask;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
     Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
     Filter1 >>= 3;
     u = vp8_signed_char_clamp(q0 - Filter1);
@@ -324,7 +329,7 @@ void vp8_loop_filter_simple_horizontal_edge_c
 
     do
     {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
         mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
         vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
         ++s;
@@ -348,7 +353,7 @@ void vp8_loop_filter_simple_vertical_edge_c
 
     do
     {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
         mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
         vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
         s += p;
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index ce40d16ca..af55e2fe0 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -14,7 +14,7 @@
 typedef enum
 {
     PRED = 0,
-    DEST = 1,
+    DEST = 1
 } BLOCKSET;
 
 void vp8_setup_block
@@ -62,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
         v = &x->pre.v_buffer;
     }
 
-    for (block = 0; block < 16; block++) // y blocks
+    for (block = 0; block < 16; block++) /* y blocks */
     {
         vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
                         (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
     }
 
-    for (block = 16; block < 20; block++) // U and V blocks
+    for (block = 16; block < 20; block++) /* U and V blocks */
     {
         vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
                         ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
@@ -123,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
 void vp8_build_block_doffsets(MACROBLOCKD *x)
 {
 
-    // handle the destination pitch features
+    /* handle the destination pitch features */
     vp8_setup_macroblock(x, DEST);
     vp8_setup_macroblock(x, PRED);
 }
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index 0fa299577..86a74bc0f 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -14,27 +14,27 @@
 const int vp8_mode_contexts[6][4] =
 {
     {
-        // 0
+        /* 0 */
         7,     1,     1,   143,
     },
     {
-        // 1
+        /* 1 */
         14,    18,    14,   107,
     },
     {
-        // 2
+        /* 2 */
         135,    64,    57,    68,
     },
     {
-        // 3
+        /* 3 */
         60,    56,   128,    65,
     },
     {
-        // 4
+        /* 4 */
         159,   134,   128,    34,
     },
     {
-        // 5
+        /* 5 */
         234,   188,   128,    28,
     },
 };
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index 8e483b800..a31a561c8 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -14,133 +14,133 @@
 const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
 {
     {
-        //Above Mode :  0
-        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, // left_mode 0
-        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, // left_mode 1
-        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, // left_mode 2
-        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, // left_mode 3
-        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, // left_mode 4
-        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, // left_mode 5
-        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, // left_mode 6
-        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, // left_mode 7
-        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, // left_mode 8
-        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, // left_mode 9
+        /*Above Mode :  0*/
+        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, /* left_mode 0 */
+        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, /* left_mode 1 */
+        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, /* left_mode 2 */
+        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, /* left_mode 3 */
+        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, /* left_mode 4 */
+        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, /* left_mode 5 */
+        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, /* left_mode 6 */
+        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, /* left_mode 7 */
+        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, /* left_mode 8 */
+        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  1
-        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, // left_mode 0
-        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, // left_mode 1
-        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, // left_mode 2
-        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, // left_mode 3
-        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, // left_mode 4
-        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, // left_mode 5
-        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, // left_mode 6
-        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, // left_mode 7
-        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, // left_mode 8
-        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, // left_mode 9
+        /*Above Mode :  1*/
+        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, /* left_mode 0 */
+        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, /* left_mode 1 */
+        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, /* left_mode 2 */
+        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, /* left_mode 3 */
+        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, /* left_mode 4 */
+        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, /* left_mode 5 */
+        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, /* left_mode 6 */
+        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, /* left_mode 7 */
+        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, /* left_mode 8 */
+        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  2
-        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, // left_mode 0
-        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, // left_mode 1
-        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, // left_mode 2
-        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, // left_mode 3
-        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, // left_mode 4
-        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, // left_mode 5
-        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, // left_mode 6
-        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, // left_mode 7
-        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, // left_mode 8
-        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, // left_mode 9
+        /*Above Mode :  2*/
+        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, /* left_mode 0 */
+        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, /* left_mode 1 */
+        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, /* left_mode 2 */
+        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, /* left_mode 3 */
+        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, /* left_mode 4 */
+        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, /* left_mode 5 */
+        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, /* left_mode 6 */
+        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, /* left_mode 7 */
+        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, /* left_mode 8 */
+        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  3
-        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, // left_mode 0
-        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, // left_mode 1
-        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, // left_mode 2
-        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, // left_mode 3
-        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, // left_mode 4
-        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, // left_mode 5
-        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, // left_mode 6
-        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, // left_mode 7
-        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, // left_mode 8
-        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, // left_mode 9
+        /*Above Mode :  3*/
+        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, /* left_mode 0 */
+        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, /* left_mode 1 */
+        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, /* left_mode 2 */
+        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, /* left_mode 3 */
+        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, /* left_mode 4 */
+        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, /* left_mode 5 */
+        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, /* left_mode 6 */
+        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, /* left_mode 7 */
+        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, /* left_mode 8 */
+        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  4
-        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, // left_mode 0
-        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, // left_mode 1
-        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, // left_mode 2
-        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, // left_mode 3
-        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, // left_mode 4
-        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, // left_mode 5
-        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, // left_mode 6
-        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, // left_mode 7
-        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, // left_mode 8
-        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, // left_mode 9
+        /*Above Mode :  4*/
+        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, /* left_mode 0 */
+        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, /* left_mode 1 */
+        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, /* left_mode 2 */
+        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, /* left_mode 3 */
+        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, /* left_mode 4 */
+        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, /* left_mode 5 */
+        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, /* left_mode 6 */
+        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, /* left_mode 7 */
+        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, /* left_mode 8 */
+        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  5
-        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, // left_mode 0
-        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, // left_mode 1
-        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, // left_mode 2
-        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, // left_mode 3
-        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, // left_mode 4
-        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, // left_mode 5
-        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, // left_mode 6
-        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, // left_mode 7
-        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, // left_mode 8
-        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, // left_mode 9
+        /*Above Mode :  5*/
+        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, /* left_mode 0 */
+        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, /* left_mode 1 */
+        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, /* left_mode 2 */
+        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, /* left_mode 3 */
+        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, /* left_mode 4 */
+        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, /* left_mode 5 */
+        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, /* left_mode 6 */
+        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, /* left_mode 7 */
+        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, /* left_mode 8 */
+        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  6
-        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, // left_mode 0
-        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, // left_mode 1
-        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, // left_mode 2
-        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, // left_mode 3
-        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, // left_mode 4
-        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, // left_mode 5
-        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, // left_mode 6
-        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, // left_mode 7
-        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, // left_mode 8
-        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, // left_mode 9
+        /*Above Mode :  6*/
+        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, /* left_mode 0 */
+        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, /* left_mode 1 */
+        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, /* left_mode 2 */
+        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, /* left_mode 3 */
+        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, /* left_mode 4 */
+        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, /* left_mode 5 */
+        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, /* left_mode 6 */
+        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, /* left_mode 7 */
+        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, /* left_mode 8 */
+        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  7
-        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, // left_mode 0
-        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, // left_mode 1
-        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, // left_mode 2
-        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, // left_mode 3
-        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, // left_mode 4
-        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, // left_mode 5
-        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, // left_mode 6
-        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, // left_mode 7
-        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, // left_mode 8
-        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, // left_mode 9
+        /*Above Mode :  7*/
+        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, /* left_mode 0 */
+        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, /* left_mode 1 */
+        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, /* left_mode 2 */
+        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, /* left_mode 3 */
+        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, /* left_mode 4 */
+        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, /* left_mode 5 */
+        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, /* left_mode 6 */
+        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, /* left_mode 7 */
+        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, /* left_mode 8 */
+        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  8
-        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, // left_mode 0
-        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, // left_mode 1
-        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, // left_mode 2
-        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, // left_mode 3
-        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, // left_mode 4
-        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, // left_mode 5
-        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, // left_mode 6
-        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, // left_mode 7
-        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, // left_mode 8
-        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, // left_mode 9
+        /*Above Mode :  8*/
+        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, /* left_mode 0 */
+        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, /* left_mode 1 */
+        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, /* left_mode 2 */
+        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, /* left_mode 3 */
+        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, /* left_mode 4 */
+        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, /* left_mode 5 */
+        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, /* left_mode 6 */
+        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, /* left_mode 7 */
+        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, /* left_mode 8 */
+        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  9
-        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, // left_mode 0
-        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, // left_mode 1
-        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, // left_mode 2
-        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, // left_mode 3
-        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, // left_mode 4
-        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, // left_mode 5
-        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, // left_mode 6
-        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, // left_mode 7
-        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, // left_mode 8
-        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, // left_mode 9
+        /*Above Mode :  9*/
+        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, /* left_mode 0 */
+        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, /* left_mode 1 */
+        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, /* left_mode 2 */
+        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, /* left_mode 3 */
+        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, /* left_mode 4 */
+        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, /* left_mode 5 */
+        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, /* left_mode 6 */
+        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, /* left_mode 7 */
+        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, /* left_mode 8 */
+        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, /* left_mode 9 */
     },
 };
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index d12143d4d..7e44c1f0c 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -21,9 +21,9 @@
 #include "recon.h"
 #include "postproc.h"
 
-//#ifdef PACKET_TESTING
+/*#ifdef PACKET_TESTING*/
 #include "header.h"
-//#endif
+/*#endif*/
 
 /* Create/destroy static data structures. */
 
@@ -43,7 +43,7 @@ typedef struct frame_contexts
     vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
     vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
     MV_CONTEXT mvc[2];
-    MV_CONTEXT pre_mvc[2];  //not to caculate the mvcost for the frame if mvc doesn't change.
+    MV_CONTEXT pre_mvc[2];  /* not to caculate the mvcost for the frame if mvc doesn't change. */
 } FRAME_CONTEXT;
 
 typedef enum
@@ -105,7 +105,7 @@ typedef struct VP8Common
     YV12_BUFFER_CONFIG post_proc_buffer;
     YV12_BUFFER_CONFIG temp_scale_frame;
 
-    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skipped.
+    FRAME_TYPE last_frame_type;  /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
     FRAME_TYPE frame_type;
 
     int show_frame;
@@ -116,7 +116,7 @@ typedef struct VP8Common
     int mb_cols;
     int mode_info_stride;
 
-    // prfile settings
+    /* profile settings */
     int mb_no_coeff_skip;
     int no_lpf;
     int simpler_lpf;
@@ -124,7 +124,7 @@ typedef struct VP8Common
     int full_pixel;
 
     int base_qindex;
-    int last_kf_gf_q;  // Q used on the last GF or KF
+    int last_kf_gf_q;  /* Q used on the last GF or KF */
 
     int y1dc_delta_q;
     int y2dc_delta_q;
@@ -154,31 +154,31 @@ typedef struct VP8Common
     int last_sharpness_level;
     int sharpness_level;
 
-    int refresh_last_frame;       // Two state 0 = NO, 1 = YES
-    int refresh_golden_frame;     // Two state 0 = NO, 1 = YES
-    int refresh_alt_ref_frame;     // Two state 0 = NO, 1 = YES
+    int refresh_last_frame;       /* Two state 0 = NO, 1 = YES */
+    int refresh_golden_frame;     /* Two state 0 = NO, 1 = YES */
+    int refresh_alt_ref_frame;     /* Two state 0 = NO, 1 = YES */
 
-    int copy_buffer_to_gf;         // 0 none, 1 Last to GF, 2 ARF to GF
-    int copy_buffer_to_arf;        // 0 none, 1 Last to ARF, 2 GF to ARF
+    int copy_buffer_to_gf;         /* 0 none, 1 Last to GF, 2 ARF to GF */
+    int copy_buffer_to_arf;        /* 0 none, 1 Last to ARF, 2 GF to ARF */
 
-    int refresh_entropy_probs;    // Two state 0 = NO, 1 = YES
+    int refresh_entropy_probs;    /* Two state 0 = NO, 1 = YES */
 
-    int ref_frame_sign_bias[MAX_REF_FRAMES];    // Two state 0, 1
+    int ref_frame_sign_bias[MAX_REF_FRAMES];    /* Two state 0, 1 */
 
-    // Y,U,V,Y2
-    ENTROPY_CONTEXT_PLANES *above_context;   // row of context for each plane
-    ENTROPY_CONTEXT_PLANES left_context;  // (up to) 4 contexts ""
+    /* Y,U,V,Y2 */
+    ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
+    ENTROPY_CONTEXT_PLANES left_context;  /* (up to) 4 contexts "" */
 
 
-    // keyframe block modes are predicted by their above, left neighbors
+    /* keyframe block modes are predicted by their above, left neighbors */
 
     vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
     vp8_prob kf_ymode_prob [VP8_YMODES-1];  /* keyframe "" */
     vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
 
 
-    FRAME_CONTEXT lfc; // last frame entropy
-    FRAME_CONTEXT fc;  // this frame entropy
+    FRAME_CONTEXT lfc; /* last frame entropy */
+    FRAME_CONTEXT fc;  /* this frame entropy */
 
     unsigned int current_video_frame;
 
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 42e37da30..6f419435a 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -25,28 +25,28 @@
     (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
     ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
 
-// global constants
+/* global constants */
 
 static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
 {
-    { RGB_TO_YUV(0x98FB98) },   // PaleGreen
-    { RGB_TO_YUV(0x00FF00) },   // Green
-    { RGB_TO_YUV(0xADFF2F) },   // GreenYellow
-    { RGB_TO_YUV(0x228B22) },   // ForestGreen
-    { RGB_TO_YUV(0x006400) },   // DarkGreen
-    { RGB_TO_YUV(0x98F5FF) },   // Cadet Blue
-    { RGB_TO_YUV(0x6CA6CD) },   // Sky Blue
-    { RGB_TO_YUV(0x00008B) },   // Dark blue
-    { RGB_TO_YUV(0x551A8B) },   // Purple
-    { RGB_TO_YUV(0xFF0000) }    // Red
+    { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
+    { RGB_TO_YUV(0x00FF00) },   /* Green */
+    { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
+    { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
+    { RGB_TO_YUV(0x006400) },   /* DarkGreen */
+    { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
+    { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
+    { RGB_TO_YUV(0x00008B) },   /* Dark blue */
+    { RGB_TO_YUV(0x551A8B) },   /* Purple */
+    { RGB_TO_YUV(0xFF0000) }    /* Red */
 };
 
 static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
 {
-    { RGB_TO_YUV(0x00ff00) },   // Blue
-    { RGB_TO_YUV(0x0000ff) },   // Green
-    { RGB_TO_YUV(0xffff00) },   // Yellow
-    { RGB_TO_YUV(0xff0000) },   // Red
+    { RGB_TO_YUV(0x00ff00) },   /* Blue */
+    { RGB_TO_YUV(0x0000ff) },   /* Green */
+    { RGB_TO_YUV(0xffff00) },   /* Yellow */
+    { RGB_TO_YUV(0xff0000) },   /* Red */
 };
 
 static const short kernel5[] =
@@ -129,7 +129,7 @@ void vp8_post_proc_down_and_across_c
 
     for (row = 0; row < rows; row++)
     {
-        // post_proc_down for one row
+        /* post_proc_down for one row */
         p_src = src_ptr;
         p_dst = dst_ptr;
 
@@ -152,7 +152,7 @@ void vp8_post_proc_down_and_across_c
             p_dst[col] = v;
         }
 
-        // now post_proc_across
+        /* now post_proc_across */
         p_src = dst_ptr;
         p_dst = dst_ptr;
 
@@ -181,12 +181,12 @@ void vp8_post_proc_down_and_across_c
                 p_dst[col-2] = d[(col-2)&7];
         }
 
-        //handle the last two pixels
+        /* handle the last two pixels */
         p_dst[col-2] = d[(col-2)&7];
         p_dst[col-1] = d[(col-1)&7];
 
 
-        //next row
+        /* next row */
         src_ptr += pitch;
         dst_ptr += pitch;
     }
@@ -379,9 +379,9 @@ static void fillrd(struct postproc_state *state, int q, int a)
 
     sigma = ai + .5 + .6 * (63 - qi) / 63.0;
 
-    // set up a lookup table of 256 entries that matches
-    // a gaussian distribution with sigma determined by q.
-    //
+    /* set up a lookup table of 256 entries that matches
+     * a gaussian distribution with sigma determined by q.
+     */
     {
         double i;
         int next, j;
@@ -472,9 +472,10 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
     }
 }
 
-// Blend the macro block with a solid colored square.  Leave the
-// edges unblended to give distinction to macro blocks in areas
-// filled with the same color block.
+/* Blend the macro block with a solid colored square.  Leave the
+ * edges unblended to give distinction to macro blocks in areas
+ * filled with the same color block.
+ */
 void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
                         int y1, int u1, int v1, int alpha, int stride)
 {
@@ -575,7 +576,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     {
         *dest = *oci->frame_to_show;
 
-        // handle problem with extending borders
+        /* handle problem with extending borders */
         dest->y_width = oci->Width;
         dest->y_height = oci->Height;
         dest->uv_height = dest->y_height / 2;
@@ -644,7 +645,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -658,7 +659,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -676,7 +677,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -693,7 +694,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -714,7 +715,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -727,7 +728,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -736,7 +737,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
     }
 
-    // Draw motion vectors
+    /* Draw motion vectors */
     if (flags & VP8D_DEBUG_LEVEL5)
     {
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
@@ -777,7 +778,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    // Color in block modes
+    /* Color in block modes */
     if (flags & VP8D_DEBUG_LEVEL6)
     {
         int i, j;
@@ -813,7 +814,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    // Color in frame reference blocks
+    /* Color in frame reference blocks */
     if (flags & VP8D_DEBUG_LEVEL7)
     {
         int i, j;
@@ -851,7 +852,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
     *dest = oci->post_proc_buffer;
 
-    // handle problem with extending borders
+    /* handle problem with extending borders */
     dest->y_width = oci->Width;
     dest->y_height = oci->Height;
     dest->uv_height = dest->y_height / 2;
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 651671a43..6a51d2cd6 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -23,7 +23,7 @@ enum
     VP8D_DEBUG_LEVEL4   = 1<<6,
     VP8D_DEBUG_LEVEL5   = 1<<7,
     VP8D_DEBUG_LEVEL6   = 1<<8,
-    VP8D_DEBUG_LEVEL7   = 1<<9,
+    VP8D_DEBUG_LEVEL7   = 1<<9
 };
 
 #endif
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index 2cb3bc6ce..d72d6e410 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -112,15 +112,15 @@ void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     BLOCKD *b = &x->block[0];
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[4];
+    /*b = &x->block[4];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[8];
+    /*b = &x->block[8];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[12];
+    /*b = &x->block[12];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 #else
@@ -149,7 +149,7 @@ void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     b += 4;
 
-    //b = &x->block[16];
+    /*b = &x->block[16];*/
 
     RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     b++;
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index c432c7bd2..1e6e343fc 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -18,10 +18,10 @@
     void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)
 
 #define prototype_recon_block(sym) \
-    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
+    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch)
 
 #define prototype_recon_macroblock(sym) \
-    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x);
+    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
 
 struct vp8_recon_rtcd_vtable;
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 2a7f12908..74871c0e8 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -18,9 +18,10 @@
 #include "onyxc_int.h"
 #endif
 
-// use this define on systems where unaligned int reads and writes are
-// not allowed, i.e. ARM architectures
-//#define MUST_BE_ALIGNED
+/* use this define on systems where unaligned int reads and writes are
+ * not allowed, i.e. ARM architectures
+ */
+/*#define MUST_BE_ALIGNED*/
 
 
 static const int bbb[4] = {0, 2, 8, 10};
@@ -255,7 +256,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
     }
 }
 
-//encoder only
+/*encoder only*/
 void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 {
 
@@ -491,15 +492,16 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
 }
 
 
-// The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
-// situation, we can write the result directly to dst buffer instead of writing it to predictor
-// buffer and then copying it to dst buffer.
+/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
+ * situation, we can write the result directly to dst buffer instead of writing it to predictor
+ * buffer and then copying it to dst buffer.
+ */
 static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
 {
     int r;
     unsigned char *ptr_base;
     unsigned char *ptr;
-    //unsigned char *pred_ptr = d->predictor;
+    /*unsigned char *pred_ptr = d->predictor;*/
     int dst_stride = d->dst_stride;
     int pre_stride = d->pre_stride;
 
@@ -535,8 +537,8 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp
 
 void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 {
-    //unsigned char *pred_ptr = x->block[0].predictor;
-    //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;
+    /*unsigned char *pred_ptr = x->block[0].predictor;
+    unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
     unsigned char *pred_ptr = x->predictor;
     unsigned char *dst_ptr = x->dst.y_buffer;
 
@@ -546,26 +548,26 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
         unsigned char *ptr_base;
         unsigned char *ptr;
         unsigned char *uptr, *vptr;
-        //unsigned char *pred_ptr = x->predictor;
-        //unsigned char *upred_ptr = &x->predictor[256];
-        //unsigned char *vpred_ptr = &x->predictor[320];
+        /*unsigned char *pred_ptr = x->predictor;
+        unsigned char *upred_ptr = &x->predictor[256];
+        unsigned char *vpred_ptr = &x->predictor[320];*/
         unsigned char *udst_ptr = x->dst.u_buffer;
         unsigned char *vdst_ptr = x->dst.v_buffer;
 
         int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
         int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
-        int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
+        int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
 
         ptr_base = x->pre.y_buffer;
         ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
 
         if ((mv_row | mv_col) & 7)
         {
-            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
         }
         else
         {
-            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
         }
 
         mv_row = x->block[16].bmi.mv.as_mv.row;
@@ -588,8 +590,9 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
     }
     else
     {
-        //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
-        //if sth is wrong, go back to what it is in build_inter_predictors_mb.
+        /* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
+         * if sth is wrong, go back to what it is in build_inter_predictors_mb.
+         */
         int i;
 
         if (x->mode_info_context->mbmi.partitioning < 3)
@@ -597,7 +600,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
             for (i = 0; i < 4; i++)
             {
                 BLOCKD *d = &x->block[bbb[i]];
-                //vp8_build_inter_predictors4b(x, d, 16);
+                /*vp8_build_inter_predictors4b(x, d, 16);*/
 
                 {
                     unsigned char *ptr_base;
@@ -609,11 +612,11 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
                     if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
                     {
-                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                     }
                     else
                     {
-                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                     }
                 }
             }
@@ -627,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
                 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                 {
-                    //vp8_build_inter_predictors2b(x, d0, 16);
+                    /*vp8_build_inter_predictors2b(x, d0, 16);*/
                     unsigned char *ptr_base;
                     unsigned char *ptr;
                     unsigned char *pred_ptr = d0->predictor;
@@ -659,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
             if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
             {
-                //vp8_build_inter_predictors2b(x, d0, 8);
+                /*vp8_build_inter_predictors2b(x, d0, 8);*/
                 unsigned char *ptr_base;
                 unsigned char *ptr;
                 unsigned char *pred_ptr = d0->predictor;
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index ce0b1b8ec..9cf5f6a88 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -14,9 +14,9 @@
 #include "reconintra.h"
 #include "vpx_mem/vpx_mem.h"
 
-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
-
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
     int i;
@@ -42,7 +42,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
         yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
     }
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -156,14 +156,14 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
     int r, c, i;
 
     int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; //x->predictor;
+    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
 
     for (i = 0; i < 16; i++)
     {
         yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
     }
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -204,11 +204,11 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
             expected_dc = 128;
         }
 
-        //vpx_memset(ypred_ptr, expected_dc, 256);
+        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
         for (r = 0; r < 16; r++)
         {
             vpx_memset(ypred_ptr, expected_dc, 16);
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -222,7 +222,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
             ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
             ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
             ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -233,7 +233,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
         {
 
             vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -256,7 +256,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
                 ypred_ptr[c] = pred;
             }
 
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -418,8 +418,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
     unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
     unsigned char vleft_col[20];
     unsigned char vtop_left = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
-    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
     int uv_stride = x->dst.uv_stride;
 
     int i, j;
@@ -472,14 +472,14 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         }
 
 
-        //vpx_memset(upred_ptr,expected_udc,64);
-        //vpx_memset(vpred_ptr,expected_vdc,64);
+        /*vpx_memset(upred_ptr,expected_udc,64);*/
+        /*vpx_memset(vpred_ptr,expected_vdc,64);*/
         for (i = 0; i < 8; i++)
         {
             vpx_memset(upred_ptr, expected_udc, 8);
             vpx_memset(vpred_ptr, expected_vdc, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
     break;
@@ -491,8 +491,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         {
             vpx_memcpy(upred_ptr, uabove_row, 8);
             vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -505,8 +505,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         {
             vpx_memset(upred_ptr, uleft_col[i], 8);
             vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
 
@@ -538,8 +538,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
                 vpred_ptr[j] = predv;
             }
 
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 315135184..db44fa190 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -56,7 +56,7 @@ void vp8_predict_intra4x4(BLOCKD *x,
     break;
     case B_TM_PRED:
     {
-        // prediction similar to true_motion prediction
+        /* prediction similar to true_motion prediction */
         for (r = 0; r < 4; r++)
         {
             for (c = 0; c < 4; c++)
@@ -295,8 +295,9 @@ void vp8_predict_intra4x4(BLOCKD *x,
 
     }
 }
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
+/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+ * to the right prediction have filled in pixels to use.
+ */
 void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
 {
     unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index 8647ae2aa..7976e252b 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -16,7 +16,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
 {
     int i;
 
-    // set up frame new frame for intra coded blocks
+    /* set up frame new frame for intra coded blocks */
     vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
     for (i = 0; i < ybf->y_height; i++)
         ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index b7922d385..1756100a7 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -57,7 +57,7 @@ static void plot (const int x, const int y, unsigned char *image, const int pitc
     image [x+y*pitch] ^= 255;
 }
 
-// Bresenham line algorithm
+/* Bresenham line algorithm */
 void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
 {
     int steep = abs(y1 - y0) > abs(x1 - x0);
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index f9a257460..1929f7c4f 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,7 +12,7 @@
 #ifndef _PTHREAD_EMULATION
 #define _PTHREAD_EMULATION
 
-#define VPXINFINITE 10000       //10second.
+#define VPXINFINITE 10000       /* 10second. */
 
 /* Thread management macros */
 #ifdef _WIN32
@@ -72,11 +72,11 @@
 #define sem_wait(sem) (semaphore_wait(*sem) )
 #define sem_post(sem) semaphore_signal(*sem)
 #define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
-#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
 #else
 #include <unistd.h>
 #include <sched.h>
-#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
 #endif
 /* Not Windows. Assume pthreads */
 
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 35e5be10c..ebf51c5ed 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -12,7 +12,7 @@
 #ifndef __INC_TREECODER_H
 #define __INC_TREECODER_H
 
-typedef unsigned char vp8bc_index_t; // probability index
+typedef unsigned char vp8bc_index_t; /* probability index */
 
 
 typedef unsigned char vp8_prob;
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index f2a370298..22b531a76 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -64,32 +64,32 @@ typedef signed char     INT8;
 #endif
 
 #ifndef TYPE_INT16
-//#define TYPE_INT16
+/*#define TYPE_INT16*/
 typedef signed short    INT16;
 #endif
 
 #ifndef TYPE_INT32
-//#define TYPE_INT32
+/*#define TYPE_INT32*/
 typedef signed int      INT32;
 #endif
 
 #ifndef TYPE_UINT8
-//#define TYPE_UINT8
+/*#define TYPE_UINT8*/
 typedef unsigned char   UINT8;
 #endif
 
 #ifndef TYPE_UINT32
-//#define TYPE_UINT32
+/*#define TYPE_UINT32*/
 typedef unsigned int    UINT32;
 #endif
 
 #ifndef TYPE_UINT16
-//#define TYPE_UINT16
+/*#define TYPE_UINT16*/
 typedef unsigned short  UINT16;
 #endif
 
 #ifndef TYPE_BOOL
-//#define TYPE_BOOL
+/*#define TYPE_BOOL*/
 typedef int             BOOL;
 #endif
 
@@ -101,7 +101,7 @@ typedef __int64 INT64;
 
 #ifndef TYPE_INT64
 #ifdef _TMS320C6X
-//for now we only have 40bits
+/* for now we only have 40bits */
 typedef long INT64;
 #else
 typedef long long INT64;
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 3ff8c4e12..93107e179 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -40,7 +40,7 @@ extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
 extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
 
 #if HAVE_MMX
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -66,7 +66,7 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -92,7 +92,7 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -122,7 +122,7 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
 }
 
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -153,7 +153,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
 #endif
 
 
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 #if HAVE_SSE2
 void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
@@ -177,7 +177,7 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 }
 
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -200,7 +200,7 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 }
 
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -227,7 +227,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 950d96262..8dd07c90d 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -128,7 +128,7 @@ void vp8_sixtap_predict4x4_mmx
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
     HFilter = vp8_six_tap_mmx[xoffset];
     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
@@ -149,7 +149,7 @@ void vp8_sixtap_predict16x16_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -181,7 +181,7 @@ void vp8_sixtap_predict8x8_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -207,7 +207,7 @@ void vp8_sixtap_predict8x4_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -253,7 +253,7 @@ void vp8_sixtap_predict16x16_sse2
 
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -268,14 +268,14 @@ void vp8_sixtap_predict16x16_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
         vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
@@ -293,7 +293,7 @@ void vp8_sixtap_predict8x8_sse2
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
 
     if (xoffset)
@@ -307,14 +307,14 @@ void vp8_sixtap_predict8x8_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
     }
@@ -331,7 +331,7 @@ void vp8_sixtap_predict8x4_sse2
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
 
     if (xoffset)
@@ -345,14 +345,14 @@ void vp8_sixtap_predict8x4_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
     }
@@ -444,13 +444,13 @@ void vp8_sixtap_predict16x16_ssse3
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
     }
 }
@@ -481,7 +481,7 @@ void vp8_sixtap_predict8x8_ssse3
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
     }
 }
@@ -508,13 +508,13 @@ void vp8_sixtap_predict8x4_ssse3
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
     }
 }
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index b983bc27e..38500fd01 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -74,7 +74,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
 #if CONFIG_POSTPROC
         rtcd->postproc.down        = vp8_mbpost_proc_down_mmx;
-        //rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
+        /*rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;*/
         rtcd->postproc.downacross  = vp8_post_proc_down_and_across_mmx;
         rtcd->postproc.addnoise    = vp8_plane_add_noise_mmx;
 #endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index 77cff47db..e9741e286 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -35,7 +35,7 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
         pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
         pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
         pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
         pbi->dboolhuff.start             = vp8dx_start_decode_c;
         pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
         pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
@@ -54,7 +54,7 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
         pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
         pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
         pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
         pbi->dboolhuff.start             = vp8dx_start_decode_c;
         pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
         pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index d2ebc71e8..985951c7c 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -11,7 +11,7 @@
  * to be useless. However, its been left (for now)
  * for reference.
  */
-/*
+#if 0
 #if HAVE_ARMV6
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_v6
@@ -24,7 +24,7 @@
 
 #undef vp8_dbool_devalue
 #define vp8_dbool_devalue vp8_decode_value_v6
-#endif // HAVE_ARMV6
+#endif /* HAVE_ARMV6 */
 
 #if HAVE_ARMV7
 #undef vp8_dbool_start
@@ -38,6 +38,6 @@
 
 #undef vp8_dbool_devalue
 #define vp8_dbool_devalue vp8_decode_value_neon
-#endif // HAVE_ARMV7
-*/
-#endif // DBOOLHUFF_ARM_H
+#endif /* HAVE_ARMV7 */
+#endif
+#endif /* DBOOLHUFF_ARM_H */
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index c72bc0330..c851aa7e5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -45,7 +45,7 @@ typedef struct
     const unsigned char *source, unsigned int source_sz)
 #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
 #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
-#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
+#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
 
 #if ARCH_ARM
 #include "arm/dboolhuff_arm.h"
@@ -84,16 +84,17 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
     vp8_dbool_devalue_fn_t devalue;
 } vp8_dboolhuff_rtcd_vtable_t;
 
-// There are no processor-specific versions of these
-// functions right now. Disable RTCD to avoid using
-// function pointers which gives a speed boost
-//#ifdef ENABLE_RUNTIME_CPU_DETECT
-//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
-//#define IF_RTCD(x) (x)
-//#else
+/* There are no processor-specific versions of these
+ * functions right now. Disable RTCD to avoid using
+ * function pointers which gives a speed boost
+ */
+/*#ifdef ENABLE_RUNTIME_CPU_DETECT
+#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
+#define IF_RTCD(x) (x)
+#else*/
 #define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
 #define IF_RTCD(x) NULL
-//#endif
+/*#endif*/
 
 DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 106dbde06..203d72dd2 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -51,10 +51,10 @@ static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
 
 static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
 {
-    // Is segmentation enabled
+    /* Is segmentation enabled */
     if (x->segmentation_enabled && x->update_mb_segmentation_map)
     {
-        // If so then read the segment id.
+        /* If so then read the segment id. */
         if (vp8_read(r, x->mb_segment_tree_probs[0]))
             mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
         else
@@ -70,14 +70,15 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_co
         {
             MB_PREDICTION_MODE y_mode;
 
-            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
-            // By default on a key frame reset all MBs to segment 0
+            /* Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
+             * By default on a key frame reset all MBs to segment 0
+             */
             m->mbmi.segment_id = 0;
 
             if (pbi->mb.update_mb_segmentation_map)
                 vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
 
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+            /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
             if (pbi->common.mb_no_coeff_skip)
                 m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
             else
@@ -306,8 +307,9 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
     mbmi->need_to_clamp_mvs = 0;
-    // Distance of Mb to the various image edges.
-    // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+    /* Distance of Mb to the various image edges.
+     * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+     */
     pbi->mb.mb_to_left_edge =
     mb_to_left_edge = -((mb_col * 16) << 3);
     mb_to_left_edge -= LEFT_TOP_MARGIN;
@@ -316,11 +318,11 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
     mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
 
-    // If required read in new segmentation data for this MB
+    /* If required read in new segmentation data for this MB */
     if (pbi->mb.update_mb_segmentation_map)
         vp8_read_mb_features(bc, mbmi, &pbi->mb);
 
-    // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+    /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
     if (pbi->common.mb_no_coeff_skip)
         mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
     else
@@ -362,7 +364,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
                 mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
 
-                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
+                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
                 {
                 case NEW4X4:
                     read_mv(bc, mv, (const MV_CONTEXT *) mvc);
@@ -425,7 +427,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         case NEARMV:
             *mv = nearby;
-            // Clip "next_nearest" so that it does not extend to far out of image
+            /* Clip "next_nearest" so that it does not extend to far out of image */
             mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
             mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
             mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
@@ -434,7 +436,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         case NEARESTMV:
             *mv = nearest;
-            // Clip "next_nearest" so that it does not extend to far out of image
+            /* Clip "next_nearest" so that it does not extend to far out of image */
             mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
             mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
             mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
@@ -462,12 +464,12 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         propagate_mv:  /* same MV throughout */
             {
-                //int i=0;
-                //do
-                //{
-                //  mi->bmi[i].mv.as_mv = *mv;
-                //}
-                //while( ++i < 16);
+                /*int i=0;
+                do
+                {
+                  mi->bmi[i].mv.as_mv = *mv;
+                }
+                while( ++i < 16);*/
 
                 mi->bmi[0].mv.as_mv = *mv;
                 mi->bmi[1].mv.as_mv = *mv;
@@ -541,16 +543,16 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
         while (++mb_col < pbi->common.mb_cols)
         {
-//          vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);
+            /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
             if(pbi->common.frame_type == KEY_FRAME)
                 vp8_kfread_modes(pbi, mi, mb_row, mb_col);
             else
                 vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
-            mi++;       // next macroblock
+            mi++;       /* next macroblock */
         }
 
-        mi++;           // skip left predictor each row
+        mi++;           /* skip left predictor each row */
     }
 }
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 273bdb694..1bdc3d946 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -50,7 +50,7 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
         pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
         pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
 
-        // all the ac values = ;
+        /* all the ac values = ; */
         for (i = 1; i < 16; i++)
         {
             int rc = vp8_default_zig_zag1d[i];
@@ -69,24 +69,24 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
     MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
     VP8_COMMON *const pc = & pbi->common;
 
-    // Decide whether to use the default or alternate baseline Q value.
+    /* Decide whether to use the default or alternate baseline Q value. */
     if (xd->segmentation_enabled)
     {
-        // Abs Value
+        /* Abs Value */
         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
 
-        // Delta Value
+        /* Delta Value */
         else
         {
             QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
-            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
+            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    /* Clamp to valid range */
         }
     }
     else
         QIndex = pc->base_qindex;
 
-    // Set up the block level dequant pointers
+    /* Set up the block level dequant pointers */
     for (i = 0; i < 16; i++)
     {
         xd->block[i].dequant = pc->Y1dequant[QIndex];
@@ -107,8 +107,9 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
 #define RTCD_VTABLE(x) NULL
 #endif
 
-//skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
-// to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
+ *  to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+ */
 static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
@@ -207,7 +208,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
-    // do prediction
+    /* do prediction */
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8_build_intra_predictors_mbuv(xd);
@@ -224,13 +225,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         vp8_build_inter_predictors_mb(xd);
     }
 
-    // dequantization and idct
+    /* dequantization and idct */
     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
         if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
@@ -336,7 +337,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
     recon_yoffset = mb_row * recon_y_stride * 16;
     recon_uvoffset = mb_row * recon_uv_stride * 8;
-    // reset above block coeffs
+    /* reset above block coeffs */
 
     xd->above_context = pc->above_context;
     xd->up_available = (mb_row != 0);
@@ -356,8 +357,9 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
             }
         }
 
-        // Distance of Mb to the various image edges.
-        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+        /* Distance of Mb to the various image edges.
+         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+         */
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -367,7 +369,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
         xd->left_available = (mb_col != 0);
 
-        // Select the appropriate reference frame for this MB
+        /* Select the appropriate reference frame for this MB */
         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
             ref_fb_idx = pc->lst_fb_idx;
         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -399,7 +401,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
     }
 
-    // adjust to the next row of mbs
+    /* adjust to the next row of mbs */
     vp8_extend_mb_row(
         &pc->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
@@ -445,7 +447,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
     for (i = 0; i < num_part; i++)
     {
         const unsigned char *partition_size_ptr = cx_data + i * 3;
-        unsigned int         partition_size;
+        ptrdiff_t            partition_size;
 
         /* Calculate the length of this partition. The last partition
          * size is implicit.
@@ -496,7 +498,7 @@ static void init_frame(VP8D_COMP *pbi)
 
     if (pc->frame_type == KEY_FRAME)
     {
-        // Various keyframe initializations
+        /* Various keyframe initializations */
         vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
 
         vp8_init_mbmode_probs(pc);
@@ -504,22 +506,23 @@ static void init_frame(VP8D_COMP *pbi)
         vp8_default_coef_probs(pc);
         vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
 
-        // reset the segment feature data to 0 with delta coding (Default state).
+        /* reset the segment feature data to 0 with delta coding (Default state). */
         vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
         xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
 
-       // reset the mode ref deltasa for loop filter
+        /* reset the mode ref deltasa for loop filter */
         vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
         vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
 
-        // All buffers are implicitly updated on key frames.
+        /* All buffers are implicitly updated on key frames. */
         pc->refresh_golden_frame = 1;
         pc->refresh_alt_ref_frame = 1;
         pc->copy_buffer_to_gf = 0;
         pc->copy_buffer_to_arf = 0;
 
-        // Note that Golden and Altref modes cannot be used on a key frame so
-        // ref_frame_sign_bias[] is undefined and meaningless
+        /* Note that Golden and Altref modes cannot be used on a key frame so
+         * ref_frame_sign_bias[] is undefined and meaningless
+         */
         pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
         pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
     }
@@ -530,7 +533,7 @@ static void init_frame(VP8D_COMP *pbi)
         else
             pc->mcomp_filter_type = BILINEAR;
 
-        // To enable choice of different interploation filters
+        /* To enable choice of different interploation filters */
         if (pc->mcomp_filter_type == SIXTAP)
         {
             xd->subpixel_predict      = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4);
@@ -561,7 +564,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     MACROBLOCKD *const xd  = & pbi->mb;
     const unsigned char *data = (const unsigned char *)pbi->Source;
     const unsigned char *const data_end = data + pbi->source_sz;
-    unsigned int first_partition_length_in_bytes;
+    ptrdiff_t first_partition_length_in_bytes;
 
     int mb_row;
     int i, j, k, l;
@@ -587,7 +590,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         const int Width = pc->Width;
         const int Height = pc->Height;
 
-        // vet via sync code
+        /* vet via sync code */
         if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
             vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
                                "Invalid frame sync code");
@@ -643,12 +646,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         pc->clamp_type  = (CLAMP_TYPE)vp8_read_bit(bc);
     }
 
-    // Is segmentation enabled
+    /* Is segmentation enabled */
     xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
 
     if (xd->segmentation_enabled)
     {
-        // Signal whether or not the segmentation map is being explicitly updated this frame.
+        /* Signal whether or not the segmentation map is being explicitly updated this frame. */
         xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc);
         xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc);
 
@@ -658,12 +661,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
             vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
 
-            // For each segmentation feature (Quant and loop filter level)
+            /* For each segmentation feature (Quant and loop filter level) */
             for (i = 0; i < MB_LVL_MAX; i++)
             {
                 for (j = 0; j < MAX_MB_SEGMENTS; j++)
                 {
-                    // Frame level data
+                    /* Frame level data */
                     if (vp8_read_bit(bc))
                     {
                         xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]);
@@ -679,57 +682,57 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
         if (xd->update_mb_segmentation_map)
         {
-            // Which macro block level features are enabled
+            /* Which macro block level features are enabled */
             vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
 
-            // Read the probs used to decode the segment id for each macro block.
+            /* Read the probs used to decode the segment id for each macro block. */
             for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
             {
-                // If not explicitly set value is defaulted to 255 by memset above
+                /* If not explicitly set value is defaulted to 255 by memset above */
                 if (vp8_read_bit(bc))
                     xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
             }
         }
     }
 
-    // Read the loop filter level and type
+    /* Read the loop filter level and type */
     pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
     pc->filter_level = vp8_read_literal(bc, 6);
     pc->sharpness_level = vp8_read_literal(bc, 3);
 
-    // Read in loop filter deltas applied at the MB level based on mode or ref frame.
+    /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
     xd->mode_ref_lf_delta_update = 0;
     xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc);
 
     if (xd->mode_ref_lf_delta_enabled)
     {
-        // Do the deltas need to be updated
+        /* Do the deltas need to be updated */
         xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc);
 
         if (xd->mode_ref_lf_delta_update)
         {
-            // Send update
+            /* Send update */
             for (i = 0; i < MAX_REF_LF_DELTAS; i++)
             {
                 if (vp8_read_bit(bc))
                 {
-                    //sign = vp8_read_bit( bc );
+                    /*sign = vp8_read_bit( bc );*/
                     xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
 
-                    if (vp8_read_bit(bc))        // Apply sign
+                    if (vp8_read_bit(bc))        /* Apply sign */
                         xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
                 }
             }
 
-            // Send update
+            /* Send update */
             for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
             {
                 if (vp8_read_bit(bc))
                 {
-                    //sign = vp8_read_bit( bc );
+                    /*sign = vp8_read_bit( bc );*/
                     xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
 
-                    if (vp8_read_bit(bc))        // Apply sign
+                    if (vp8_read_bit(bc))        /* Apply sign */
                         xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
                 }
             }
@@ -739,11 +742,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     setup_token_decoder(pbi, data + first_partition_length_in_bytes);
     xd->current_bc = &pbi->bc2;
 
-    // Read the default quantizers.
+    /* Read the default quantizers. */
     {
         int Q, q_update;
 
-        Q = vp8_read_literal(bc, 7);  // AC 1st order Q = default
+        Q = vp8_read_literal(bc, 7);  /* AC 1st order Q = default */
         pc->base_qindex = Q;
         q_update = 0;
         pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
@@ -755,20 +758,21 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         if (q_update)
             vp8cx_init_de_quantizer(pbi);
 
-        // MB level dequantizer setup
+        /* MB level dequantizer setup */
         mb_init_dequantizer(pbi, &pbi->mb);
     }
 
-    // Determine if the golden frame or ARF buffer should be updated and how.
-    // For all non key frames the GF and ARF refresh flags and sign bias
-    // flags must be set explicitly.
+    /* Determine if the golden frame or ARF buffer should be updated and how.
+     * For all non key frames the GF and ARF refresh flags and sign bias
+     * flags must be set explicitly.
+     */
     if (pc->frame_type != KEY_FRAME)
     {
-        // Should the GF or ARF be updated from the current frame
+        /* Should the GF or ARF be updated from the current frame */
         pc->refresh_golden_frame = vp8_read_bit(bc);
         pc->refresh_alt_ref_frame = vp8_read_bit(bc);
 
-        // Buffer to buffer copy flags.
+        /* Buffer to buffer copy flags. */
         pc->copy_buffer_to_gf = 0;
 
         if (!pc->refresh_golden_frame)
@@ -806,7 +810,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
 
     {
-        // read coef probability tree
+        /* read coef probability tree */
 
         for (i = 0; i < BLOCK_TYPES; i++)
             for (j = 0; j < COEF_BANDS; j++)
@@ -827,7 +831,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
     vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
 
-    // set up frame new frame for intra coded blocks
+    /* set up frame new frame for intra coded blocks */
     if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
         vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
 
@@ -835,10 +839,10 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     vp8_build_block_doffsets(xd);
 
-    // clear out the coeff buffer
+    /* clear out the coeff buffer */
     vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
 
-    // Read the mb_no_coeff_skip flag
+    /* Read the mb_no_coeff_skip flag */
     pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
 
 
@@ -853,20 +857,20 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         vp8mt_decode_mb_rows(pbi, xd);
         if(pbi->common.filter_level)
         {
-            //vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
+            /*vp8_mt_loop_filter_frame(pbi);*/ /*cm, &pbi->mb, cm->filter_level);*/
 
             pc->last_frame_type = pc->frame_type;
             pc->last_filter_type = pc->filter_type;
             pc->last_sharpness_level = pc->sharpness_level;
         }
-        vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]);    //cm->frame_to_show);
+        vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]);    /*cm->frame_to_show);*/
     }
     else
     {
         int ibc = 0;
         int num_part = 1 << pc->multi_token_partition;
 
-        // Decode the individual macro block
+        /* Decode the individual macro block */
         for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
         {
 
@@ -886,9 +890,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     stop_token_decoder(pbi);
 
-    // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
+    /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos); */
 
-    // If this was a kf or Gf note the Q used
+    /* If this was a kf or Gf note the Q used */
     if ((pc->frame_type == KEY_FRAME) ||
          pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
     {
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index f5d576ac7..84a9fd943 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -45,7 +45,7 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
         input[i] = dq[i] * input[i];
     }
 
-    // the idct halves ( >> 1) the pitch
+    /* the idct halves ( >> 1) the pitch */
     vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
@@ -87,7 +87,7 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
         input[i] = dq[i] * input[i];
     }
 
-    // the idct halves ( >> 1) the pitch
+    /* the idct halves ( >> 1) the pitch */
     vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 65c7d5370..7d013d240 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -44,18 +44,18 @@ typedef struct
 
 DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
 {
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //ZERO_TOKEN
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //DCT_VAL_CATEGORY1
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY2
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY3
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY4
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY5
-    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, //DCT_VAL_CATEGORY6
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  // EOB TOKEN
+    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* ZERO_TOKEN */
+    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* ONE_TOKEN */
+    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* TWO_TOKEN */
+    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* THREE_TOKEN */
+    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* FOUR_TOKEN */
+    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* DCT_VAL_CATEGORY1 */
+    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY2 */
+    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY3 */
+    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY4 */
+    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY5 */
+    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, /* DCT_VAL_CATEGORY6 */
+    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /*  EOB TOKEN */
 };
 
 
@@ -75,13 +75,14 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 }
 
 #if CONFIG_ARM_ASM_DETOK
-// mashup of vp8_block2left and vp8_block2above so we only need one pointer
-// for the assembly version.
+/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
+ * for the assembly version.
+ */
 DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
 {
-    //vp8_block2left
+    /* vp8_block2left */
     0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
-    //vp8_block2above
+    /* vp8_block2above */
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
 };
 
@@ -297,7 +298,7 @@ BLOCK_LOOP:
 
     c = (INT16)(!type);
 
-//    Dest = ((A)!=0) + ((B)!=0);
+    /*Dest = ((A)!=0) + ((B)!=0);*/
     VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
     Prob = coef_probs;
     Prob += v * ENTROPY_NODES;
@@ -387,7 +388,7 @@ ONE_CONTEXT_NODE_0_:
 
     qcoeff_ptr [ scan[15] ] = (INT16) v;
 BLOCK_FINISHED:
-    *a = *l = ((eobs[i] = c) != !type);   // any nonzero data?
+    *a = *l = ((eobs[i] = c) != !type);   /* any nonzero data? */
     eobtotal += c;
     qcoeff_ptr += 16;
 
@@ -422,4 +423,4 @@ BLOCK_FINISHED:
     return eobtotal;
 
 }
-#endif //!CONFIG_ASM_DETOK
+#endif /*!CONFIG_ASM_DETOK*/
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 84de7af43..2e284729b 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -18,7 +18,7 @@ extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
 
 void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 {
-    // Pure C:
+    /* Pure C: */
 #if CONFIG_RUNTIME_CPU_DETECT
     pbi->mb.rtcd                     = &pbi->common.rtcd;
     pbi->dequant.block               = vp8_dequantize_b_c;
@@ -29,7 +29,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
     pbi->dboolhuff.start             = vp8dx_start_decode_c;
     pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
 #endif
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index b5a6e3e85..6eda45e4a 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -113,12 +113,13 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
     pbi->common.current_video_frame = 0;
     pbi->ready_for_new_data = 1;
 
-    pbi->CPUFreq = 0; //vp8_get_processor_freq();
+    pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
     pbi->max_threads = oxcf->max_threads;
     vp8_decoder_create_threads(pbi);
 
-    //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
-    // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+    /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+     *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+     */
     vp8cx_init_de_quantizer(pbi);
 
     {
@@ -223,7 +224,7 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
     return 0;
 }
 
-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
@@ -250,7 +251,7 @@ static void ref_cnt_fb (int *buf, int *idx, int new_idx)
     buf[new_idx]++;
 }
 
-// If any buffer copy / swapping is signalled it should be done here.
+/* If any buffer copy / swapping is signalled it should be done here. */
 static int swap_frame_buffers (VP8_COMMON *cm)
 {
     int fb_to_update_with, err = 0;
@@ -260,10 +261,11 @@ static int swap_frame_buffers (VP8_COMMON *cm)
     else
         fb_to_update_with = cm->new_fb_idx;
 
-    // The alternate reference frame or golden frame can be updated
-    //  using the new, last, or golden/alt ref frame.  If it
-    //  is updated using the newly decoded frame it is a refresh.
-    //  An update using the last or golden/alt ref frame is a copy.
+    /* The alternate reference frame or golden frame can be updated
+     *  using the new, last, or golden/alt ref frame.  If it
+     *  is updated using the newly decoded frame it is a refresh.
+     *  An update using the last or golden/alt ref frame is a copy.
+     */
     if (cm->copy_buffer_to_arf)
     {
         int new_fb = 0;
@@ -322,8 +324,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     int retcode = 0;
     struct vpx_usec_timer timer;
 
-//  if(pbi->ready_for_new_data == 0)
-//      return -1;
+    /*if(pbi->ready_for_new_data == 0)
+        return -1;*/
 
     if (ptr == 0)
     {
@@ -363,7 +365,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     vpx_usec_timer_start(&timer);
 
-    //cm->current_video_frame++;
+    /*cm->current_video_frame++;*/
     pbi->Source = source;
     pbi->source_sz = size;
 
@@ -423,7 +425,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         {
             struct vpx_usec_timer lpftimer;
             vpx_usec_timer_start(&lpftimer);
-            // Apply the loop filter if appropriate.
+            /* Apply the loop filter if appropriate. */
 
             vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
 
@@ -438,8 +440,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     }
 
 #if 0
-    // DEBUG code
-    //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+    /* DEBUG code */
+    /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
     if (cm->current_video_frame <= 5)
         write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
 #endif
@@ -451,7 +453,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->time_decoding += pbi->decode_microseconds;
 
-//  vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+    /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
 
     if (cm->show_frame)
         cm->current_video_frame++;
@@ -512,7 +514,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
     if (pbi->ready_for_new_data == 1)
         return ret;
 
-    // ie no raw frame to show!!!
+    /* ie no raw frame to show!!! */
     if (pbi->common.show_frame == 0)
         return ret;
 
@@ -538,7 +540,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
         ret = -1;
     }
 
-#endif //!CONFIG_POSTPROC
+#endif /*!CONFIG_POSTPROC*/
     vp8_clear_system_state();
     return ret;
 }
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 522ac22d2..7593edf27 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -93,18 +93,18 @@ typedef struct VP8Decompressor
     int decoding_thread_count;
     int allocated_decoding_thread_count;
 
-    // variable for threading
+    /* variable for threading */
 #if CONFIG_MULTITHREAD
     int mt_baseline_filter_level[MAX_MB_SEGMENTS];
     int sync_range;
-    int *mt_current_mb_col;                  // Each row remembers its already decoded column.
+    int *mt_current_mb_col;                  /* Each row remembers its already decoded column. */
 
-    unsigned char **mt_yabove_row;           // mb_rows x width
+    unsigned char **mt_yabove_row;           /* mb_rows x width */
     unsigned char **mt_uabove_row;
     unsigned char **mt_vabove_row;
-    unsigned char **mt_yleft_col;            // mb_rows x 16
-    unsigned char **mt_uleft_col;            // mb_rows x 8
-    unsigned char **mt_vleft_col;            // mb_rows x 8
+    unsigned char **mt_yleft_col;            /* mb_rows x 16 */
+    unsigned char **mt_uleft_col;            /* mb_rows x 8 */
+    unsigned char **mt_vleft_col;            /* mb_rows x 8 */
 
     MB_ROW_DEC           *mb_row_di;
     DECODETHREAD_DATA    *de_thread_data;
@@ -112,7 +112,7 @@ typedef struct VP8Decompressor
     pthread_t           *h_decoding_thread;
     sem_t               *h_event_start_decoding;
     sem_t                h_event_end_decoding;
-    // end of threading data
+    /* end of threading data */
 #endif
 
     vp8_reader *mbc;
diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
index 4d395629d..ad4324b27 100644
--- a/vp8/decoder/reconintra_mt.c
+++ b/vp8/decoder/reconintra_mt.c
@@ -15,16 +15,17 @@
 #include "vpx_mem/vpx_mem.h"
 #include "onyxd_int.h"
 
-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
 
 void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yabove_row;    /* = x->dst.y_buffer - x->dst.y_stride; */
     unsigned char *yleft_col;
     unsigned char yleft_buf[16];
-    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char ytop_left;      /* = yabove_row[-1]; */
     unsigned char *ypred_ptr = x->predictor;
     int r, c, i;
 
@@ -43,7 +44,7 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
 
     ytop_left = yabove_row[-1];
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -156,15 +157,15 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
 void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yabove_row;    /* = x->dst.y_buffer - x->dst.y_stride; */
     unsigned char *yleft_col;
     unsigned char yleft_buf[16];
-    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char ytop_left;      /* = yabove_row[-1]; */
     unsigned char *ypred_ptr = x->predictor;
     int r, c, i;
 
     int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; //x->predictor;
+    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
 
     if (pbi->common.filter_level)
     {
@@ -181,7 +182,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
 
     ytop_left = yabove_row[-1];
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -222,11 +223,11 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
             expected_dc = 128;
         }
 
-        //vpx_memset(ypred_ptr, expected_dc, 256);
+        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
         for (r = 0; r < 16; r++)
         {
             vpx_memset(ypred_ptr, expected_dc, 16);
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -240,7 +241,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
             ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
             ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
             ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -251,7 +252,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
         {
 
             vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -274,7 +275,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
                 ypred_ptr[c] = pred;
             }
 
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -299,14 +300,14 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
 void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *uabove_row;   // = x->dst.u_buffer - x->dst.uv_stride;
-    unsigned char *uleft_col;    //[16];
+    unsigned char *uabove_row;   /* = x->dst.u_buffer - x->dst.uv_stride; */
+    unsigned char *uleft_col;    /*[16];*/
     unsigned char uleft_buf[8];
-    unsigned char utop_left;     // = uabove_row[-1];
-    unsigned char *vabove_row;   // = x->dst.v_buffer - x->dst.uv_stride;
-    unsigned char *vleft_col;    //[20];
+    unsigned char utop_left;     /* = uabove_row[-1]; */
+    unsigned char *vabove_row;   /* = x->dst.v_buffer - x->dst.uv_stride; */
+    unsigned char *vleft_col;    /*[20];*/
     unsigned char vleft_buf[8];
-    unsigned char vtop_left;     // = vabove_row[-1];
+    unsigned char vtop_left;     /* = vabove_row[-1]; */
     unsigned char *upred_ptr = &x->predictor[256];
     unsigned char *vpred_ptr = &x->predictor[320];
     int i, j;
@@ -462,16 +463,16 @@ void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_ro
 void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *uabove_row;  // = x->dst.u_buffer - x->dst.uv_stride;
-    unsigned char *uleft_col;   //[16];
+    unsigned char *uabove_row;  /* = x->dst.u_buffer - x->dst.uv_stride; */
+    unsigned char *uleft_col;   /*[16];*/
     unsigned char uleft_buf[8];
-    unsigned char utop_left;    // = uabove_row[-1];
-    unsigned char *vabove_row;  // = x->dst.v_buffer - x->dst.uv_stride;
-    unsigned char *vleft_col;   //[20];
+    unsigned char utop_left;    /* = uabove_row[-1]; */
+    unsigned char *vabove_row;  /* = x->dst.v_buffer - x->dst.uv_stride; */
+    unsigned char *vleft_col;   /*[20];*/
     unsigned char vleft_buf[8];
-    unsigned char vtop_left;    // = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
-    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    unsigned char vtop_left;    /* = vabove_row[-1]; */
+    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
     int uv_stride = x->dst.uv_stride;
     int i, j;
 
@@ -539,14 +540,14 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         }
 
 
-        //vpx_memset(upred_ptr,expected_udc,64);
-        //vpx_memset(vpred_ptr,expected_vdc,64);
+        /*vpx_memset(upred_ptr,expected_udc,64);
+        vpx_memset(vpred_ptr,expected_vdc,64);*/
         for (i = 0; i < 8; i++)
         {
             vpx_memset(upred_ptr, expected_udc, 8);
             vpx_memset(vpred_ptr, expected_vdc, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
     break;
@@ -558,8 +559,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         {
             vpx_memcpy(upred_ptr, uabove_row, 8);
             vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -572,8 +573,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         {
             vpx_memset(upred_ptr, uleft_col[i], 8);
             vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
 
@@ -605,8 +606,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
                 vpred_ptr[j] = predv;
             }
 
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -640,13 +641,13 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
 #if CONFIG_MULTITHREAD
     int i, r, c;
 
-    unsigned char *Above;   // = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char *Above;   /* = *(x->base_dst) + x->dst - x->dst_stride; */
     unsigned char Left[4];
-    unsigned char top_left; // = Above[-1];
+    unsigned char top_left; /* = Above[-1]; */
 
     BLOCKD *x = &xd->block[num];
 
-    //Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).
+    /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
     if (num < 4 && pbi->common.filter_level)
         Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32;
     else
@@ -696,7 +697,7 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
     break;
     case B_TM_PRED:
     {
-        // prediction similar to true_motion prediction
+        /* prediction similar to true_motion prediction */
         for (r = 0; r < 4; r++)
         {
             for (c = 0; c < 4; c++)
@@ -945,12 +946,13 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
 #endif
 }
 
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
+/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+ * to the right prediction have filled in pixels to use.
+ */
 void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *above_right;   // = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+    unsigned char *above_right;   /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */
     unsigned int *src_ptr;
     unsigned int *dst_ptr0;
     unsigned int *dst_ptr1;
@@ -962,9 +964,9 @@ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
         above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
 
     src_ptr = (unsigned int *)above_right;
-    //dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
-    //dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
-    //dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
+    /*dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
+    dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
+    dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);*/
     dst_ptr0 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 3 * x->block[0].dst_stride);
     dst_ptr1 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 7 * x->block[0].dst_stride);
     dst_ptr2 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 11 * x->block[0].dst_stride);
diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h
index 5c42f819f..d401295b2 100644
--- a/vp8/decoder/reconintra_mt.h
+++ b/vp8/decoder/reconintra_mt.h
@@ -12,7 +12,7 @@
 #ifndef __INC_RECONINTRA_MT_H
 #define __INC_RECONINTRA_MT_H
 
-// reconintra functions used in multi-threaded decoder
+/* reconintra functions used in multi-threaded decoder */
 #if CONFIG_MULTITHREAD
 extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
 extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 2d7f7b9b8..fc2fad516 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -69,12 +69,12 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
+        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
-        //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
+        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
-        //unsigned char mode_ref_lf_delta_enabled;
-        //unsigned char mode_ref_lf_delta_update;
+        /*unsigned char mode_ref_lf_delta_enabled;
+        unsigned char mode_ref_lf_delta_update;*/
         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
 
@@ -113,7 +113,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
         eobtotal = vp8_decode_mb_tokens(pbi, xd);
     }
 
-    // Perform temporary clamping of the MV to be used for prediction
+    /* Perform temporary clamping of the MV to be used for prediction */
     if (do_clamp)
     {
         clamp_mvs(xd);
@@ -125,7 +125,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
     {
         xd->mode_info_context->mbmi.dc_diff = 0;
 
-        //mt_skip_recon_mb(pbi, xd, mb_row, mb_col);
+        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
         if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
         {
             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
@@ -141,7 +141,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
-    // do prediction
+    /* do prediction */
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
@@ -158,13 +158,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
         vp8_build_inter_predictors_mb(xd);
     }
 
-    // dequantization and idct
+    /* dequantization and idct */
     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
         if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
@@ -244,7 +244,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
         if (pbi->b_multithreaded_rd == 0)
             break;
 
-        //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
+        /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
         {
             if (pbi->b_multithreaded_rd == 0)
@@ -281,7 +281,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     recon_yoffset = mb_row * recon_y_stride * 16;
                     recon_uvoffset = mb_row * recon_uv_stride * 8;
-                    // reset above block coeffs
+                    /* reset above block coeffs */
 
                     xd->above_context = pc->above_context;
                     xd->left_context = &mb_row_left_context;
@@ -313,17 +313,19 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         if(pbi->common.filter_level)
                         {
-                            //update loopfilter info
+                            /*update loopfilter info*/
                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                             filter_level = pbi->mt_baseline_filter_level[Segment];
-                            // Distance of Mb to the various image edges.
-                            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                            // Apply any context driven MB level adjustment
+                            /* Distance of Mb to the various image edges.
+                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                             * Apply any context driven MB level adjustment
+                             */
                             vp8_adjust_mb_lf_value(xd, &filter_level);
                         }
 
-                        // Distance of Mb to the various image edges.
-                        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                        /* Distance of Mb to the various image edges.
+                         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                         */
                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -333,7 +335,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         xd->left_available = (mb_col != 0);
 
-                        // Select the appropriate reference frame for this MB
+                        /* Select the appropriate reference frame for this MB */
                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                             ref_fb_idx = pc->lst_fb_idx;
                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -352,13 +354,13 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                         {
                             if( mb_row != pc->mb_rows-1 )
                             {
-                                //Save decoded MB last row data for next-row decoding
+                                /* Save decoded MB last row data for next-row decoding */
                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                             }
 
-                            //save left_col for next MB decoding
+                            /* save left_col for next MB decoding */
                             if(mb_col != pc->mb_cols-1)
                             {
                                 MODE_INFO *next = xd->mode_info_context +1;
@@ -375,7 +377,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                 }
                             }
 
-                          // loopfilter on this macroblock.
+                          /* loopfilter on this macroblock. */
                             if (filter_level)
                             {
                                 if (mb_col > 0)
@@ -384,7 +386,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
-                                // don't apply across umv border
+                                /* don't apply across umv border */
                                 if (mb_row > 0)
                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
@@ -400,11 +402,11 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         xd->above_context++;
 
-                        //pbi->mb_row_di[ithread].current_mb_col = mb_col;
+                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                         pbi->mt_current_mb_col[mb_row] = mb_col;
                     }
 
-                    // adjust to the next row of mbs
+                    /* adjust to the next row of mbs */
                     if (pbi->common.filter_level)
                     {
                         if(mb_row != pc->mb_rows-1)
@@ -424,15 +426,15 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     ++xd->mode_info_context;      /* skip prediction column */
 
-                    // since we have multithread
+                    /* since we have multithread */
                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                 }
             }
         }
-        //  add this to each frame
+        /*  add this to each frame */
         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
         {
-            //SetEvent(pbi->h_event_end_decoding);
+            /*SetEvent(pbi->h_event_end_decoding);*/
             sem_post(&pbi->h_event_end_decoding);
         }
     }
@@ -502,7 +504,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
             pbi->mt_current_mb_col = NULL ;
         }
 
-        // Free above_row buffers.
+        /* Free above_row buffers. */
         if (pbi->mt_yabove_row)
         {
             for (i=0; i< mb_rows; i++)
@@ -545,7 +547,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
             pbi->mt_vabove_row = NULL ;
         }
 
-        // Free left_col buffers.
+        /* Free left_col buffers. */
         if (pbi->mt_yleft_col)
         {
             for (i=0; i< mb_rows; i++)
@@ -605,7 +607,7 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
     {
         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
 
-        // our internal buffers are always multiples of 16
+        /* our internal buffers are always multiples of 16 */
         if ((width & 0xf) != 0)
             width += 16 - (width & 0xf);
 
@@ -616,10 +618,10 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
 
         uv_width = width >>1;
 
-        // Allocate an int for each mb row.
+        /* Allocate an int for each mb row. */
         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
 
-        // Allocate memory for above_row buffers.
+        /* Allocate memory for above_row buffers. */
         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
@@ -632,7 +634,7 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
 
-        // Allocate memory for left_col buffers.
+        /* Allocate memory for left_col buffers. */
         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
@@ -657,14 +659,14 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
 
-    //shutdown MB Decoding thread;
+    /* shutdown MB Decoding thread; */
     if (pbi->b_multithreaded_rd)
     {
         int i;
 
         pbi->b_multithreaded_rd = 0;
 
-        // allow all threads to exit
+        /* allow all threads to exit */
         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
         {
             sem_post(&pbi->h_event_start_decoding[i]);
@@ -713,32 +715,32 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
 #if CONFIG_MULTITHREAD
     VP8_COMMON *cm  = &pbi->common;
     MACROBLOCKD *mbd = &pbi->mb;
-    //YV12_BUFFER_CONFIG *post = &cm->new_frame;  //frame_to_show;
+    /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
     loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
-    //int mb_row;
-    //int mb_col;
-    //int baseline_filter_level[MAX_MB_SEGMENTS];
+    /*int mb_row;
+    int mb_col;
+    int baseline_filter_level[MAX_MB_SEGMENTS];*/
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
 
     int i;
-    //unsigned char *y_ptr, *u_ptr, *v_ptr;
+    /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -748,7 +750,7 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
@@ -779,7 +781,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
     if(pbi->common.filter_level)
     {
-        //Set above_row buffer to 127 for decoding first MB row
+        /* Set above_row buffer to 127 for decoding first MB row */
         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
@@ -791,7 +793,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
         }
 
-        //Set left_col to 129 initially
+        /* Set left_col to 129 initially */
         for (i=0; i<pc->mb_rows; i++)
         {
             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
@@ -812,7 +814,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
         xd->current_bc = &pbi->mbc[mb_row%num_part];
 
-        //vp8_decode_mb_row(pbi, pc, mb_row, xd);
+        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
         {
             int i;
             int recon_yoffset, recon_uvoffset;
@@ -822,14 +824,14 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-           // volatile int *last_row_current_mb_col = NULL;
+           /* volatile int *last_row_current_mb_col = NULL; */
             if (mb_row > 0)
                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
 
             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
             recon_yoffset = mb_row * recon_y_stride * 16;
             recon_uvoffset = mb_row * recon_uv_stride * 8;
-            // reset above block coeffs
+            /* reset above block coeffs */
 
             xd->above_context = pc->above_context;
             xd->up_available = (mb_row != 0);
@@ -858,17 +860,19 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 if(pbi->common.filter_level)
                 {
-                    //update loopfilter info
+                    /* update loopfilter info */
                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                     filter_level = pbi->mt_baseline_filter_level[Segment];
-                    // Distance of Mb to the various image edges.
-                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                    // Apply any context driven MB level adjustment
+                    /* Distance of Mb to the various image edges.
+                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                     * Apply any context driven MB level adjustment
+                     */
                     vp8_adjust_mb_lf_value(xd, &filter_level);
                 }
 
-                // Distance of Mb to the various image edges.
-                // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                /* Distance of Mb to the various image edges.
+                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                 */
                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -878,7 +882,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 xd->left_available = (mb_col != 0);
 
-                // Select the appropriate reference frame for this MB
+                /* Select the appropriate reference frame for this MB */
                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                     ref_fb_idx = pc->lst_fb_idx;
                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -895,7 +899,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 if (pbi->common.filter_level)
                 {
-                    //Save decoded MB last row data for next-row decoding
+                    /* Save decoded MB last row data for next-row decoding */
                     if(mb_row != pc->mb_rows-1)
                     {
                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
@@ -903,7 +907,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                     }
 
-                    //save left_col for next MB decoding
+                    /* save left_col for next MB decoding */
                     if(mb_col != pc->mb_cols-1)
                     {
                         MODE_INFO *next = xd->mode_info_context +1;
@@ -920,7 +924,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         }
                     }
 
-                    // loopfilter on this macroblock.
+                    /* loopfilter on this macroblock. */
                     if (filter_level)
                     {
                         if (mb_col > 0)
@@ -929,7 +933,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         if (xd->mode_info_context->mbmi.dc_diff > 0)
                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
-                        // don't apply across umv border
+                        /* don't apply across umv border */
                         if (mb_row > 0)
                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
@@ -948,7 +952,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                 pbi->mt_current_mb_col[mb_row] = mb_col;
             }
 
-            // adjust to the next row of mbs
+            /* adjust to the next row of mbs */
             if (pbi->common.filter_level)
             {
                 if(mb_row != pc->mb_rows-1)
@@ -971,7 +975,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
     }
 
-    sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
+    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
 #else
     (void) pbi;
     (void) xd;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 691aee0a6..8a94fa369 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -291,7 +291,7 @@ void vp8_output_stats(const VP8_COMP            *cpi,
 
 
         fpfile = fopen("fpmotionmap.stt", "a");
-        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile);
+        if(fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile));
         fclose(fpfile);
     }
 #endif
@@ -879,7 +879,7 @@ void vp8_first_pass(VP8_COMP *cpi)
         else
             recon_file = fopen(filename, "ab");
 
-        fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
+        if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file));
         fclose(recon_file);
     }
 
@@ -2586,7 +2586,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         if (0)
         {
             FILE *f = fopen("Subsamle.stt", "a");
-            fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n",  cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+            fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n",  cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
             fclose(f);
         }
 
@@ -2644,7 +2644,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 if (0)
                 {
                     FILE *f = fopen("Subsamle.stt", "a");
-                    fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n",  kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+                    fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n",  kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
                     fclose(f);
                 }
             }
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index f19cb9a30..a85cad1b4 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -41,7 +41,7 @@ typedef enum
     VP8_SEG_ALG_PRIV     = 256,
     VP8_SEG_MAX
 } mem_seg_id_t;
-#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
+#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0])))
 
 static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
 
@@ -170,7 +170,7 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
     }
 }
 
-static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, int id)
+static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
 {
     int i;
 
@@ -269,14 +269,14 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t         *data,
             const uint8_t *c = data + 3;
             si->is_kf = 1;
 
-            // vet via sync code
+            /* vet via sync code */
             if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a)
                 res = VPX_CODEC_UNSUP_BITSTREAM;
 
             si->w = swap2(*(const unsigned short *)(c + 3)) & 0x3fff;
             si->h = swap2(*(const unsigned short *)(c + 5)) & 0x3fff;
 
-            //printf("w=%d, h=%d\n", si->w, si->h);
+            /*printf("w=%d, h=%d\n", si->w, si->h);*/
             if (!(si->h | si->w))
                 res = VPX_CODEC_UNSUP_BITSTREAM;
         }
@@ -670,7 +670,14 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
     },
-    {NOT_IMPLEMENTED} /* encoder functions */
+    { /* encoder functions */
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED
+    }
 };
 
 /*
@@ -693,5 +700,12 @@ vpx_codec_iface_t vpx_codec_vp8_algo =
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
     },
-    {NOT_IMPLEMENTED} /* encoder functions */
+    { /* encoder functions */
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED
+    }
 };
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index ddbd65484..10929590b 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -303,7 +303,7 @@ const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list
 
     pkt = (const void *) * iter;
 
-    if (pkt - list->pkts < list->cnt)
+    if ((size_t)(pkt - list->pkts) < list->cnt)
         *iter = pkt + 1;
     else
         pkt = NULL;
diff --git a/vpx/vp8.h b/vpx/vp8.h
index c7553ec22..d7ed8d8c1 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -53,7 +53,7 @@ enum vp8_postproc_level
     VP8_NOFILTERING    = 0,
     VP8_DEBLOCK        = 1,
     VP8_DEMACROBLOCK   = 2,
-    VP8_ADDNOISE       = 4,
+    VP8_ADDNOISE       = 4
 };
 
 /*!\brief post process flags
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 371df00c3..899b27cca 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -62,7 +62,7 @@ extern "C" {
     /*!\brief Decorator indicating a function is potentially unused */
 #ifdef UNUSED
 #elif __GNUC__
-#define UNUSED __attribute__ ((unused));
+#define UNUSED __attribute__ ((unused))
 #else
 #define UNUSED
 #endif
@@ -128,7 +128,7 @@ extern "C" {
         /*!\brief An iterator reached the end of list.
          *
          */
-        VPX_CODEC_LIST_END,
+        VPX_CODEC_LIST_END
 
     }
     vpx_codec_err_t;
diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
index 9e1e49222..ca6f61849 100644
--- a/vpx/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -78,7 +78,7 @@ extern "C" {
         /*!\brief An iterator reached the end of list.
          *
          */
-        VPX_DEC_LIST_END = VPX_CODEC_LIST_END,
+        VPX_DEC_LIST_END = VPX_CODEC_LIST_END
 
     }
     vpx_dec_err_t;
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index f894cd871..3acb19945 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -171,7 +171,7 @@ extern "C" {
     {
         VPX_RC_ONE_PASS,   /**< Single pass mode */
         VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
-        VPX_RC_LAST_PASS,  /**< Final pass of multi-pass mode */
+        VPX_RC_LAST_PASS   /**< Final pass of multi-pass mode */
     };
 
 
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 4506dd3b2..dcb8f31bc 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -55,7 +55,7 @@ extern "C" {
         VPX_IMG_FMT_YV12    = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
         VPX_IMG_FMT_I420    = VPX_IMG_FMT_PLANAR | 2,
         VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
-        VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,  /** < planar 4:2:0 format with vpx color space */
+        VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4   /** < planar 4:2:0 format with vpx color space */
     }
     vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
 
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 00f9c90e1..6e261ba7f 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -15,24 +15,24 @@
 
 #ifndef CONFIG_MEM_MANAGER
 # if defined(VXWORKS)
-#  define CONFIG_MEM_MANAGER  1 //include heap manager functionality,
-//default: enabled on vxworks
+#  define CONFIG_MEM_MANAGER  1 /*include heap manager functionality,*/
+/*default: enabled on vxworks*/
 # else
-#  define CONFIG_MEM_MANAGER  0 //include heap manager functionality
+#  define CONFIG_MEM_MANAGER  0 /*include heap manager functionality*/
 # endif
 #endif /*CONFIG_MEM_MANAGER*/
 
 #ifndef CONFIG_MEM_TRACKER
-# define CONFIG_MEM_TRACKER     1 //include xvpx_* calls in the lib
+# define CONFIG_MEM_TRACKER     1 /*include xvpx_* calls in the lib*/
 #endif
 
 #ifndef CONFIG_MEM_CHECKS
-# define CONFIG_MEM_CHECKS      0 //include some basic safety checks in
-//vpx_memcpy, _memset, and _memmove
+# define CONFIG_MEM_CHECKS      0 /*include some basic safety checks in
+vpx_memcpy, _memset, and _memmove*/
 #endif
 
 #ifndef USE_GLOBAL_FUNCTION_POINTERS
-# define USE_GLOBAL_FUNCTION_POINTERS   0  //use function pointers instead of compiled functions.
+# define USE_GLOBAL_FUNCTION_POINTERS   0  /*use function pointers instead of compiled functions.*/
 #endif
 
 #if CONFIG_MEM_TRACKER
@@ -46,9 +46,9 @@
 
 #ifndef DEFAULT_ALIGNMENT
 # if defined(VXWORKS)
-#  define DEFAULT_ALIGNMENT        32        //default addr alignment to use in
-//calls to vpx_* functions other
-//than vpx_memalign
+#  define DEFAULT_ALIGNMENT        32        /*default addr alignment to use in
+                                               calls to vpx_* functions other
+                                               than vpx_memalign*/
 # else
 #  define DEFAULT_ALIGNMENT        1
 # endif
@@ -59,24 +59,24 @@
 #endif
 
 #if CONFIG_MEM_TRACKER
-# define TRY_BOUNDS_CHECK         1         //when set to 1 pads each allocation,
-//integrity can be checked using
-//vpx_memory_tracker_check_integrity
-//or on free by defining
-//TRY_BOUNDS_CHECK_ON_FREE
+# define TRY_BOUNDS_CHECK         1        /*when set to 1 pads each allocation,
+                                             integrity can be checked using
+                                             vpx_memory_tracker_check_integrity
+                                             or on free by defining*/
+/*TRY_BOUNDS_CHECK_ON_FREE*/
 #else
 # define TRY_BOUNDS_CHECK         0
 #endif /*CONFIG_MEM_TRACKER*/
 
 #if TRY_BOUNDS_CHECK
-# define TRY_BOUNDS_CHECK_ON_FREE 0          //checks mem integrity on every
-//free, very expensive
-# define BOUNDS_CHECK_VALUE       0xdeadbeef //value stored before/after ea.
-//mem addr for bounds checking
-# define BOUNDS_CHECK_PAD_SIZE    32         //size of the padding before and
-//after ea allocation to be filled
-//with BOUNDS_CHECK_VALUE.
-//this should be a multiple of 4
+# define TRY_BOUNDS_CHECK_ON_FREE 0          /*checks mem integrity on every
+                                               free, very expensive*/
+# define BOUNDS_CHECK_VALUE       0xdeadbeef /*value stored before/after ea.
+                                               mem addr for bounds checking*/
+# define BOUNDS_CHECK_PAD_SIZE    32         /*size of the padding before and
+                                               after ea allocation to be filled
+                                               with BOUNDS_CHECK_VALUE.
+                                               this should be a multiple of 4*/
 #else
 # define BOUNDS_CHECK_VALUE       0
 # define BOUNDS_CHECK_PAD_SIZE    0
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index 85b05ab9f..eade43222 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -31,7 +31,7 @@ static unsigned long g_alloc_count = 0;
 # include "hmm_intrnl.h"
 
 # define SHIFT_HMM_ADDR_ALIGN_UNIT 5
-# define TOTAL_MEMORY_TO_ALLOCATE  20971520 // 20 * 1024 * 1024
+# define TOTAL_MEMORY_TO_ALLOCATE  20971520 /* 20 * 1024 * 1024 */
 
 # define MM_DYNAMIC_MEMORY 1
 # if MM_DYNAMIC_MEMORY
@@ -48,7 +48,7 @@ static int g_mng_memory_allocated = 0;
 
 static int vpx_mm_create_heap_memory();
 static void *vpx_mm_realloc(void *memblk, size_t size);
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
 #if USE_GLOBAL_FUNCTION_POINTERS
 struct GLOBAL_FUNC_POINTERS
@@ -75,7 +75,7 @@ struct GLOBAL_FUNC_POINTERS
 # define VPX_MEMCPY_L  memcpy
 # define VPX_MEMSET_L  memset
 # define VPX_MEMMOVE_L memmove
-#endif // USE_GLOBAL_FUNCTION_POINTERS
+#endif /* USE_GLOBAL_FUNCTION_POINTERS */
 
 unsigned int vpx_mem_get_version()
 {
@@ -130,7 +130,7 @@ void *vpx_memalign(size_t align, size_t size)
     addr = hmm_alloc(&hmm_d, number_aau);
 #else
     addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE);
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
     if (addr)
     {
@@ -269,7 +269,7 @@ void *xvpx_memalign(size_t align, size_t size, char *file, int line)
     }
 #else
     x = vpx_memalign(align, size);
-#endif //TRY_BOUNDS_CHECK
+#endif /*TRY_BOUNDS_CHECK*/
 
     g_alloc_count++;
 
@@ -332,9 +332,10 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line)
     vpx_memory_tracker_check_integrity(file, line);
 #endif
 
-    //have to do this regardless of success, because
-    //the memory that does get realloc'd may change
-    //the bounds values of this block
+    /* have to do this regardless of success, because
+     * the memory that does get realloc'd may change
+     * the bounds values of this block
+     */
     vpx_memory_tracker_remove((size_t)memblk);
 
 #if TRY_BOUNDS_CHECK
@@ -364,7 +365,7 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line)
     }
 #else
     x = vpx_realloc(memblk, size);
-#endif //TRY_BOUNDS_CHECK
+#endif /*TRY_BOUNDS_CHECK*/
 
     if (!memblk) ++g_alloc_count;
 
@@ -380,7 +381,7 @@ void xvpx_free(void *p_address, char *file, int line)
 {
 #if TRY_BOUNDS_CHECK
     unsigned char *p_bounds_address = (unsigned char *)p_address;
-    //p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;
+    /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/
 #endif
 
 #if !TRY_BOUNDS_CHECK_ON_FREE
@@ -394,8 +395,9 @@ void xvpx_free(void *p_address, char *file, int line)
         vpx_memory_tracker_check_integrity(file, line);
 #endif
 
-        //if the addr isn't found in the list, assume it was allocated via
-        //vpx_ calls not xvpx_, therefore it does not contain any padding
+        /* if the addr isn't found in the list, assume it was allocated via
+         * vpx_ calls not xvpx_, therefore it does not contain any padding
+         */
         if (vpx_memory_tracker_remove((size_t)p_address) == -2)
         {
             p_bounds_address = p_address;
@@ -421,7 +423,7 @@ void xvpx_free(void *p_address, char *file, int line)
 
 #if CONFIG_MEM_CHECKS
 #if defined(VXWORKS)
-#include <task_lib.h> //for task_delay()
+#include <task_lib.h> /*for task_delay()*/
 /* This function is only used to get a stack trace of the player
 object so we can se where we are having a problem. */
 static int get_my_tt(int task)
@@ -627,7 +629,7 @@ static void *vpx_mm_realloc(void *memblk, size_t size)
 
     return p_ret;
 }
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
 #if USE_GLOBAL_FUNCTION_POINTERS
 # if CONFIG_MEM_TRACKER
@@ -639,7 +641,7 @@ extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l
         , g_memset_func g_memset_l
         , g_memmove_func g_memmove_l);
 # endif
-#endif //USE_GLOBAL_FUNCTION_POINTERS
+#endif /*USE_GLOBAL_FUNCTION_POINTERS*/
 int vpx_mem_set_functions(g_malloc_func g_malloc_l
                           , g_calloc_func g_calloc_l
                           , g_realloc_func g_realloc_l
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index 31f8f9c60..749eaa42e 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -26,15 +26,15 @@
 /* end - vpx_mem version info */
 
 #ifndef VPX_TRACK_MEM_USAGE
-# define VPX_TRACK_MEM_USAGE       0  //enable memory tracking/integrity checks
+# define VPX_TRACK_MEM_USAGE       0  /* enable memory tracking/integrity checks */
 #endif
 #ifndef VPX_CHECK_MEM_FUNCTIONS
-# define VPX_CHECK_MEM_FUNCTIONS   0  //enable basic safety checks in _memcpy,
-//_memset, and _memmove
+# define VPX_CHECK_MEM_FUNCTIONS   0  /* enable basic safety checks in _memcpy,
+                                         _memset, and _memmove */
 #endif
 #ifndef REPLACE_BUILTIN_FUNCTIONS
-# define REPLACE_BUILTIN_FUNCTIONS 0  //replace builtin functions with their
-//vpx_ equivalents
+# define REPLACE_BUILTIN_FUNCTIONS 0  /* replace builtin functions with their
+                                         vpx_ equivalents */
 #endif
 
 #include <stdlib.h>
@@ -74,7 +74,7 @@ extern "C" {
     void *vpx_memset(void *dest, int val, size_t length);
     void *vpx_memmove(void *dest, const void *src, size_t count);
 
-// special memory functions
+    /* special memory functions */
     void *vpx_mem_alloc(int id, size_t size, size_t align);
     void vpx_mem_free(int id, void *mem, size_t size);
 
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index b084e817b..b54e334cb 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -116,7 +116,7 @@ void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitc
         des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
 
-        // First line in next band
+        /* First line in next band */
         a = des [dest_pitch * 5];
         des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8);
 
@@ -163,7 +163,7 @@ void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest
         des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des[dest_pitch*4] = (unsigned char) d;
 
         des++;
@@ -401,7 +401,7 @@ void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitc
         des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
 
-        // First line in next band...
+        /* First line in next band... */
         a = des [dest_pitch * 5];
         des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
 
@@ -446,7 +446,7 @@ void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest
         des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des [ dest_pitch * 4 ] = (unsigned char)(c) ;
 
         des++;
@@ -549,7 +549,7 @@ void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitc
         c = des[dest_pitch*2];
         des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
 
-        // First line in next band...
+        /* First line in next band... */
         a = des [dest_pitch*4];
         des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
 
@@ -593,7 +593,7 @@ void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest
         c = des[dest_pitch*2];
         des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des [dest_pitch*3] = (unsigned char)(c);
 
         des++;
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index e1c80281f..13c9122f0 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -279,9 +279,9 @@ void scale1d_c
 
     (void) source_length;
 
-    // These asserts are needed if there are boundary issues...
-    //assert ( dest_scale > source_scale );
-    //assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );
+    /* These asserts are needed if there are boundary issues... */
+    /*assert ( dest_scale > source_scale );*/
+    /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/
 
     for (i = 0; i < dest_length * dest_step; i += dest_step)
     {
@@ -334,7 +334,7 @@ void scale1d_c
 static
 void Scale2D
 (
-    //const
+    /*const*/
     unsigned char *source,
     int source_pitch,
     unsigned int source_width,
@@ -352,7 +352,7 @@ void Scale2D
     unsigned int interlaced
 )
 {
-    //unsigned
+    /*unsigned*/
     int i, j, k;
     int bands;
     int dest_band_height;
@@ -370,7 +370,7 @@ void Scale2D
     int ratio_scalable = 1;
     int interpolation = 0;
 
-    unsigned char *source_base; // = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch)));
+    unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */
     unsigned char *line_src;
 
 
@@ -386,24 +386,24 @@ void Scale2D
         source_base += offset;
     }
 
-    // find out the ratio for each direction
+    /* find out the ratio for each direction */
     switch (hratio * 10 / hscale)
     {
     case 8:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_5_4_scale;
         break;
     case 6:
-        // 3-5 Scale in Width direction
+        /* 3-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_5_3_scale;
         break;
     case 5:
-        // 1-2 Scale in Width direction
+        /* 1-2 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_2_1_scale;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -411,30 +411,30 @@ void Scale2D
     switch (vratio * 10 / vscale)
     {
     case 8:
-        // 4-5 Scale in vertical direction
+        /* 4-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_5_4_scale;
         source_band_height  = 5;
         dest_band_height    = 4;
         break;
     case 6:
-        // 3-5 Scale in vertical direction
+        /* 3-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_5_3_scale;
         source_band_height  = 5;
         dest_band_height    = 3;
         break;
     case 5:
-        // 1-2 Scale in vertical direction
+        /* 1-2 Scale in vertical direction */
 
         if (interlaced)
         {
-            //if the content is interlaced, point sampling is used
+            /* if the content is interlaced, point sampling is used */
             vert_band_scale     = vp8_vertical_band_2_1_scale;
         }
         else
         {
 
             interpolation = 1;
-            //if the content is progressive, interplo
+            /* if the content is progressive, interplo */
             vert_band_scale     = vp8_vertical_band_2_1_scale_i;
 
         }
@@ -443,8 +443,8 @@ void Scale2D
         dest_band_height    = 1;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -453,7 +453,7 @@ void Scale2D
     {
         if (source_height == dest_height)
         {
-            // for each band of the image
+            /* for each band of the image */
             for (k = 0; k < (int)dest_height; k++)
             {
                 horiz_line_scale(source, source_width, dest, dest_width);
@@ -474,10 +474,10 @@ void Scale2D
 
         for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++)
         {
-            // scale one band horizontally
+            /* scale one band horizontally */
             for (i = 0; i < source_band_height; i++)
             {
-                // Trap case where we could read off the base of the source buffer
+                /* Trap case where we could read off the base of the source buffer */
 
                 line_src = (unsigned char *)source + i * source_pitch;
 
@@ -488,13 +488,13 @@ void Scale2D
                                  temp_area + (i + 1)*dest_pitch, dest_width);
             }
 
-            // Vertical scaling is in place
+            /* Vertical scaling is in place */
             vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width);
 
             if (interpolation)
                 vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width);
 
-            // Next band...
+            /* Next band... */
             source += (unsigned long) source_band_height  * source_pitch;
             dest   += (unsigned long) dest_band_height * dest_pitch;
         }
@@ -515,7 +515,7 @@ void Scale2D
 
     if (source_height == dest_height)
     {
-        // for each band of the image
+        /* for each band of the image */
         for (k = 0; k < (int)dest_height; k++)
         {
             Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width);
@@ -537,15 +537,15 @@ void Scale2D
         dest_band_height   = source_band_height * vratio / vscale;
     }
 
-    // first row needs to be done so that we can stay one row ahead for vertical zoom
+    /* first row needs to be done so that we can stay one row ahead for vertical zoom */
     Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width);
 
-    // for each band of the image
+    /* for each band of the image */
     bands = (dest_height + dest_band_height - 1) / dest_band_height;
 
     for (k = 0; k < bands; k++)
     {
-        // scale one band horizontally
+        /* scale one band horizontally */
         for (i = 1; i < source_band_height + 1; i++)
         {
             if (k * source_band_height + i < (int) source_height)
@@ -553,24 +553,24 @@ void Scale2D
                 Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1,
                          temp_area + i * dest_pitch, 1, hratio, dest_width);
             }
-            else  //  Duplicate the last row
+            else  /*  Duplicate the last row */
             {
-                // copy temp_area row 0 over from last row in the past
+                /* copy temp_area row 0 over from last row in the past */
                 duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch);
             }
         }
 
-        // scale one band vertically
+        /* scale one band vertically */
         for (j = 0; j < (int)dest_width; j++)
         {
             Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1,
                      &dest[j], dest_pitch, vratio, dest_band_height);
         }
 
-        // copy temp_area row 0 over from last row in the past
+        /* copy temp_area row 0 over from last row in the past */
         duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch);
 
-        // move to the next band
+        /* move to the next band */
         source += source_band_height * source_pitch;
         dest   += dest_band_height * dest_pitch;
     }
@@ -617,7 +617,7 @@ void vp8_scale_frame
     int dw = (hscale - 1 + src->y_width * hratio) / hscale;
     int dh = (vscale - 1 + src->y_height * vratio) / vscale;
 
-    // call our internal scaling routines!!
+    /* call our internal scaling routines!! */
     Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height,
             (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh,
             temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
@@ -696,13 +696,13 @@ int any_ratio_2d_scale
     unsigned int src_band_height  = 0;
     unsigned int dest_band_height = 0;
 
-    // suggested scale factors
+    /* suggested scale factors */
     int hs = si->HScale;
     int hr = si->HRatio;
     int vs = si->VScale;
     int vr = si->VRatio;
 
-    // assume the ratios are scalable instead of should be centered
+    /* assume the ratios are scalable instead of should be centered */
     int ratio_scalable = 1;
 
     const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch)));
@@ -714,37 +714,37 @@ int any_ratio_2d_scale
 
     (void) si;
 
-    // find out the ratio for each direction
+    /* find out the ratio for each direction */
     switch (hr * 30 / hs)
     {
     case 24:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_4_5_scale;
         break;
     case 22:
-        // 3-4 Scale in Width direction
+        /* 3-4 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_3_4_scale;
         break;
 
     case 20:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_2_3_scale;
         break;
     case 18:
-        // 3-5 Scale in Width direction
+        /* 3-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_3_5_scale;
         break;
     case 15:
-        // 1-2 Scale in Width direction
+        /* 1-2 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_1_2_scale;
         break;
     case 30:
-        // no scale in Width direction
+        /* no scale in Width direction */
         horiz_line_scale = horizontal_line_copy;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -752,50 +752,50 @@ int any_ratio_2d_scale
     switch (vr * 30 / vs)
     {
     case 24:
-        // 4-5 Scale in vertical direction
+        /* 4-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_4_5_scale;
         last_vert_band_scale = vp8_last_vertical_band_4_5_scale;
         src_band_height     = 4;
         dest_band_height    = 5;
         break;
     case 22:
-        // 3-4 Scale in vertical direction
+        /* 3-4 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_3_4_scale;
         last_vert_band_scale = vp8_last_vertical_band_3_4_scale;
         src_band_height     = 3;
         dest_band_height    = 4;
         break;
     case 20:
-        // 2-3 Scale in vertical direction
+        /* 2-3 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_2_3_scale;
         last_vert_band_scale = vp8_last_vertical_band_2_3_scale;
         src_band_height     = 2;
         dest_band_height    = 3;
         break;
     case 18:
-        // 3-5 Scale in vertical direction
+        /* 3-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_3_5_scale;
         last_vert_band_scale = vp8_last_vertical_band_3_5_scale;
         src_band_height     = 3;
         dest_band_height    = 5;
         break;
     case 15:
-        // 1-2 Scale in vertical direction
+        /* 1-2 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_1_2_scale;
         last_vert_band_scale = vp8_last_vertical_band_1_2_scale;
         src_band_height     = 1;
         dest_band_height    = 2;
         break;
     case 30:
-        // no scale in Width direction
+        /* no scale in Width direction */
         vert_band_scale     = null_scale;
         last_vert_band_scale = null_scale;
         src_band_height     = 4;
         dest_band_height    = 4;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -805,13 +805,13 @@ int any_ratio_2d_scale
 
     horiz_line_scale(source, source_width, dest, dest_width);
 
-    // except last band
+    /* except last band */
     for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++)
     {
-        // scale one band horizontally
+        /* scale one band horizontally */
         for (i = 1; i < src_band_height; i++)
         {
-            // Trap case where we could read off the base of the source buffer
+            /* Trap case where we could read off the base of the source buffer */
             line_src = source + i * source_pitch;
 
             if (line_src < source_base)
@@ -821,8 +821,8 @@ int any_ratio_2d_scale
                              dest + i * dest_pitch, dest_width);
         }
 
-        // first line of next band
-        // Trap case where we could read off the base of the source buffer
+        /* first line of next band */
+        /* Trap case where we could read off the base of the source buffer */
         line_src = source + src_band_height * source_pitch;
 
         if (line_src < source_base)
@@ -832,18 +832,18 @@ int any_ratio_2d_scale
                          dest + dest_band_height * dest_pitch,
                          dest_width);
 
-        // Vertical scaling is in place
+        /* Vertical scaling is in place */
         vert_band_scale(dest, dest_pitch, dest_width);
 
-        // Next band...
+        /* Next band... */
         source += src_band_height  * source_pitch;
         dest   += dest_band_height * dest_pitch;
     }
 
-    // scale one band horizontally
+    /* scale one band horizontally */
     for (i = 1; i < src_band_height; i++)
     {
-        // Trap case where we could read off the base of the source buffer
+        /* Trap case where we could read off the base of the source buffer */
         line_src = source + i * source_pitch;
 
         if (line_src < source_base)
@@ -854,7 +854,7 @@ int any_ratio_2d_scale
                          dest_width);
     }
 
-    // Vertical scaling is in place
+    /* Vertical scaling is in place */
     last_vert_band_scale(dest, dest_pitch, dest_width);
 
     return ratio_scalable;
@@ -885,7 +885,7 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset)
     int ew;
     int eh;
 
-    // suggested scale factors
+    /* suggested scale factors */
     int hs = scale_vars->HScale;
     int hr = scale_vars->HRatio;
     int vs = scale_vars->VScale;
@@ -968,11 +968,11 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
     unsigned char *src_data_pointer;
     unsigned char *dst_data_pointer;
 
-    // center values
+    /* center values */
     row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2;
     col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2;
 
-    // Y's
+    /* Y's */
     src_data_pointer = src_yuv_config->y_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset;
 
@@ -986,7 +986,7 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
     row_offset /= 2;
     col_offset /= 2;
 
-    // U's
+    /* U's */
     src_data_pointer = src_yuv_config->u_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
 
@@ -997,7 +997,7 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
         src_data_pointer += src_yuv_config->uv_stride;
     }
 
-    // V's
+    /* V's */
     src_data_pointer = src_yuv_config->v_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
 
@@ -1040,8 +1040,8 @@ vp8_yv12_scale_or_center
     int VRatio
 )
 {
-//    if ( ppi->post_processing_level )
-    //      update_umvborder ( ppi, frame_buffer );
+    /*if ( ppi->post_processing_level )
+          update_umvborder ( ppi, frame_buffer );*/
 
 
     switch (scaling_mode)
@@ -1050,12 +1050,12 @@ vp8_yv12_scale_or_center
     case MAINTAIN_ASPECT_RATIO:
     {
         SCALE_VARS scale_vars;
-        // center values
+        /* center values */
 #if 1
         int row = (dst_yuv_config->y_height - expanded_frame_height) / 2;
         int col = (dst_yuv_config->y_width  - expanded_frame_width) / 2;
-//        int YOffset  = row * dst_yuv_config->y_width + col;
-//        int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);
+        /*int YOffset  = row * dst_yuv_config->y_width + col;
+        int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/
         int YOffset  = row * dst_yuv_config->y_stride + col;
         int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1);
 #else
@@ -1074,7 +1074,7 @@ vp8_yv12_scale_or_center
         scale_vars.expanded_frame_width = expanded_frame_width;
         scale_vars.expanded_frame_height = expanded_frame_height;
 
-        // perform center and scale
+        /* perform center and scale */
         any_ratio_frame_scale(&scale_vars, YOffset, UVOffset);
 
         break;
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 3034cc3a7..d9d228551 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -45,7 +45,7 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
 int
 vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border)
 {
-//NOTE:
+/*NOTE:*/
 
     int yplane_size = (height + 2 * border) * (width + 2 * border);
     int uvplane_size = ((1 + height) / 2 + border) * ((1 + width) / 2 + border);
@@ -65,9 +65,10 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
         ybf->border = border;
         ybf->frame_size = yplane_size + 2 * uvplane_size;
 
-        // Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
-        // when we have a large motion vector in V on the last v block.
-        // Note : We never use these pixels anyway so this doesn't hurt.
+        /* Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
+         * when we have a large motion vector in V on the last v block.
+         * Note : We never use these pixels anyway so this doesn't hurt.
+         */
         ybf->buffer_alloc = (unsigned char *) duck_memalign(32,  ybf->frame_size + (ybf->y_stride * 2) + 32, 0);
 
         if (ybf->buffer_alloc == NULL)
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 39d2fa854..e58aa1fb2 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -40,7 +40,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     plane_height = ybf->y_height;
     plane_width = ybf->y_width;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->y_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -56,7 +56,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->y_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -79,7 +79,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     plane_width = ybf->uv_width;
     Border /= 2;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->u_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -95,7 +95,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->u_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -113,7 +113,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     /* V Plane */
     /***********/
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->v_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -129,7 +129,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->v_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -165,7 +165,7 @@ vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
     plane_height = ybf->y_height;
     plane_width = ybf->y_width;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->y_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -181,7 +181,7 @@ vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->y_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 50d6e3b3a..5dcee818a 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -32,8 +32,8 @@ extern "C"
     ************************************/
     typedef enum
     {
-        REG_YUV = 0,    // Regular yuv
-        INT_YUV = 1     // The type of yuv that can be tranfer to and from RGB through integer transform
+        REG_YUV = 0,    /* Regular yuv */
+        INT_YUV = 1     /* The type of yuv that can be tranfer to and from RGB through integer transform */
               }
               YUV_TYPE;
 
@@ -42,12 +42,12 @@ extern "C"
         int   y_width;
         int   y_height;
         int   y_stride;
-//    int   yinternal_width;
+/*    int   yinternal_width; */
 
         int   uv_width;
         int   uv_height;
         int   uv_stride;
-//    int   uvinternal_width;
+/*    int   uvinternal_width; */
 
         unsigned char *y_buffer;
         unsigned char *u_buffer;
@@ -68,4 +68,4 @@ extern "C"
 #endif
 
 
-#endif //YV12_CONFIG_H
+#endif /*YV12_CONFIG_H*/
diff --git a/vpxdec.c b/vpxdec.c
index 48a36cd65..e8e4d5f12 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -336,7 +336,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
     }
     else
     {
-        fwrite(buf, 1, len, out);
+        if(fwrite(buf, 1, len, out));
     }
 }
 
@@ -461,11 +461,13 @@ nestegg_read_cb(void *buffer, size_t length, void *userdata)
 {
     FILE *f = userdata;
 
-    fread(buffer, 1, length, f);
-    if (ferror(f))
-        return -1;
-    if (feof(f))
-        return 0;
+    if(fread(buffer, 1, length, f) < length)
+    {
+        if (ferror(f))
+            return -1;
+        if (feof(f))
+            return 0;
+    }
     return 1;
 }
 
diff --git a/vpxenc.c b/vpxenc.c
index fc0b779ad..f95657d37 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -213,7 +213,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len)
 {
     if (stats->file)
     {
-        fwrite(pkt, 1, len, stats->file);
+        if(fwrite(pkt, 1, len, stats->file));
     }
     else
     {
@@ -259,10 +259,11 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
                       y4m_input *y4m, struct detect_buffer *detect)
 {
     int plane = 0;
+    int shortread = 0;
 
     if (file_type == FILE_TYPE_Y4M)
     {
-        if (y4m_input_fetch_frame(y4m, f, img) < 0)
+        if (y4m_input_fetch_frame(y4m, f, img) < 1)
            return 0;
     }
     else
@@ -275,7 +276,7 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
              * write_ivf_frame_header() for documentation on the frame header
              * layout.
              */
-            fread(junk, 1, IVF_FRAME_HDR_SZ, f);
+            if(fread(junk, 1, IVF_FRAME_HDR_SZ, f));
         }
 
         for (plane = 0; plane < 3; plane++)
@@ -306,18 +307,18 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
                 if (detect->valid)
                 {
                     memcpy(ptr, detect->buf, 4);
-                    fread(ptr+4, 1, w-4, f);
+                    shortread |= fread(ptr+4, 1, w-4, f) < w-4;
                     detect->valid = 0;
                 }
                 else
-                    fread(ptr, 1, w, f);
+                    shortread |= fread(ptr, 1, w, f) < w;
 
                 ptr += img->stride[plane];
             }
         }
     }
 
-    return !feof(f);
+    return !shortread;
 }
 
 
@@ -396,7 +397,7 @@ static void write_ivf_file_header(FILE *outfile,
     mem_put_le32(header + 24, frame_cnt);         /* length */
     mem_put_le32(header + 28, 0);                 /* unused */
 
-    fwrite(header, 1, 32, outfile);
+    if(fwrite(header, 1, 32, outfile));
 }
 
 
@@ -414,7 +415,7 @@ static void write_ivf_frame_header(FILE *outfile,
     mem_put_le32(header + 4, pts & 0xFFFFFFFF);
     mem_put_le32(header + 8, pts >> 32);
 
-    fwrite(header, 1, 12, outfile);
+    if(fwrite(header, 1, 12, outfile));
 }
 
 
@@ -462,7 +463,7 @@ struct EbmlGlobal
 
 void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
 {
-    fwrite(buffer_in, 1, len, glob->stream);
+    if(fwrite(buffer_in, 1, len, glob->stream));
 }
 
 
@@ -1329,6 +1330,7 @@ int main(int argc, const char **argv_)
     {
         int frames_in = 0, frames_out = 0;
         unsigned long nbytes = 0;
+        size_t detect_bytes;
         struct detect_buffer detect;
 
         /* Parse certain options from the input file, if possible */
@@ -1340,10 +1342,15 @@ int main(int argc, const char **argv_)
             return EXIT_FAILURE;
         }
 
-        fread(detect.buf, 1, 4, infile);
+        /* For RAW input sources, these bytes will applied on the first frame
+         *  in read_frame().
+         * We can always read 4 bytes because the minimum supported frame size
+         *  is 2x2.
+         */
+        detect_bytes = fread(detect.buf, 1, 4, infile);
         detect.valid = 0;
 
-        if (file_is_y4m(infile, &y4m, detect.buf))
+        if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf))
         {
             if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
             {
@@ -1368,7 +1375,8 @@ int main(int argc, const char **argv_)
                 return EXIT_FAILURE;
             }
         }
-        else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
+        else if (detect_bytes == 4 &&
+                 file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
         {
             file_type = FILE_TYPE_IVF;
             switch (fourcc)
@@ -1572,7 +1580,8 @@ int main(int argc, const char **argv_)
                     else
                     {
                         write_ivf_frame_header(outfile, pkt);
-                        fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                        if(fwrite(pkt->data.frame.buf, 1,
+                                  pkt->data.frame.sz, outfile));
                     }
                     nbytes += pkt->data.raw.sz;
                     break;
diff --git a/y4minput.c b/y4minput.c
index 3eaec4ed3..449afe858 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -877,5 +877,5 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
   _img->planes[PLANE_Y]=_y4m->dst_buf;
   _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
   _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
-  return 0;
+  return 1;
 }

From 483ce40346fbf8faee8ed48881f8ba65038b532e Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 27 Oct 2010 11:28:56 -0400
Subject: [PATCH 278/307] Output the PSNR for the entire file.

If --psnr option is enabled vpxenc will output PSNR values for the
entire file. Added a \n before final output to make sure the output
is on its own line. Overall and Avg psnr matches the values written
to opsnr.stt file.

Change-Id: Ibac5fa9baf8d5a626ea0d6ba161b484e6e8427ee
---
 vpxenc.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

diff --git a/vpxenc.c b/vpxenc.c
index a8632b887..fc0b779ad 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -810,6 +810,23 @@ static unsigned int murmur ( const void * key, int len, unsigned int seed )
     return h;
 }
 
+#include "math.h"
+
+static double vp8_mse2psnr(double Samples, double Peak, double Mse)
+{
+    double psnr;
+
+    if ((double)Mse > 0.0)
+        psnr = 10.0 * log10(Peak * Peak * Samples / Mse);
+    else
+        psnr = 60;      // Limit to prevent / 0
+
+    if (psnr > 60)
+        psnr = 60;
+
+    return psnr;
+}
+
 
 #include "args.h"
 
@@ -1049,6 +1066,10 @@ int main(int argc, const char **argv_)
     int                      write_webm = 1;
     EbmlGlobal               ebml = {0};
     uint32_t                 hash = 0;
+    uint64_t                 psnr_sse_total = 0;
+    uint64_t                 psnr_samples_total = 0;
+    double                   psnr_totals[4] = {0, 0, 0, 0};
+    int                      psnr_count = 0;
 
     exec_name = argv_[0];
 
@@ -1570,8 +1591,14 @@ int main(int argc, const char **argv_)
                     {
                         int i;
 
+                        psnr_sse_total += pkt->data.psnr.sse[0];
+                        psnr_samples_total += pkt->data.psnr.samples[0];
                         for (i = 0; i < 4; i++)
+                        {
                             fprintf(stderr, "%.3lf ", pkt->data.psnr.psnr[i]);
+                            psnr_totals[i] += pkt->data.psnr.psnr[i];
+                        }
+                        psnr_count++;
                     }
 
                     break;
@@ -1592,6 +1619,21 @@ int main(int argc, const char **argv_)
                cx_time > 9999999 ? "ms" : "us",
                (float)frames_in * 1000000.0 / (float)cx_time);
 
+        if ( (show_psnr) && (psnr_count>0) )
+        {
+            int i;
+            double ovpsnr = vp8_mse2psnr(psnr_samples_total, 255.0,
+                                         psnr_sse_total);
+
+            fprintf(stderr, "\nPSNR (Overall/Avg/Y/U/V)");
+
+            fprintf(stderr, " %.3lf", ovpsnr);
+            for (i = 0; i < 4; i++)
+            {
+                fprintf(stderr, " %.3lf", psnr_totals[i]/psnr_count);
+            }
+        }
+
         vpx_codec_destroy(&encoder);
 
         fclose(infile);

From 6fda7668e84808230d69a9d1f7580bd4b14ff977 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 27 Oct 2010 12:50:16 -0700
Subject: [PATCH 279/307] postproc: Tweaks to line drawing and blending.

Turned down the blending level to make colored blocks obscure
the video less.
Not blending the entire block to give distinction to macro
block edges.
Added configuration so that macro block blending function can
be optimized.
Change to constrain line as to when dx and dy are computed.
Now draw two lines to form an arrow.

Change-Id: I986784e6abff65ea3e0d1437dfca7d06d44ede71
---
 vp8/common/generic/systemdependent.c |  1 +
 vp8/common/postproc.c                | 65 +++++++++++++++++++---------
 vp8/common/postproc.h                | 10 +++++
 3 files changed, 55 insertions(+), 21 deletions(-)

diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 0c9b77e76..c624242a5 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -69,6 +69,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
     rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
     rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
+    rtcd->postproc.blend_mb    = vp8_blend_mb_c;
 #endif
 
 #endif
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 21e08638d..42e37da30 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -472,7 +472,10 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
     }
 }
 
-void vp8_blend_block_c (unsigned char *y, unsigned char *u, unsigned char *v,
+// Blend the macro block with a solid colored square.  Leave the
+// edges unblended to give distinction to macro blocks in areas
+// filled with the same color block.
+void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
                         int y1, int u1, int v1, int alpha, int stride)
 {
     int i, j;
@@ -480,63 +483,73 @@ void vp8_blend_block_c (unsigned char *y, unsigned char *u, unsigned char *v,
     int u1_const = u1*((1<<16)-alpha);
     int v1_const = v1*((1<<16)-alpha);
 
-    for (i = 0; i < 16; i++)
+    y += stride + 2;
+    for (i = 0; i < 14; i++)
     {
-        for (j = 0; j < 16; j++)
+        for (j = 0; j < 14; j++)
         {
             y[j] = (y[j]*alpha + y1_const)>>16;
         }
         y += stride;
     }
 
-    for (i = 0; i < 8; i++)
+    stride >>= 1;
+
+    u += stride + 1;
+    v += stride + 1;
+
+    for (i = 0; i < 6; i++)
     {
-        for (j = 0; j < 8; j++)
+        for (j = 0; j < 6; j++)
         {
             u[j] = (u[j]*alpha + u1_const)>>16;
             v[j] = (v[j]*alpha + v1_const)>>16;
         }
-        u += stride/2;
-        v += stride/2;
+        u += stride;
+        v += stride;
     }
 }
 
 static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
 {
-    int dx = *x1 - x0;
-    int dy = *y1 - y0;
+    int dx;
+    int dy;
 
     if (*x1 > width)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *x1 = width;
         if (dy)
             *y1 = ((width-x0)*dy)/dx + y0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*x1 < 0)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *x1 = 0;
         if (dy)
             *y1 = ((0-x0)*dy)/dx + y0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*y1 > height)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *y1 = height;
         if (dx)
             *x1 = ((height-y0)*dx)/dy + x0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
     if (*y1 < 0)
     {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
         *y1 = 0;
         if (dx)
             *x1 = ((0-y0)*dx)/dy + x0;
-        dx = *x1 - x0;
-        dy = *y1 - y0;
     }
 }
 
@@ -747,8 +760,16 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                     x1 = x0 + (mv->col >> 3);
                     y1 = y0 + (mv->row >> 3);
 
-                    constrain_line (x0, &x1, y0, &y1, width, height);
-                    vp8_blit_line (x0, x1, y0, y1, y_buffer, y_stride);
+                    if (x1 != x0 && y1 != y0)
+                    {
+                        constrain_line (x0, &x1, y0-1, &y1, width, height);
+                        vp8_blit_line  (x0,  x1, y0-1,  y1, y_buffer, y_stride);
+
+                        constrain_line (x0, &x1, y0+1, &y1, width, height);
+                        vp8_blit_line  (x0,  x1, y0+1,  y1, y_buffer, y_stride);
+                    }
+                    else
+                        vp8_blit_line  (x0,  x1, y0,  y1, y_buffer, y_stride);
                 }
                 mi++;
             }
@@ -779,7 +800,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
                 V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
 
-                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
+                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
 
                 mi++;
             }
@@ -814,7 +836,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
                 V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
 
-                vp8_blend_block_c (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xb000, y_stride);
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
+                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
 
                 mi++;
             }
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 80337fc68..4a4493802 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -24,6 +24,10 @@
               char whiteclamp[16], char bothclamp[16],\
               unsigned int w, unsigned int h, int pitch)
 
+#define prototype_postproc_blend_mb(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/postproc_x86.h"
 #endif
@@ -48,16 +52,22 @@ extern prototype_postproc(vp8_postproc_downacross);
 #endif
 extern prototype_postproc_addnoise(vp8_postproc_addnoise);
 
+#ifndef vp8_postproc_blend_mb
+#define vp8_postproc_blend_mb vp8_blend_mb_c
+#endif
+extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
 
 typedef prototype_postproc((*vp8_postproc_fn_t));
 typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
 typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
+typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
 typedef struct
 {
     vp8_postproc_inplace_fn_t   down;
     vp8_postproc_inplace_fn_t   across;
     vp8_postproc_fn_t           downacross;
     vp8_postproc_addnoise_fn_t  addnoise;
+    vp8_postproc_blend_mb_fn_t  blend_mb;
 } vp8_postproc_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT

From 97b766a46cb7a9a26da618726600e619023c0745 Mon Sep 17 00:00:00 2001
From: "Timothy B. Terriberry" <tterribe@xiph.org>
Date: Wed, 27 Oct 2010 16:04:02 -0700
Subject: [PATCH 280/307] Eliminate more warnings.

This eliminates a large set of warnings exposed by the Mozilla build
 system (Use of C++ comments in ISO C90 source, commas at the end of
 enum lists, a couple incomplete initializers, and signed/unsigned
 comparisons).
It also eliminates many (but not all) of the warnings expose by newer
 GCC versions and _FORTIFY_SOURCE (e.g., calling fread and fwrite
 without checking the return values).
There are a few spurious warnings left on my system:

../vp8/encoder/encodemb.c:274:9: warning: 'sz' may be used
 uninitialized in this function
gcc seems to be unable to figure out that the value shortcut doesn't
 change between the two if blocks that test it here.

../vp8/encoder/onyx_if.c:5314:5: warning: comparison of unsigned
 expression >= 0 is always true
../vp8/encoder/onyx_if.c:5319:5: warning: comparison of unsigned
 expression >= 0 is always true
This is true, so far as it goes, but it's comparing against an enum,
 and the C standard does not mandate that enums be unsigned, so the
 checks can't be removed.

Change-Id: Iead6cd561a2afaa3d801fd63f1d8d58953da7426
---
 examples/decoder_tmpl.c                |   4 +-
 examples/decoder_tmpl.txt              |   2 +-
 examples/encoder_tmpl.c                |   4 +-
 examples/encoder_tmpl.txt              |   4 +-
 libmkv/EbmlIDs.h                       |   2 +-
 vp8/common/alloccommon.c               |   8 +-
 vp8/common/arm/bilinearfilter_arm.c    |  22 +--
 vp8/common/arm/filter_arm.c            |  82 ++++-----
 vp8/common/arm/loopfilter_arm.c        |  20 +--
 vp8/common/arm/neon/recon_neon.c       |   2 +-
 vp8/common/blockd.h                    |  78 ++++-----
 vp8/common/debugmodes.c                |  10 +-
 vp8/common/defaultcoefcounts.h         |  72 ++++----
 vp8/common/entropy.h                   |  26 +--
 vp8/common/entropymv.c                 |  16 +-
 vp8/common/extend.c                    |  31 ++--
 vp8/common/filter_c.c                  |  76 ++++-----
 vp8/common/findnearmv.c                |   4 +-
 vp8/common/generic/systemdependent.c   |   2 +-
 vp8/common/invtrans.c                  |   4 +-
 vp8/common/loopfilter.c                | 132 +++++++--------
 vp8/common/loopfilter.h                |  12 +-
 vp8/common/loopfilter_filters.c        |  69 ++++----
 vp8/common/mbpitch.c                   |   8 +-
 vp8/common/modecont.c                  |  12 +-
 vp8/common/modecontext.c               | 220 ++++++++++++-------------
 vp8/common/onyxc_int.h                 |  38 ++---
 vp8/common/postproc.c                  |  73 ++++----
 vp8/common/ppflags.h                   |   2 +-
 vp8/common/recon.c                     |   8 +-
 vp8/common/recon.h                     |   4 +-
 vp8/common/reconinter.c                |  49 +++---
 vp8/common/reconintra.c                |  46 +++---
 vp8/common/reconintra4x4.c             |   7 +-
 vp8/common/setupintrarecon.c           |   2 +-
 vp8/common/textblit.c                  |   2 +-
 vp8/common/threading.h                 |   6 +-
 vp8/common/treecoder.h                 |   2 +-
 vp8/common/type_aliases.h              |  14 +-
 vp8/common/x86/loopfilter_x86.c        |  16 +-
 vp8/common/x86/vp8_asm_stubs.c         |  36 ++--
 vp8/common/x86/x86_systemdependent.c   |   2 +-
 vp8/decoder/arm/arm_dsystemdependent.c |   4 +-
 vp8/decoder/arm/dboolhuff_arm.h        |  10 +-
 vp8/decoder/dboolhuff.h                |  19 ++-
 vp8/decoder/decodemv.c                 |  44 ++---
 vp8/decoder/decodframe.c               | 122 +++++++-------
 vp8/decoder/dequantize.c               |   4 +-
 vp8/decoder/detokenize.c               |  39 ++---
 vp8/decoder/generic/dsystemdependent.c |   4 +-
 vp8/decoder/onyxd_if.c                 |  38 +++--
 vp8/decoder/onyxd_int.h                |  14 +-
 vp8/decoder/reconintra_mt.c            |  98 +++++------
 vp8/decoder/reconintra_mt.h            |   2 +-
 vp8/decoder/threading.c                | 132 ++++++++-------
 vp8/encoder/firstpass.c                |   8 +-
 vp8/vp8_dx_iface.c                     |  26 ++-
 vpx/src/vpx_encoder.c                  |   2 +-
 vpx/vp8.h                              |   2 +-
 vpx/vpx_codec.h                        |   4 +-
 vpx/vpx_decoder_compat.h               |   2 +-
 vpx/vpx_encoder.h                      |   2 +-
 vpx/vpx_image.h                        |   2 +-
 vpx_mem/include/vpx_mem_intrnl.h       |  46 +++---
 vpx_mem/vpx_mem.c                      |  32 ++--
 vpx_mem/vpx_mem.h                      |  12 +-
 vpx_scale/generic/gen_scalers.c        |  12 +-
 vpx_scale/generic/vpxscale.c           | 148 ++++++++---------
 vpx_scale/generic/yv12config.c         |   9 +-
 vpx_scale/generic/yv12extend.c         |  16 +-
 vpx_scale/yv12config.h                 |  10 +-
 vpxdec.c                               |  14 +-
 vpxenc.c                               |  35 ++--
 y4minput.c                             |   2 +-
 74 files changed, 1100 insertions(+), 1043 deletions(-)

diff --git a/examples/decoder_tmpl.c b/examples/decoder_tmpl.c
index ba3ac987f..26b745d34 100644
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -61,8 +61,8 @@ int main(int argc, char **argv) {
         die("Failed to open %s for writing", argv[2]);
 
     /* Read file header */
-    fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile);
-    if(!(file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
+    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
+         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
          && file_hdr[3]=='F'))
         die("%s is not an IVF file.", argv[1]);
 
diff --git a/examples/decoder_tmpl.txt b/examples/decoder_tmpl.txt
index 6da38c2a8..310c66d54 100644
--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -48,7 +48,7 @@ for(plane=0; plane < 3; plane++) {
     unsigned char *buf =img->planes[plane];
 
     for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        fwrite(buf, 1, img->d_w >> (plane?1:0), outfile);
+        if(fwrite(buf, 1, img->d_w >> (plane?1:0), outfile));
         buf += img->stride[plane];
     }
 }
diff --git a/examples/encoder_tmpl.c b/examples/encoder_tmpl.c
index fdfc3af8f..d9e4d0317 100644
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile,
     mem_put_le32(header+24, frame_cnt);           /* length */
     mem_put_le32(header+28, 0);                   /* unused */
 
-    fwrite(header, 1, 32, outfile);
+    if(fwrite(header, 1, 32, outfile));
 }
 
 
@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile,
     mem_put_le32(header+4, pts&0xFFFFFFFF);
     mem_put_le32(header+8, pts >> 32);
 
-    fwrite(header, 1, 12, outfile);
+    if(fwrite(header, 1, 12, outfile));
 }
 
 int main(int argc, char **argv) {
diff --git a/examples/encoder_tmpl.txt b/examples/encoder_tmpl.txt
index 87055ca13..3273164da 100644
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -61,8 +61,8 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
 case VPX_CODEC_CX_FRAME_PKT:
     write_ivf_frame_header(outfile, pkt);
-    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-           outfile);
+    if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+              outfile));
     break;
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
 
diff --git a/libmkv/EbmlIDs.h b/libmkv/EbmlIDs.h
index 271990827..429747063 100644
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -228,4 +228,4 @@ enum mkv
 //  TagString = 0x4487,
 //  TagBinary = 0x4485,
 };
-#endif
\ No newline at end of file
+#endif
diff --git a/vp8/common/alloccommon.c b/vp8/common/alloccommon.c
index 408c25306..9dce8c8f6 100644
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -56,7 +56,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)
 
     vp8_de_alloc_frame_buffers(oci);
 
-    // our internal buffers are always multiples of 16
+    /* our internal buffers are always multiples of 16 */
     if ((width & 0xf) != 0)
         width += 16 - (width & 0xf);
 
@@ -153,7 +153,7 @@ void vp8_setup_version(VP8_COMMON *cm)
         cm->full_pixel = 1;
         break;
     default:
-        //4,5,6,7 are reserved for future use
+        /*4,5,6,7 are reserved for future use*/
         cm->no_lpf = 0;
         cm->simpler_lpf = 0;
         cm->use_bilinear_mc_filter = 0;
@@ -177,10 +177,10 @@ void vp8_create_common(VP8_COMMON *oci)
     oci->clr_type = REG_YUV;
     oci->clamp_type = RECON_CLAMP_REQUIRED;
 
-    // Initialise reference frame sign bias structure to defaults
+    /* Initialise reference frame sign bias structure to defaults */
     vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));
 
-    // Default disable buffer to buffer copying
+    /* Default disable buffer to buffer copying */
     oci->copy_buffer_to_gf = 0;
     oci->copy_buffer_to_arf = 0;
 }
diff --git a/vp8/common/arm/bilinearfilter_arm.c b/vp8/common/arm/bilinearfilter_arm.c
index 247f95b6c..65afb41a1 100644
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -49,7 +49,7 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
     const short *vp8_filter
 );
 
-/*
+#if 0
 void vp8_filter_block2d_bil_first_pass_6
 (
     unsigned char *src_ptr,
@@ -66,14 +66,14 @@ void vp8_filter_block2d_bil_first_pass_6
     {
         for ( j=0; j<output_width; j++ )
         {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
             output_ptr[j] = ( ( (int)src_ptr[0]          * vp8_filter[0]) +
                                ((int)src_ptr[1] * vp8_filter[1]) +
                                 (VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -96,7 +96,7 @@ void vp8_filter_block2d_bil_second_pass_6
     {
         for ( j=0; j<output_width; j++ )
         {
-            // Apply filter
+            /* Apply filter */
             Temp =  ((int)src_ptr[0]         * vp8_filter[0]) +
                     ((int)src_ptr[output_width] * vp8_filter[1]) +
                     (VP8_FILTER_WEIGHT/2);
@@ -104,12 +104,12 @@ void vp8_filter_block2d_bil_second_pass_6
             src_ptr++;
         }
 
-        // Next row...
-        //src_ptr    += src_pixels_per_line - output_width;
+        /* Next row... */
+        /*src_ptr    += src_pixels_per_line - output_width;*/
         output_ptr += output_pitch;
     }
 }
-*/
+#endif
 
 void vp8_filter_block2d_bil_armv6
 (
@@ -124,13 +124,13 @@ void vp8_filter_block2d_bil_armv6
 )
 {
 
-    unsigned short FData[36*16]; // Temp data bufffer used in filtering
+    unsigned short FData[36*16]; /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
-    // pixel_step = 1;
+    /* First filter 1-D horizontally... */
+    /* pixel_step = 1; */
     vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);
 
-    // then 1-D vertically...
+    /* then 1-D vertically... */
     vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
 }
 
diff --git a/vp8/common/arm/filter_arm.c b/vp8/common/arm/filter_arm.c
index 4bfa3ab34..b4f2fe6ca 100644
--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -20,13 +20,13 @@
 
 DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
 {
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
     { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
     { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
     { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
     { 0, -1,   12,  123,  -6,  0 },
 };
 
@@ -93,34 +93,34 @@ void vp8_sixtap_predict_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data bufffer used in filtering */
 
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
-    // Vfilter is null. First pass only
+    /* Vfilter is null. First pass only */
     if (xoffset && !yoffset)
     {
-        //vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
-        //vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );
+        /*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
+        vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/
 
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
     }
     else
     {
-        // Vfilter is a 4 tap filter
+        /* Vfilter is a 4 tap filter */
         if (yoffset & 0x1)
         {
             vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
             vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
         }
-        // Vfilter is 6 tap filter
+        /* Vfilter is 6 tap filter */
         else
         {
             vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
@@ -129,7 +129,7 @@ void vp8_sixtap_predict_armv6
     }
 }
 
-/*
+#if 0
 void vp8_sixtap_predict8x4_armv6
 (
     unsigned char  *src_ptr,
@@ -142,33 +142,33 @@ void vp8_sixtap_predict8x4_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
 
-//  if (xoffset && !yoffset)
-//  {
-//      vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
-//  }
-    // Hfilter is null. Second pass only
-//  else if (!xoffset && yoffset)
-//  {
-//      vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
-//  }
-//  else
-//  {
-//      if (yoffset & 0x1)
-    //      vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
-    //  else
+    /*if (xoffset && !yoffset)
+    {
+        vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
+    }*/
+    /* Hfilter is null. Second pass only */
+    /*else if (!xoffset && yoffset)
+    {
+        vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
+    }
+    else
+    {
+        if (yoffset & 0x1)
+            vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
+        else*/
 
         vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );
 
         vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
-//  }
+    /*}*/
 }
-*/
+#endif
 
 void vp8_sixtap_predict8x8_armv6
 (
@@ -182,16 +182,16 @@ void vp8_sixtap_predict8x8_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
     if (xoffset && !yoffset)
     {
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
@@ -224,16 +224,16 @@ void vp8_sixtap_predict16x16_armv6
 {
     const short  *HFilter;
     const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */
 
     if (xoffset && !yoffset)
     {
         vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
     }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
     else if (!xoffset && yoffset)
     {
         vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
diff --git a/vp8/common/arm/loopfilter_arm.c b/vp8/common/arm/loopfilter_arm.c
index f86bca1ea..a81c50588 100644
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -35,8 +35,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;
 
 
 #if HAVE_ARMV6
-//ARMV6 loopfilter functions
-// Horizontal MB filtering
+/*ARMV6 loopfilter functions*/
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -60,7 +60,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
     vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                                int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -84,7 +84,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
     vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -112,7 +112,7 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -142,8 +142,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 #endif
 
 #if HAVE_ARMV7
-// NEON loopfilter functions
-// Horizontal MB filtering
+/* NEON loopfilter functions */
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -164,7 +164,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -185,7 +185,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
     vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -210,7 +210,7 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
     vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
diff --git a/vp8/common/arm/neon/recon_neon.c b/vp8/common/arm/neon/recon_neon.c
index 3b2df4c9f..f7930ee5f 100644
--- a/vp8/common/arm/neon/recon_neon.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -23,7 +23,7 @@ void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     unsigned char *udst_ptr = x->dst.u_buffer;
     unsigned char *vdst_ptr = x->dst.v_buffer;
     int ystride = x->dst.y_stride;
-    //int uv_stride = x->dst.uv_stride;
+    /*int uv_stride = x->dst.uv_stride;*/
 
     vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
 }
diff --git a/vp8/common/blockd.h b/vp8/common/blockd.h
index a81bc8b95..a38f0b72b 100644
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -24,7 +24,7 @@ void vpx_log(const char *format, ...);
 #define TRUE    1
 #define FALSE   0
 
-//#define DCPRED 1
+/*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3
 
@@ -39,7 +39,7 @@ void vpx_log(const char *format, ...);
 #define MAX_REF_LF_DELTAS       4
 #define MAX_MODE_LF_DELTAS      4
 
-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_DELTADATA   0
 #define SEGMENT_ABSDATA     1
 
@@ -75,11 +75,11 @@ typedef enum
 
 typedef enum
 {
-    DC_PRED,            // average of above and left pixels
-    V_PRED,             // vertical prediction
-    H_PRED,             // horizontal prediction
-    TM_PRED,            // Truemotion prediction
-    B_PRED,             // block based prediction, each block has its own prediction mode
+    DC_PRED,            /* average of above and left pixels */
+    V_PRED,             /* vertical prediction */
+    H_PRED,             /* horizontal prediction */
+    TM_PRED,            /* Truemotion prediction */
+    B_PRED,             /* block based prediction, each block has its own prediction mode */
 
     NEARESTMV,
     NEARMV,
@@ -90,16 +90,16 @@ typedef enum
     MB_MODE_COUNT
 } MB_PREDICTION_MODE;
 
-// Macroblock level features
+/* Macroblock level features */
 typedef enum
 {
-    MB_LVL_ALT_Q = 0,               // Use alternate Quantizer ....
-    MB_LVL_ALT_LF = 1,              // Use alternate loop filter value...
-    MB_LVL_MAX = 2,                 // Number of MB level features supported
+    MB_LVL_ALT_Q = 0,               /* Use alternate Quantizer .... */
+    MB_LVL_ALT_LF = 1,              /* Use alternate loop filter value... */
+    MB_LVL_MAX = 2                  /* Number of MB level features supported */
 
 } MB_LVL_FEATURES;
 
-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_ALTQ    0x01
 #define SEGMENT_ALT_LF  0x02
 
@@ -110,11 +110,11 @@ typedef enum
 
 typedef enum
 {
-    B_DC_PRED,          // average of above and left pixels
+    B_DC_PRED,          /* average of above and left pixels */
     B_TM_PRED,
 
-    B_VE_PRED,           // vertical prediction
-    B_HE_PRED,           // horizontal prediction
+    B_VE_PRED,           /* vertical prediction */
+    B_HE_PRED,           /* horizontal prediction */
 
     B_LD_PRED,
     B_RD_PRED,
@@ -170,13 +170,13 @@ typedef struct
     } mv;
 
     unsigned char partitioning;
-    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char mb_skip_coeff;                                /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
     unsigned char dc_diff;
     unsigned char need_to_clamp_mvs;
 
-    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+    unsigned char segment_id;                  /* Which set of segmentation parameters should be used for this MB */
 
-    unsigned char force_no_skip; //encoder only
+    unsigned char force_no_skip; /* encoder only */
 } MB_MODE_INFO;
 
 
@@ -197,7 +197,7 @@ typedef struct
 
     short *dequant;
 
-    // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+    /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
     unsigned char **base_pre;
     int pre;
     int pre_stride;
@@ -214,17 +214,17 @@ typedef struct
 
 typedef struct
 {
-    DECLARE_ALIGNED(16, short, diff[400]);      // from idct diff
+    DECLARE_ALIGNED(16, short, diff[400]);      /* from idct diff */
     DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-//not used    DECLARE_ALIGNED(16, short, reference[384]);
+/* not used    DECLARE_ALIGNED(16, short, reference[384]); */
     DECLARE_ALIGNED(16, short, qcoeff[400]);
     DECLARE_ALIGNED(16, short, dqcoeff[400]);
     DECLARE_ALIGNED(16, char,  eobs[25]);
 
-    // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
+    /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
     BLOCKD block[25];
 
-    YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
+    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
     YV12_BUFFER_CONFIG dst;
 
     MODE_INFO *mode_info_context;
@@ -235,39 +235,39 @@ typedef struct
     int up_available;
     int left_available;
 
-    // Y,U,V,Y2
+    /* Y,U,V,Y2 */
     ENTROPY_CONTEXT_PLANES *above_context;
     ENTROPY_CONTEXT_PLANES *left_context;
 
-    // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
+    /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
     unsigned char segmentation_enabled;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation map.
+    /* 0 (do not update) 1 (update) the macroblock segmentation map. */
     unsigned char update_mb_segmentation_map;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
     unsigned char update_mb_segmentation_data;
 
-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
     unsigned char mb_segement_abs_delta;
 
-    // Per frame flags that define which MB level features (such as quantizer or loop filter level)
-    // are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
-    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         // Probability Tree used to code Segment number
+    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+    /* are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO */
+    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         /* Probability Tree used to code Segment number */
 
-    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            // Segment parameters
+    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            /* Segment parameters */
 
-    // mode_based Loop filter adjustment
+    /* mode_based Loop filter adjustment */
     unsigned char mode_ref_lf_delta_enabled;
     unsigned char mode_ref_lf_delta_update;
 
-    // Delta values have the range +/- MAX_LOOP_FILTER
-    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                // 0 = Intra, Last, GF, ARF
-    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     // 0 = Intra, Last, GF, ARF
-    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      // 0 = BPRED, ZERO_MV, MV, SPLIT
-    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           // 0 = BPRED, ZERO_MV, MV, SPLIT
+    /* Delta values have the range +/- MAX_LOOP_FILTER */
+    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                /* 0 = Intra, Last, GF, ARF */
+    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     /* 0 = Intra, Last, GF, ARF */
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           /* 0 = BPRED, ZERO_MV, MV, SPLIT */
 
-    // Distance of MB away from frame edges
+    /* Distance of MB away from frame edges */
     int mb_to_left_edge;
     int mb_to_right_edge;
     int mb_to_top_edge;
diff --git a/vp8/common/debugmodes.c b/vp8/common/debugmodes.c
index c3ac88fc8..8c03480fa 100644
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -21,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     int mb_index = 0;
     FILE *mvs = fopen("mvs.stt", "a");
 
-    // print out the macroblock Y modes
+    /* print out the macroblock Y modes */
     mb_index = 0;
     fprintf(mvs, "Mb Modes for Frame %d\n", frame);
 
@@ -60,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
 
     fprintf(mvs, "\n");
 
-    // print out the macroblock UV modes
+    /* print out the macroblock UV modes */
     mb_index = 0;
     fprintf(mvs, "UV Modes for Frame %d\n", frame);
 
@@ -80,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
 
     fprintf(mvs, "\n");
 
-    // print out the block modes
+    /* print out the block modes */
     mb_index = 0;
     fprintf(mvs, "Mbs for Frame %d\n", frame);
     {
@@ -108,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     }
     fprintf(mvs, "\n");
 
-    // print out the macroblock mvs
+    /* print out the macroblock mvs */
     mb_index = 0;
     fprintf(mvs, "MVs for Frame %d\n", frame);
 
@@ -128,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
     fprintf(mvs, "\n");
 
 
-    // print out the block modes
+    /* print out the block modes */
     mb_index = 0;
     fprintf(mvs, "MVs for Frame %d\n", frame);
     {
diff --git a/vp8/common/defaultcoefcounts.h b/vp8/common/defaultcoefcounts.h
index b85f59b9f..ca58d565a 100644
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -15,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
 {
 
     {
-        // Block Type ( 0 )
+        /* Block Type ( 0 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {30190, 26544, 225,  24,   4,   0,   0,   0,   0,   0,   0, 4171593,},
             {26846, 25157, 1241, 130,  26,   6,   1,   0,   0,   0,   0, 149987,},
             {10484, 9538, 1006, 160,  36,  18,   0,   0,   0,   0,   0, 15104,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {25842, 40456, 1126,  83,  11,   2,   0,   0,   0,   0,   0,   0,},
             {9338, 8010, 512,  73,   7,   3,   2,   0,   0,   0,   0, 43294,},
             {1047, 751, 149,  31,  13,   6,   1,   0,   0,   0,   0, 879,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {26136, 9826, 252,  13,   0,   0,   0,   0,   0,   0,   0,   0,},
             {8134, 5574, 191,  14,   2,   0,   0,   0,   0,   0,   0, 35302,},
             { 605, 677, 116,   9,   1,   0,   0,   0,   0,   0,   0, 611,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {10263, 15463, 283,  17,   0,   0,   0,   0,   0,   0,   0,   0,},
             {2773, 2191, 128,   9,   2,   2,   0,   0,   0,   0,   0, 10073,},
             { 134, 125,  32,   4,   0,   2,   0,   0,   0,   0,   0,  50,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {10483, 2663,  23,   1,   0,   0,   0,   0,   0,   0,   0,   0,},
             {2137, 1251,  27,   1,   1,   0,   0,   0,   0,   0,   0, 14362,},
             { 116, 156,  14,   2,   1,   0,   0,   0,   0,   0,   0, 190,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {40977, 27614, 412,  28,   0,   0,   0,   0,   0,   0,   0,   0,},
             {6113, 5213, 261,  22,   3,   0,   0,   0,   0,   0,   0, 26164,},
             { 382, 312,  50,  14,   2,   0,   0,   0,   0,   0,   0, 345,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,  26,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,  13,   0,   0,   0,   0,   0,   0,   0,   0,   0, 319,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   8,},
         },
     },
     {
-        // Block Type ( 1 )
+        /* Block Type ( 1 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {3268, 19382, 1043, 250,  93,  82,  49,  26,  17,   8,  25, 82289,},
             {8758, 32110, 5436, 1832, 827, 668, 420, 153,  24,   0,   3, 52914,},
             {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399,  59,   0,   0, 18620,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {12419, 8420, 452,  62,   9,   1,   0,   0,   0,   0,   0,   0,},
             {11715, 8705, 693,  92,  15,   7,   2,   0,   0,   0,   0, 53988,},
             {7603, 8585, 2306, 778, 270, 145,  39,   5,   0,   0,   0, 9136,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {15938, 14335, 1207, 184,  55,  13,   4,   1,   0,   0,   0,   0,},
             {7415, 6829, 1138, 244,  71,  26,   7,   0,   0,   0,   0, 9980,},
             {1580, 1824, 655, 241,  89,  46,  10,   2,   0,   0,   0, 429,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {19453, 5260, 201,  19,   0,   0,   0,   0,   0,   0,   0,   0,},
             {9173, 3758, 213,  22,   1,   1,   0,   0,   0,   0,   0, 9820,},
             {1689, 1277, 276,  51,  17,   4,   0,   0,   0,   0,   0, 679,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {12076, 10667, 620,  85,  19,   9,   5,   0,   0,   0,   0,   0,},
             {4665, 3625, 423,  55,  19,   9,   0,   0,   0,   0,   0, 5127,},
             { 415, 440, 143,  34,  20,   7,   2,   0,   0,   0,   0, 101,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {12183, 4846, 115,  11,   1,   0,   0,   0,   0,   0,   0,   0,},
             {4226, 3149, 177,  21,   2,   0,   0,   0,   0,   0,   0, 7157,},
             { 375, 621, 189,  51,  11,   4,   1,   0,   0,   0,   0, 198,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {61658, 37743, 1203,  94,  10,   3,   0,   0,   0,   0,   0,   0,},
             {15514, 11563, 903, 111,  14,   5,   0,   0,   0,   0,   0, 25195,},
             { 929, 1077, 291,  78,  14,   7,   1,   0,   0,   0,   0, 507,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0, 990,  15,   3,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0, 412,  13,   0,   0,   0,   0,   0,   0,   0,   0, 1641,},
             {   0,  18,   7,   1,   0,   0,   0,   0,   0,   0,   0,  30,},
         },
     },
     {
-        // Block Type ( 2 )
+        /* Block Type ( 2 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             { 953, 24519, 628, 120,  28,  12,   4,   0,   0,   0,   0, 2248798,},
             {1525, 25654, 2647, 617, 239, 143,  42,   5,   0,   0,   0, 66837,},
             {1180, 11011, 3001, 1237, 532, 448, 239,  54,   5,   0,   0, 7122,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {1356, 2220,  67,  10,   4,   1,   0,   0,   0,   0,   0,   0,},
             {1450, 2544, 102,  18,   4,   3,   0,   0,   0,   0,   0, 57063,},
             {1182, 2110, 470, 130,  41,  21,   0,   0,   0,   0,   0, 6047,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             { 370, 3378, 200,  30,   5,   4,   1,   0,   0,   0,   0,   0,},
             { 293, 1006, 131,  29,  11,   0,   0,   0,   0,   0,   0, 5404,},
             { 114, 387,  98,  23,   4,   8,   1,   0,   0,   0,   0, 236,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             { 579, 194,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             { 395, 213,   5,   1,   0,   0,   0,   0,   0,   0,   0, 4157,},
             { 119, 122,   4,   0,   0,   0,   0,   0,   0,   0,   0, 300,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {  38, 557,  19,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  21, 114,  12,   1,   0,   0,   0,   0,   0,   0,   0, 427,},
             {   0,   5,   0,   0,   0,   0,   0,   0,   0,   0,   0,   7,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {  52,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  18,   6,   0,   0,   0,   0,   0,   0,   0,   0,   0, 652,},
             {   1,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  30,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             { 640, 569,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {  25,  77,   2,   0,   0,   0,   0,   0,   0,   0,   0, 517,},
             {   4,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
         },
     },
     {
-        // Block Type ( 3 )
+        /* Block Type ( 3 ) */
         {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
             {2506, 20161, 2707, 767, 261, 178, 107,  30,  14,   3,   0, 100694,},
             {8806, 36478, 8817, 3268, 1280, 850, 401, 114,  42,   0,   0, 58572,},
             {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175,  32,   0,   0, 19284,},
         },
         {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
             {9738, 11313, 959, 205,  70,  18,  11,   1,   0,   0,   0,   0,},
             {12628, 15085, 1507, 273,  52,  19,   9,   0,   0,   0,   0, 54280,},
             {10701, 15846, 5561, 1926, 813, 570, 249,  36,   0,   0,   0, 6460,},
         },
         {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
             {6781, 22539, 2784, 634, 182, 123,  20,   4,   0,   0,   0,   0,},
             {6263, 11544, 2649, 790, 259, 168,  27,   5,   0,   0,   0, 20539,},
             {3109, 4075, 2031, 896, 457, 386, 158,  29,   0,   0,   0, 1138,},
         },
         {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
             {11515, 4079, 465,  73,   5,  14,   2,   0,   0,   0,   0,   0,},
             {9361, 5834, 650,  96,  24,   8,   4,   0,   0,   0,   0, 22181,},
             {4343, 3974, 1360, 415, 132,  96,  14,   1,   0,   0,   0, 1267,},
         },
         {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
             {4787, 9297, 823, 168,  44,  12,   4,   0,   0,   0,   0,   0,},
             {3619, 4472, 719, 198,  60,  31,   3,   0,   0,   0,   0, 8401,},
             {1157, 1175, 483, 182,  88,  31,   8,   0,   0,   0,   0, 268,},
         },
         {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
             {8299, 1226,  32,   5,   1,   0,   0,   0,   0,   0,   0,   0,},
             {3502, 1568,  57,   4,   1,   1,   0,   0,   0,   0,   0, 9811,},
             {1055, 1070, 166,  29,   6,   1,   0,   0,   0,   0,   0, 527,},
         },
         {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
             {27414, 27927, 1989, 347,  69,  26,   0,   0,   0,   0,   0,   0,},
             {5876, 10074, 1574, 341,  91,  24,   4,   0,   0,   0,   0, 21954,},
             {1571, 2171, 778, 324, 124,  65,  16,   0,   0,   0,   0, 979,},
         },
         {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
             {   0,  29,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
             {   0,  23,   0,   0,   0,   0,   0,   0,   0,   0,   0, 459,},
             {   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  13,},
diff --git a/vp8/common/entropy.h b/vp8/common/entropy.h
index 80b481700..0685cd0ae 100644
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -17,18 +17,18 @@
 
 /* Coefficient token alphabet */
 
-#define ZERO_TOKEN              0       //0         Extra Bits 0+0
-#define ONE_TOKEN               1       //1         Extra Bits 0+1
-#define TWO_TOKEN               2       //2         Extra Bits 0+1
-#define THREE_TOKEN             3       //3         Extra Bits 0+1
-#define FOUR_TOKEN              4       //4         Extra Bits 0+1
-#define DCT_VAL_CATEGORY1       5       //5-6       Extra Bits 1+1
-#define DCT_VAL_CATEGORY2       6       //7-10      Extra Bits 2+1
-#define DCT_VAL_CATEGORY3       7       //11-26     Extra Bits 4+1
-#define DCT_VAL_CATEGORY4       8       //11-26     Extra Bits 5+1
-#define DCT_VAL_CATEGORY5       9       //27-58     Extra Bits 5+1
-#define DCT_VAL_CATEGORY6       10      //59+       Extra Bits 11+1
-#define DCT_EOB_TOKEN           11      //EOB       Extra Bits 0+0
+#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
+#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
+#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
+#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
+#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3       7       /* 11-26     Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY4       8       /* 11-26     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY5       9       /* 27-58     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6       10      /* 59+       Extra Bits 11+1 */
+#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */
 
 #define vp8_coef_tokens 12
 #define MAX_ENTROPY_TOKENS vp8_coef_tokens
@@ -83,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
    coefficient band (and since zigzag positions 0, 1, and 2 are in
    distinct bands). */
 
-/*# define DC_TOKEN_CONTEXTS        3 // 00, 0!0, !0!0 */
+/*# define DC_TOKEN_CONTEXTS        3*/ /* 00, 0!0, !0!0 */
 #   define PREV_COEF_CONTEXTS       3
 
 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
diff --git a/vp8/common/entropymv.c b/vp8/common/entropymv.c
index 8e72881e9..e5df1f095 100644
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -29,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
 const MV_CONTEXT vp8_default_mv_context[2] =
 {
     {{
-        // row
-        162,                                        // is short
-        128,                                        // sign
-        225, 146, 172, 147, 214,  39, 156,          // short tree
-        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 // long bits
+        /* row */
+        162,                                        /* is short */
+        128,                                        /* sign */
+        225, 146, 172, 147, 214,  39, 156,          /* short tree */
+        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 /* long bits */
     }},
 
 
 
     {{
-        // same for column
-        164,                                        // is short
+        /* same for column */
+        164,                                        /* is short */
         128,
         204, 170, 119, 235, 140, 230, 228,
-        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 // long bits
+        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 /* long bits */
 
     }}
 };
diff --git a/vp8/common/extend.c b/vp8/common/extend.c
index 7e06ac30c..47207fa79 100644
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -15,14 +15,14 @@
 
 static void extend_plane_borders
 (
-    unsigned char *s, // source
-    int sp,           // pitch
-    int h,            // height
-    int w,            // width
-    int et,           // extend top border
-    int el,           // extend left border
-    int eb,           // extend bottom border
-    int er            // extend right border
+    unsigned char *s, /* source */
+    int sp,           /* pitch */
+    int h,            /* height */
+    int w,            /* width */
+    int et,           /* extend top border */
+    int el,           /* extend left border */
+    int eb,           /* extend bottom border */
+    int er            /* extend right border */
 )
 {
 
@@ -31,7 +31,7 @@ static void extend_plane_borders
     unsigned char *dest_ptr1, *dest_ptr2;
     int linesize;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = s;
     src_ptr2 = s + w - 1;
     dest_ptr1 = s - el;
@@ -39,8 +39,9 @@ static void extend_plane_borders
 
     for (i = 0; i < h - 0 + 1; i++)
     {
-        // Some linkers will complain if we call vpx_memset with el set to a
-        // constant 0.
+        /* Some linkers will complain if we call vpx_memset with el set to a
+         * constant 0.
+         */
         if (el)
             vpx_memset(dest_ptr1, src_ptr1[0], el);
         vpx_memset(dest_ptr2, src_ptr2[0], er);
@@ -50,7 +51,7 @@ static void extend_plane_borders
         dest_ptr2 += sp;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = s - el;
     src_ptr2 = s + sp * (h - 1) - el;
     dest_ptr1 = s + sp * (-et) - el;
@@ -76,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
     int er = 0xf & (16 - (width & 0xf));
     int eb = 0xf & (16 - (height & 0xf));
 
-    // check for non multiples of 16
+    /* check for non multiples of 16 */
     if (er != 0 || eb != 0)
     {
         extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);
 
-        //adjust for uv
+        /* adjust for uv */
         height = (height + 1) >> 1;
         width  = (width  + 1) >> 1;
         er = 0x7 & (8 - (width  & 0x7));
@@ -95,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
     }
 }
 
-// note the extension is only for the last row, for intra prediction purpose
+/* note the extension is only for the last row, for intra prediction purpose */
 void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
 {
     int i;
diff --git a/vp8/common/filter_c.c b/vp8/common/filter_c.c
index 3d18d8191..399a847d5 100644
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -32,13 +32,13 @@ static const int bilinear_filters[8][2] =
 static const short sub_pel_filters[8][6] =
 {
 
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
     { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
     { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
     { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
     { 0, -1,   12,  123,  -6,  0 },
 
 
@@ -69,9 +69,9 @@ void vp8_filter_block2d_first_pass
                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);      // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */
 
-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
             Temp = Temp >> VP8_FILTER_SHIFT;
 
             if (Temp < 0)
@@ -83,7 +83,7 @@ void vp8_filter_block2d_first_pass
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -108,16 +108,16 @@ void vp8_filter_block2d_second_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply filter
+            /* Apply filter */
             Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
                    ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
                    ((int)src_ptr[0]                 * vp8_filter[2]) +
                    ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                    ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                    ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);   // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */
 
-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
             Temp = Temp >> VP8_FILTER_SHIFT;
 
             if (Temp < 0)
@@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
             src_ptr++;
         }
 
-        // Start next row
+        /* Start next row */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_pitch;
     }
@@ -146,12 +146,12 @@ void vp8_filter_block2d
     const short  *VFilter
 )
 {
-    int FData[9*4]; // Temp data bufffer used in filtering
+    int FData[9*4]; /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 }
 
@@ -195,8 +195,8 @@ void vp8_sixtap_predict_c
     const short  *HFilter;
     const short  *VFilter;
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
     vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
 }
@@ -212,16 +212,16 @@ void vp8_sixtap_predict8x8_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);
 
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);
 
 }
@@ -238,16 +238,16 @@ void vp8_sixtap_predict8x4_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data bufffer used in filtering */
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);
 
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);
 
 }
@@ -264,16 +264,16 @@ void vp8_sixtap_predict16x16_c
 {
     const short  *HFilter;
     const short  *VFilter;
-    int FData[21*24];   // Temp data bufffer used in filtering
+    int FData[21*24];   /* Temp data bufffer used in filtering */
 
 
-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);
 
-    // then filter verticaly...
+    /* then filter verticaly... */
     vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);
 
 }
@@ -324,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
             output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
                              ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                              (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_width;
     }
@@ -384,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
     {
         for (j = 0; j < output_width; j++)
         {
-            // Apply filter
+            /* Apply filter */
             Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
                    ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                    (VP8_FILTER_WEIGHT / 2);
@@ -392,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
             src_ptr++;
         }
 
-        // Next row...
+        /* Next row... */
         src_ptr    += src_pixels_per_line - output_width;
         output_ptr += output_pitch;
     }
@@ -432,12 +432,12 @@ void vp8_filter_block2d_bil
 )
 {
 
-    unsigned short FData[17*16];    // Temp data bufffer used in filtering
+    unsigned short FData[17*16];    /* Temp data bufffer used in filtering */
 
-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
     vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);
 
-    // then 1-D vertically...
+    /* then 1-D vertically... */
     vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
 }
 
diff --git a/vp8/common/findnearmv.c b/vp8/common/findnearmv.c
index 41037f707..e63d4ef8d 100644
--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -168,7 +168,7 @@ void vp8_find_near_mvs
 
     vp8_clamp_mv(nearest, xd);
     vp8_clamp_mv(nearby, xd);
-    vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
+    vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
 }
 
 vp8_prob *vp8_mv_ref_probs(
@@ -179,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
     p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
     p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
     p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
-    //p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
+    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
     return p;
 }
 
diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index c624242a5..7b1a84b70 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -73,7 +73,7 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
 #endif
 
 #endif
-    // Pure C:
+    /* Pure C: */
     vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
     vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
 
diff --git a/vp8/common/invtrans.c b/vp8/common/invtrans.c
index 4cb433a80..81a3f2d89 100644
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -38,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
 {
     int i;
 
-    // do 2nd order transform on the dc block
+    /* do 2nd order transform on the dc block */
     IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);
 
     recon_dcblock(x);
@@ -68,7 +68,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
     if (x->mode_info_context->mbmi.mode != B_PRED &&
         x->mode_info_context->mbmi.mode != SPLITMV)
     {
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
 
         IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
         recon_dcblock(x);
diff --git a/vp8/common/loopfilter.c b/vp8/common/loopfilter.c
index da9ca2871..f9d082304 100644
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -23,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);
 
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                            int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -47,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                            int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -71,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -99,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
     vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -140,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
     const int yhedge_boost  = 2;
     const int uvhedge_boost = 2;
 
-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
     for (i = 0; i <= MAX_LOOP_FILTER; i++)
     {
         int filt_lvl = i;
@@ -166,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
                 HEVThresh = 0;
         }
 
-        // Set loop filter paramaeters that control sharpness.
+        /* Set loop filter paramaeters that control sharpness. */
         block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
         block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);
 
@@ -195,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
 
     }
 
-    // Set up the function pointers depending on the type of loop filtering selected
+    /* Set up the function pointers depending on the type of loop filtering selected */
     if (lft == NORMAL_LOOPFILTER)
     {
         cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
@@ -212,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
     }
 }
 
-// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
-// each frame. Check last_frame_type to skip the function most of times.
+/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
+ * each frame. Check last_frame_type to skip the function most of times.
+ */
 void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
 {
     int HEVThresh;
     int i, j;
 
-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
     for (i = 0; i <= MAX_LOOP_FILTER; i++)
     {
         int filt_lvl = i;
@@ -247,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
 
         for (j = 0; j < 16; j++)
         {
-            //lfi[i].lim[j] = block_inside_limit;
-            //lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
+            /*lfi[i].lim[j] = block_inside_limit;
+            lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
             lfi[i].mbthr[j] = HEVThresh;
-            //lfi[i].flim[j] = filt_lvl;
+            /*lfi[i].flim[j] = filt_lvl;*/
             lfi[i].thr[j] = HEVThresh;
-            //lfi[i].uvlim[j] = block_inside_limit;
-            //lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
+            /*lfi[i].uvlim[j] = block_inside_limit;
+            lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
             lfi[i].uvmbthr[j] = HEVThresh;
-            //lfi[i].uvflim[j] = filt_lvl;
+            /*lfi[i].uvflim[j] = filt_lvl;*/
             lfi[i].uvthr[j] = HEVThresh;
         }
     }
@@ -268,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)
 
     if (mbd->mode_ref_lf_delta_enabled)
     {
-        // Aplly delta for reference frame
+        /* Apply delta for reference frame */
         *filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];
 
-        // Apply delta for mode
+        /* Apply delta for mode */
         if (mbmi->ref_frame == INTRA_FRAME)
         {
-            // Only the split mode BPRED has a further special case
+            /* Only the split mode BPRED has a further special case */
             if (mbmi->mode == B_PRED)
                 *filter_level +=  mbd->mode_lf_deltas[0];
         }
         else
         {
-            // Zero motion mode
+            /* Zero motion mode */
             if (mbmi->mode == ZEROMV)
                 *filter_level +=  mbd->mode_lf_deltas[1];
 
-            // Split MB motion mode
+            /* Split MB motion mode */
             else if (mbmi->mode == SPLITMV)
                 *filter_level +=  mbd->mode_lf_deltas[3];
 
-            // All other inter motion modes (Nearest, Near, New)
+            /* All other inter motion modes (Nearest, Near, New) */
             else
                 *filter_level +=  mbd->mode_lf_deltas[2];
         }
 
-        // Range check
+        /* Range check */
         if (*filter_level > MAX_LOOP_FILTER)
             *filter_level = MAX_LOOP_FILTER;
         else if (*filter_level < 0)
@@ -311,7 +312,7 @@ void vp8_loop_filter_frame
 {
     YV12_BUFFER_CONFIG *post = cm->frame_to_show;
     loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     int mb_row;
     int mb_col;
@@ -324,21 +325,21 @@ void vp8_loop_filter_frame
     int i;
     unsigned char *y_ptr, *u_ptr, *v_ptr;
 
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -348,18 +349,18 @@ void vp8_loop_filter_frame
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer;
     u_ptr = post->u_buffer;
     v_ptr = post->v_buffer;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
     {
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -368,9 +369,10 @@ void vp8_loop_filter_frame
 
             filter_level = baseline_filter_level[Segment];
 
-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-            // Apply any context driven MB level adjustment
+            /* Distance of Mb to the various image edges.
+             * These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+             * Apply any context driven MB level adjustment
+             */
             vp8_adjust_mb_lf_value(mbd, &filter_level);
 
             if (filter_level)
@@ -381,7 +383,7 @@ void vp8_loop_filter_frame
                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
                     cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
 
-                // don't apply across umv border
+                /* don't apply across umv border */
                 if (mb_row > 0)
                     cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);
 
@@ -393,14 +395,14 @@ void vp8_loop_filter_frame
             u_ptr += 8;
             v_ptr += 8;
 
-            mbd->mode_info_context++;     // step to next MB
+            mbd->mode_info_context++;     /* step to next MB */
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
         u_ptr += post->uv_stride *  8 - post->uv_width;
         v_ptr += post->uv_stride *  8 - post->uv_width;
 
-        mbd->mode_info_context++;         // Skip border mb
+        mbd->mode_info_context++;         /* Skip border mb */
     }
 }
 
@@ -424,26 +426,26 @@ void vp8_loop_filter_frame_yonly
     int baseline_filter_level[MAX_MB_SEGMENTS];
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     (void) sharpness_lvl;
 
-    //MODE_INFO * this_mb_mode_info = cm->mi;  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -453,16 +455,16 @@ void vp8_loop_filter_frame_yonly
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
     {
         for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -470,7 +472,7 @@ void vp8_loop_filter_frame_yonly
             int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
             filter_level = baseline_filter_level[Segment];
 
-            // Apply any context driven MB level adjustment
+            /* Apply any context driven MB level adjustment */
             vp8_adjust_mb_lf_value(mbd, &filter_level);
 
             if (filter_level)
@@ -481,7 +483,7 @@ void vp8_loop_filter_frame_yonly
                 if (mbd->mode_info_context->mbmi.dc_diff > 0)
                     cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
 
-                // don't apply across umv border
+                /* don't apply across umv border */
                 if (mb_row > 0)
                     cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);
 
@@ -490,12 +492,12 @@ void vp8_loop_filter_frame_yonly
             }
 
             y_ptr += 16;
-            mbd->mode_info_context ++;        // step to next MB
+            mbd->mode_info_context ++;        /* step to next MB */
 
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context ++;            // Skip border mb
+        mbd->mode_info_context ++;            /* Skip border mb */
     }
 
 }
@@ -516,7 +518,7 @@ void vp8_loop_filter_partial_frame
     unsigned char *y_ptr;
     int mb_row;
     int mb_col;
-    //int mb_rows = post->y_height >> 4;
+    /*int mb_rows = post->y_height >> 4;*/
     int mb_cols = post->y_width  >> 4;
 
     int linestocopy;
@@ -525,12 +527,12 @@ void vp8_loop_filter_partial_frame
     int baseline_filter_level[MAX_MB_SEGMENTS];
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
     (void) sharpness_lvl;
 
-    //MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        /* Point at base of Mb MODE_INFO list */
 
     linestocopy = (post->y_height >> (4 + Fraction));
 
@@ -539,19 +541,19 @@ void vp8_loop_filter_partial_frame
 
     linestocopy <<= 4;
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -561,16 +563,16 @@ void vp8_loop_filter_partial_frame
             baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
         vp8_frame_init_loop_filter(lfi, frame_type);
 
-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
     y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;
 
-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
     for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
     {
         for (mb_col = 0; mb_col < mb_cols; mb_col++)
@@ -593,10 +595,10 @@ void vp8_loop_filter_partial_frame
             }
 
             y_ptr += 16;
-            mbd->mode_info_context += 1;      // step to next MB
+            mbd->mode_info_context += 1;      /* step to next MB */
         }
 
         y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context += 1;          // Skip border mb
+        mbd->mode_info_context += 1;          /* Skip border mb */
     }
 }
diff --git a/vp8/common/loopfilter.h b/vp8/common/loopfilter.h
index a2049bf61..e45683460 100644
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -22,10 +22,10 @@ typedef enum
     SIMPLE_LOOPFILTER = 1
 } LOOPFILTERTYPE;
 
-// FRK
-// Need to align this structure so when it is declared and
-// passed it can be loaded into vector registers.
-// FRK
+/* FRK
+ * Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
 typedef struct
 {
     DECLARE_ALIGNED(16, signed char, lim[16]);
@@ -119,8 +119,8 @@ typedef struct
 
 typedef void loop_filter_uvfunction
 (
-    unsigned char *u,   // source pointer
-    int p,              // pitch
+    unsigned char *u,   /* source pointer */
+    int p,              /* pitch */
     const signed char *flimit,
     const signed char *limit,
     const signed char *thresh,
diff --git a/vp8/common/loopfilter_filters.c b/vp8/common/loopfilter_filters.c
index 1b92e00aa..694052924 100644
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -23,7 +23,7 @@ static __inline signed char vp8_signed_char_clamp(int t)
 }
 
 
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                      uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
 {
@@ -39,7 +39,7 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
     return mask;
 }
 
-// is there high variance internal edge ( 11111111 yes, 00000000 no)
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
 {
     signed char hev = 0;
@@ -61,17 +61,18 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
     qs0 = (signed char) * oq0 ^ 0x80;
     qs1 = (signed char) * oq1 ^ 0x80;
 
-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
     vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
     vp8_filter &= hev;
 
-    // inner taps
+    /* inner taps */
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
     vp8_filter &= mask;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
-    // if it equals 4 we'll set to adjust by -1 to account for the fact
-    // we'd round 3 the other way
+    /* save bottom 3 bits so that we round one side +4 and the other +3
+     * if it equals 4 we'll set to adjust by -1 to account for the fact
+     * we'd round 3 the other way
+     */
     Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
     Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
     Filter1 >>= 3;
@@ -82,7 +83,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
     *op0 = u ^ 0x80;
     vp8_filter = Filter1;
 
-    // outer tap adjustments
+    /* outer tap adjustments */
     vp8_filter += 1;
     vp8_filter >>= 1;
     vp8_filter &= ~hev;
@@ -96,19 +97,20 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
 void vp8_loop_filter_horizontal_edge_c
 (
     unsigned char *s,
-    int p, //pitch
+    int p, /* pitch */
     const signed char *flimit,
     const signed char *limit,
     const signed char *thresh,
     int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
         mask = vp8_filter_mask(limit[i], flimit[i],
@@ -134,12 +136,13 @@ void vp8_loop_filter_vertical_edge_c
     int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
         mask = vp8_filter_mask(limit[i], flimit[i],
@@ -166,7 +169,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     signed char qs1 = (signed char) * oq1 ^ 0x80;
     signed char qs2 = (signed char) * oq2 ^ 0x80;
 
-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
     vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
     vp8_filter &= mask;
@@ -174,7 +177,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     Filter2 = vp8_filter;
     Filter2 &= hev;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
     Filter1 = vp8_signed_char_clamp(Filter2 + 4);
     Filter2 = vp8_signed_char_clamp(Filter2 + 3);
     Filter1 >>= 3;
@@ -183,25 +186,25 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
     ps0 = vp8_signed_char_clamp(ps0 + Filter2);
 
 
-    // only apply wider filter if not high edge variance
+    /* only apply wider filter if not high edge variance */
     vp8_filter &= ~hev;
     Filter2 = vp8_filter;
 
-    // roughly 3/7th difference across boundary
+    /* roughly 3/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
     s = vp8_signed_char_clamp(qs0 - u);
     *oq0 = s ^ 0x80;
     s = vp8_signed_char_clamp(ps0 + u);
     *op0 = s ^ 0x80;
 
-    // roughly 2/7th difference across boundary
+    /* roughly 2/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
     s = vp8_signed_char_clamp(qs1 - u);
     *oq1 = s ^ 0x80;
     s = vp8_signed_char_clamp(ps1 + u);
     *op1 = s ^ 0x80;
 
-    // roughly 1/7th difference across boundary
+    /* roughly 1/7th difference across boundary */
     u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
     s = vp8_signed_char_clamp(qs2 - u);
     *oq2 = s ^ 0x80;
@@ -219,12 +222,13 @@ void vp8_mbloop_filter_horizontal_edge_c
     int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
     do
     {
 
@@ -253,7 +257,7 @@ void vp8_mbloop_filter_vertical_edge_c
     int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
     signed char mask = 0;
     int i = 0;
 
@@ -273,12 +277,13 @@ void vp8_mbloop_filter_vertical_edge_c
 
 }
 
-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
 {
-// Why does this cause problems for win32?
-// error C2143: syntax error : missing ';' before 'type'
-//  (void) limit;
+/* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ *  (void) limit;
+ */
     signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= flimit * 2 + limit) * -1;
     return mask;
 }
@@ -296,7 +301,7 @@ static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *o
     vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
     vp8_filter &= mask;
 
-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
     Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
     Filter1 >>= 3;
     u = vp8_signed_char_clamp(q0 - Filter1);
@@ -324,7 +329,7 @@ void vp8_loop_filter_simple_horizontal_edge_c
 
     do
     {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
         mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
         vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
         ++s;
@@ -348,7 +353,7 @@ void vp8_loop_filter_simple_vertical_edge_c
 
     do
     {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
         mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
         vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
         s += p;
diff --git a/vp8/common/mbpitch.c b/vp8/common/mbpitch.c
index ce40d16ca..af55e2fe0 100644
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -14,7 +14,7 @@
 typedef enum
 {
     PRED = 0,
-    DEST = 1,
+    DEST = 1
 } BLOCKSET;
 
 void vp8_setup_block
@@ -62,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
         v = &x->pre.v_buffer;
     }
 
-    for (block = 0; block < 16; block++) // y blocks
+    for (block = 0; block < 16; block++) /* y blocks */
     {
         vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
                         (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
     }
 
-    for (block = 16; block < 20; block++) // U and V blocks
+    for (block = 16; block < 20; block++) /* U and V blocks */
     {
         vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
                         ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
@@ -123,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
 void vp8_build_block_doffsets(MACROBLOCKD *x)
 {
 
-    // handle the destination pitch features
+    /* handle the destination pitch features */
     vp8_setup_macroblock(x, DEST);
     vp8_setup_macroblock(x, PRED);
 }
diff --git a/vp8/common/modecont.c b/vp8/common/modecont.c
index 0fa299577..86a74bc0f 100644
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -14,27 +14,27 @@
 const int vp8_mode_contexts[6][4] =
 {
     {
-        // 0
+        /* 0 */
         7,     1,     1,   143,
     },
     {
-        // 1
+        /* 1 */
         14,    18,    14,   107,
     },
     {
-        // 2
+        /* 2 */
         135,    64,    57,    68,
     },
     {
-        // 3
+        /* 3 */
         60,    56,   128,    65,
     },
     {
-        // 4
+        /* 4 */
         159,   134,   128,    34,
     },
     {
-        // 5
+        /* 5 */
         234,   188,   128,    28,
     },
 };
diff --git a/vp8/common/modecontext.c b/vp8/common/modecontext.c
index 8e483b800..a31a561c8 100644
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -14,133 +14,133 @@
 const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
 {
     {
-        //Above Mode :  0
-        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, // left_mode 0
-        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, // left_mode 1
-        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, // left_mode 2
-        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, // left_mode 3
-        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, // left_mode 4
-        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, // left_mode 5
-        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, // left_mode 6
-        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, // left_mode 7
-        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, // left_mode 8
-        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, // left_mode 9
+        /*Above Mode :  0*/
+        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, /* left_mode 0 */
+        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, /* left_mode 1 */
+        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, /* left_mode 2 */
+        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, /* left_mode 3 */
+        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, /* left_mode 4 */
+        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, /* left_mode 5 */
+        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, /* left_mode 6 */
+        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, /* left_mode 7 */
+        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, /* left_mode 8 */
+        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  1
-        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, // left_mode 0
-        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, // left_mode 1
-        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, // left_mode 2
-        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, // left_mode 3
-        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, // left_mode 4
-        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, // left_mode 5
-        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, // left_mode 6
-        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, // left_mode 7
-        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, // left_mode 8
-        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, // left_mode 9
+        /*Above Mode :  1*/
+        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, /* left_mode 0 */
+        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, /* left_mode 1 */
+        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, /* left_mode 2 */
+        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, /* left_mode 3 */
+        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, /* left_mode 4 */
+        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, /* left_mode 5 */
+        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, /* left_mode 6 */
+        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, /* left_mode 7 */
+        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, /* left_mode 8 */
+        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  2
-        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, // left_mode 0
-        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, // left_mode 1
-        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, // left_mode 2
-        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, // left_mode 3
-        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, // left_mode 4
-        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, // left_mode 5
-        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, // left_mode 6
-        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, // left_mode 7
-        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, // left_mode 8
-        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, // left_mode 9
+        /*Above Mode :  2*/
+        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, /* left_mode 0 */
+        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, /* left_mode 1 */
+        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, /* left_mode 2 */
+        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, /* left_mode 3 */
+        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, /* left_mode 4 */
+        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, /* left_mode 5 */
+        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, /* left_mode 6 */
+        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, /* left_mode 7 */
+        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, /* left_mode 8 */
+        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  3
-        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, // left_mode 0
-        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, // left_mode 1
-        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, // left_mode 2
-        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, // left_mode 3
-        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, // left_mode 4
-        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, // left_mode 5
-        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, // left_mode 6
-        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, // left_mode 7
-        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, // left_mode 8
-        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, // left_mode 9
+        /*Above Mode :  3*/
+        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, /* left_mode 0 */
+        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, /* left_mode 1 */
+        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, /* left_mode 2 */
+        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, /* left_mode 3 */
+        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, /* left_mode 4 */
+        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, /* left_mode 5 */
+        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, /* left_mode 6 */
+        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, /* left_mode 7 */
+        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, /* left_mode 8 */
+        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  4
-        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, // left_mode 0
-        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, // left_mode 1
-        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, // left_mode 2
-        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, // left_mode 3
-        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, // left_mode 4
-        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, // left_mode 5
-        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, // left_mode 6
-        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, // left_mode 7
-        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, // left_mode 8
-        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, // left_mode 9
+        /*Above Mode :  4*/
+        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, /* left_mode 0 */
+        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, /* left_mode 1 */
+        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, /* left_mode 2 */
+        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, /* left_mode 3 */
+        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, /* left_mode 4 */
+        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, /* left_mode 5 */
+        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, /* left_mode 6 */
+        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, /* left_mode 7 */
+        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, /* left_mode 8 */
+        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  5
-        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, // left_mode 0
-        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, // left_mode 1
-        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, // left_mode 2
-        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, // left_mode 3
-        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, // left_mode 4
-        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, // left_mode 5
-        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, // left_mode 6
-        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, // left_mode 7
-        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, // left_mode 8
-        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, // left_mode 9
+        /*Above Mode :  5*/
+        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, /* left_mode 0 */
+        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, /* left_mode 1 */
+        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, /* left_mode 2 */
+        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, /* left_mode 3 */
+        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, /* left_mode 4 */
+        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, /* left_mode 5 */
+        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, /* left_mode 6 */
+        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, /* left_mode 7 */
+        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, /* left_mode 8 */
+        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  6
-        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, // left_mode 0
-        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, // left_mode 1
-        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, // left_mode 2
-        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, // left_mode 3
-        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, // left_mode 4
-        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, // left_mode 5
-        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, // left_mode 6
-        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, // left_mode 7
-        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, // left_mode 8
-        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, // left_mode 9
+        /*Above Mode :  6*/
+        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, /* left_mode 0 */
+        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, /* left_mode 1 */
+        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, /* left_mode 2 */
+        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, /* left_mode 3 */
+        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, /* left_mode 4 */
+        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, /* left_mode 5 */
+        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, /* left_mode 6 */
+        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, /* left_mode 7 */
+        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, /* left_mode 8 */
+        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  7
-        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, // left_mode 0
-        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, // left_mode 1
-        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, // left_mode 2
-        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, // left_mode 3
-        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, // left_mode 4
-        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, // left_mode 5
-        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, // left_mode 6
-        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, // left_mode 7
-        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, // left_mode 8
-        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, // left_mode 9
+        /*Above Mode :  7*/
+        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, /* left_mode 0 */
+        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, /* left_mode 1 */
+        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, /* left_mode 2 */
+        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, /* left_mode 3 */
+        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, /* left_mode 4 */
+        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, /* left_mode 5 */
+        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, /* left_mode 6 */
+        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, /* left_mode 7 */
+        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, /* left_mode 8 */
+        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  8
-        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, // left_mode 0
-        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, // left_mode 1
-        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, // left_mode 2
-        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, // left_mode 3
-        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, // left_mode 4
-        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, // left_mode 5
-        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, // left_mode 6
-        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, // left_mode 7
-        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, // left_mode 8
-        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, // left_mode 9
+        /*Above Mode :  8*/
+        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, /* left_mode 0 */
+        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, /* left_mode 1 */
+        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, /* left_mode 2 */
+        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, /* left_mode 3 */
+        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, /* left_mode 4 */
+        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, /* left_mode 5 */
+        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, /* left_mode 6 */
+        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, /* left_mode 7 */
+        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, /* left_mode 8 */
+        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, /* left_mode 9 */
     },
     {
-        //Above Mode :  9
-        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, // left_mode 0
-        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, // left_mode 1
-        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, // left_mode 2
-        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, // left_mode 3
-        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, // left_mode 4
-        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, // left_mode 5
-        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, // left_mode 6
-        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, // left_mode 7
-        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, // left_mode 8
-        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, // left_mode 9
+        /*Above Mode :  9*/
+        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, /* left_mode 0 */
+        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, /* left_mode 1 */
+        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, /* left_mode 2 */
+        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, /* left_mode 3 */
+        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, /* left_mode 4 */
+        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, /* left_mode 5 */
+        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, /* left_mode 6 */
+        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, /* left_mode 7 */
+        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, /* left_mode 8 */
+        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, /* left_mode 9 */
     },
 };
diff --git a/vp8/common/onyxc_int.h b/vp8/common/onyxc_int.h
index d12143d4d..7e44c1f0c 100644
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -21,9 +21,9 @@
 #include "recon.h"
 #include "postproc.h"
 
-//#ifdef PACKET_TESTING
+/*#ifdef PACKET_TESTING*/
 #include "header.h"
-//#endif
+/*#endif*/
 
 /* Create/destroy static data structures. */
 
@@ -43,7 +43,7 @@ typedef struct frame_contexts
     vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
     vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
     MV_CONTEXT mvc[2];
-    MV_CONTEXT pre_mvc[2];  //not to caculate the mvcost for the frame if mvc doesn't change.
+    MV_CONTEXT pre_mvc[2];  /* not to caculate the mvcost for the frame if mvc doesn't change. */
 } FRAME_CONTEXT;
 
 typedef enum
@@ -105,7 +105,7 @@ typedef struct VP8Common
     YV12_BUFFER_CONFIG post_proc_buffer;
     YV12_BUFFER_CONFIG temp_scale_frame;
 
-    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skipped.
+    FRAME_TYPE last_frame_type;  /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
     FRAME_TYPE frame_type;
 
     int show_frame;
@@ -116,7 +116,7 @@ typedef struct VP8Common
     int mb_cols;
     int mode_info_stride;
 
-    // prfile settings
+    /* profile settings */
     int mb_no_coeff_skip;
     int no_lpf;
     int simpler_lpf;
@@ -124,7 +124,7 @@ typedef struct VP8Common
     int full_pixel;
 
     int base_qindex;
-    int last_kf_gf_q;  // Q used on the last GF or KF
+    int last_kf_gf_q;  /* Q used on the last GF or KF */
 
     int y1dc_delta_q;
     int y2dc_delta_q;
@@ -154,31 +154,31 @@ typedef struct VP8Common
     int last_sharpness_level;
     int sharpness_level;
 
-    int refresh_last_frame;       // Two state 0 = NO, 1 = YES
-    int refresh_golden_frame;     // Two state 0 = NO, 1 = YES
-    int refresh_alt_ref_frame;     // Two state 0 = NO, 1 = YES
+    int refresh_last_frame;       /* Two state 0 = NO, 1 = YES */
+    int refresh_golden_frame;     /* Two state 0 = NO, 1 = YES */
+    int refresh_alt_ref_frame;     /* Two state 0 = NO, 1 = YES */
 
-    int copy_buffer_to_gf;         // 0 none, 1 Last to GF, 2 ARF to GF
-    int copy_buffer_to_arf;        // 0 none, 1 Last to ARF, 2 GF to ARF
+    int copy_buffer_to_gf;         /* 0 none, 1 Last to GF, 2 ARF to GF */
+    int copy_buffer_to_arf;        /* 0 none, 1 Last to ARF, 2 GF to ARF */
 
-    int refresh_entropy_probs;    // Two state 0 = NO, 1 = YES
+    int refresh_entropy_probs;    /* Two state 0 = NO, 1 = YES */
 
-    int ref_frame_sign_bias[MAX_REF_FRAMES];    // Two state 0, 1
+    int ref_frame_sign_bias[MAX_REF_FRAMES];    /* Two state 0, 1 */
 
-    // Y,U,V,Y2
-    ENTROPY_CONTEXT_PLANES *above_context;   // row of context for each plane
-    ENTROPY_CONTEXT_PLANES left_context;  // (up to) 4 contexts ""
+    /* Y,U,V,Y2 */
+    ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
+    ENTROPY_CONTEXT_PLANES left_context;  /* (up to) 4 contexts "" */
 
 
-    // keyframe block modes are predicted by their above, left neighbors
+    /* keyframe block modes are predicted by their above, left neighbors */
 
     vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
     vp8_prob kf_ymode_prob [VP8_YMODES-1];  /* keyframe "" */
     vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];
 
 
-    FRAME_CONTEXT lfc; // last frame entropy
-    FRAME_CONTEXT fc;  // this frame entropy
+    FRAME_CONTEXT lfc; /* last frame entropy */
+    FRAME_CONTEXT fc;  /* this frame entropy */
 
     unsigned int current_video_frame;
 
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 42e37da30..6f419435a 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -25,28 +25,28 @@
     (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
     ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
 
-// global constants
+/* global constants */
 
 static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
 {
-    { RGB_TO_YUV(0x98FB98) },   // PaleGreen
-    { RGB_TO_YUV(0x00FF00) },   // Green
-    { RGB_TO_YUV(0xADFF2F) },   // GreenYellow
-    { RGB_TO_YUV(0x228B22) },   // ForestGreen
-    { RGB_TO_YUV(0x006400) },   // DarkGreen
-    { RGB_TO_YUV(0x98F5FF) },   // Cadet Blue
-    { RGB_TO_YUV(0x6CA6CD) },   // Sky Blue
-    { RGB_TO_YUV(0x00008B) },   // Dark blue
-    { RGB_TO_YUV(0x551A8B) },   // Purple
-    { RGB_TO_YUV(0xFF0000) }    // Red
+    { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
+    { RGB_TO_YUV(0x00FF00) },   /* Green */
+    { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
+    { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
+    { RGB_TO_YUV(0x006400) },   /* DarkGreen */
+    { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
+    { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
+    { RGB_TO_YUV(0x00008B) },   /* Dark blue */
+    { RGB_TO_YUV(0x551A8B) },   /* Purple */
+    { RGB_TO_YUV(0xFF0000) }    /* Red */
 };
 
 static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
 {
-    { RGB_TO_YUV(0x00ff00) },   // Blue
-    { RGB_TO_YUV(0x0000ff) },   // Green
-    { RGB_TO_YUV(0xffff00) },   // Yellow
-    { RGB_TO_YUV(0xff0000) },   // Red
+    { RGB_TO_YUV(0x00ff00) },   /* Blue */
+    { RGB_TO_YUV(0x0000ff) },   /* Green */
+    { RGB_TO_YUV(0xffff00) },   /* Yellow */
+    { RGB_TO_YUV(0xff0000) },   /* Red */
 };
 
 static const short kernel5[] =
@@ -129,7 +129,7 @@ void vp8_post_proc_down_and_across_c
 
     for (row = 0; row < rows; row++)
     {
-        // post_proc_down for one row
+        /* post_proc_down for one row */
         p_src = src_ptr;
         p_dst = dst_ptr;
 
@@ -152,7 +152,7 @@ void vp8_post_proc_down_and_across_c
             p_dst[col] = v;
         }
 
-        // now post_proc_across
+        /* now post_proc_across */
         p_src = dst_ptr;
         p_dst = dst_ptr;
 
@@ -181,12 +181,12 @@ void vp8_post_proc_down_and_across_c
                 p_dst[col-2] = d[(col-2)&7];
         }
 
-        //handle the last two pixels
+        /* handle the last two pixels */
         p_dst[col-2] = d[(col-2)&7];
         p_dst[col-1] = d[(col-1)&7];
 
 
-        //next row
+        /* next row */
         src_ptr += pitch;
         dst_ptr += pitch;
     }
@@ -379,9 +379,9 @@ static void fillrd(struct postproc_state *state, int q, int a)
 
     sigma = ai + .5 + .6 * (63 - qi) / 63.0;
 
-    // set up a lookup table of 256 entries that matches
-    // a gaussian distribution with sigma determined by q.
-    //
+    /* set up a lookup table of 256 entries that matches
+     * a gaussian distribution with sigma determined by q.
+     */
     {
         double i;
         int next, j;
@@ -472,9 +472,10 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
     }
 }
 
-// Blend the macro block with a solid colored square.  Leave the
-// edges unblended to give distinction to macro blocks in areas
-// filled with the same color block.
+/* Blend the macro block with a solid colored square.  Leave the
+ * edges unblended to give distinction to macro blocks in areas
+ * filled with the same color block.
+ */
 void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
                         int y1, int u1, int v1, int alpha, int stride)
 {
@@ -575,7 +576,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     {
         *dest = *oci->frame_to_show;
 
-        // handle problem with extending borders
+        /* handle problem with extending borders */
         dest->y_width = oci->Width;
         dest->y_height = oci->Height;
         dest->uv_height = dest->y_height / 2;
@@ -644,7 +645,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -658,7 +659,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -676,7 +677,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -693,7 +694,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -714,7 +715,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
         y_ptr = post->y_buffer + 4 * post->y_stride + 4;
 
-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
         for (i = 0; i < mb_rows; i++)
         {
             for (j = 0; j < mb_cols; j++)
@@ -727,7 +728,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 y_ptr += 16;
             }
 
-            mb_index ++; //border
+            mb_index ++; /* border */
             y_ptr += post->y_stride  * 16 - post->y_width;
 
         }
@@ -736,7 +737,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
     }
 
-    // Draw motion vectors
+    /* Draw motion vectors */
     if (flags & VP8D_DEBUG_LEVEL5)
     {
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
@@ -777,7 +778,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    // Color in block modes
+    /* Color in block modes */
     if (flags & VP8D_DEBUG_LEVEL6)
     {
         int i, j;
@@ -813,7 +814,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    // Color in frame reference blocks
+    /* Color in frame reference blocks */
     if (flags & VP8D_DEBUG_LEVEL7)
     {
         int i, j;
@@ -851,7 +852,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
 
     *dest = oci->post_proc_buffer;
 
-    // handle problem with extending borders
+    /* handle problem with extending borders */
     dest->y_width = oci->Width;
     dest->y_height = oci->Height;
     dest->uv_height = dest->y_height / 2;
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 651671a43..6a51d2cd6 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -23,7 +23,7 @@ enum
     VP8D_DEBUG_LEVEL4   = 1<<6,
     VP8D_DEBUG_LEVEL5   = 1<<7,
     VP8D_DEBUG_LEVEL6   = 1<<8,
-    VP8D_DEBUG_LEVEL7   = 1<<9,
+    VP8D_DEBUG_LEVEL7   = 1<<9
 };
 
 #endif
diff --git a/vp8/common/recon.c b/vp8/common/recon.c
index 2cb3bc6ce..d72d6e410 100644
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -112,15 +112,15 @@ void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     BLOCKD *b = &x->block[0];
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[4];
+    /*b = &x->block[4];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[8];
+    /*b = &x->block[8];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 
-    //b = &x->block[12];
+    /*b = &x->block[12];*/
     b += 4;
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
 #else
@@ -149,7 +149,7 @@ void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
     RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     b += 4;
 
-    //b = &x->block[16];
+    /*b = &x->block[16];*/
 
     RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
     b++;
diff --git a/vp8/common/recon.h b/vp8/common/recon.h
index c432c7bd2..1e6e343fc 100644
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -18,10 +18,10 @@
     void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)
 
 #define prototype_recon_block(sym) \
-    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
+    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch)
 
 #define prototype_recon_macroblock(sym) \
-    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x);
+    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
 
 struct vp8_recon_rtcd_vtable;
 
diff --git a/vp8/common/reconinter.c b/vp8/common/reconinter.c
index 2a7f12908..74871c0e8 100644
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -18,9 +18,10 @@
 #include "onyxc_int.h"
 #endif
 
-// use this define on systems where unaligned int reads and writes are
-// not allowed, i.e. ARM architectures
-//#define MUST_BE_ALIGNED
+/* use this define on systems where unaligned int reads and writes are
+ * not allowed, i.e. ARM architectures
+ */
+/*#define MUST_BE_ALIGNED*/
 
 
 static const int bbb[4] = {0, 2, 8, 10};
@@ -255,7 +256,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
     }
 }
 
-//encoder only
+/*encoder only*/
 void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 {
 
@@ -491,15 +492,16 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
 }
 
 
-// The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
-// situation, we can write the result directly to dst buffer instead of writing it to predictor
-// buffer and then copying it to dst buffer.
+/* The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
+ * situation, we can write the result directly to dst buffer instead of writing it to predictor
+ * buffer and then copying it to dst buffer.
+ */
 static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
 {
     int r;
     unsigned char *ptr_base;
     unsigned char *ptr;
-    //unsigned char *pred_ptr = d->predictor;
+    /*unsigned char *pred_ptr = d->predictor;*/
     int dst_stride = d->dst_stride;
     int pre_stride = d->pre_stride;
 
@@ -535,8 +537,8 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp
 
 void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 {
-    //unsigned char *pred_ptr = x->block[0].predictor;
-    //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;
+    /*unsigned char *pred_ptr = x->block[0].predictor;
+    unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
     unsigned char *pred_ptr = x->predictor;
     unsigned char *dst_ptr = x->dst.y_buffer;
 
@@ -546,26 +548,26 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
         unsigned char *ptr_base;
         unsigned char *ptr;
         unsigned char *uptr, *vptr;
-        //unsigned char *pred_ptr = x->predictor;
-        //unsigned char *upred_ptr = &x->predictor[256];
-        //unsigned char *vpred_ptr = &x->predictor[320];
+        /*unsigned char *pred_ptr = x->predictor;
+        unsigned char *upred_ptr = &x->predictor[256];
+        unsigned char *vpred_ptr = &x->predictor[320];*/
         unsigned char *udst_ptr = x->dst.u_buffer;
         unsigned char *vdst_ptr = x->dst.v_buffer;
 
         int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
         int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
-        int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
+        int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/
 
         ptr_base = x->pre.y_buffer;
         ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);
 
         if ((mv_row | mv_col) & 7)
         {
-            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
         }
         else
         {
-            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
         }
 
         mv_row = x->block[16].bmi.mv.as_mv.row;
@@ -588,8 +590,9 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
     }
     else
     {
-        //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
-        //if sth is wrong, go back to what it is in build_inter_predictors_mb.
+        /* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
+         * if sth is wrong, go back to what it is in build_inter_predictors_mb.
+         */
         int i;
 
         if (x->mode_info_context->mbmi.partitioning < 3)
@@ -597,7 +600,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
             for (i = 0; i < 4; i++)
             {
                 BLOCKD *d = &x->block[bbb[i]];
-                //vp8_build_inter_predictors4b(x, d, 16);
+                /*vp8_build_inter_predictors4b(x, d, 16);*/
 
                 {
                     unsigned char *ptr_base;
@@ -609,11 +612,11 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
                     if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
                     {
-                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                     }
                     else
                     {
-                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                     }
                 }
             }
@@ -627,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
                 if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                 {
-                    //vp8_build_inter_predictors2b(x, d0, 16);
+                    /*vp8_build_inter_predictors2b(x, d0, 16);*/
                     unsigned char *ptr_base;
                     unsigned char *ptr;
                     unsigned char *pred_ptr = d0->predictor;
@@ -659,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 
             if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
             {
-                //vp8_build_inter_predictors2b(x, d0, 8);
+                /*vp8_build_inter_predictors2b(x, d0, 8);*/
                 unsigned char *ptr_base;
                 unsigned char *ptr;
                 unsigned char *pred_ptr = d0->predictor;
diff --git a/vp8/common/reconintra.c b/vp8/common/reconintra.c
index ce0b1b8ec..9cf5f6a88 100644
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -14,9 +14,9 @@
 #include "reconintra.h"
 #include "vpx_mem/vpx_mem.h"
 
-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
-
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
     int i;
@@ -42,7 +42,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
         yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
     }
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -156,14 +156,14 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
     int r, c, i;
 
     int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; //x->predictor;
+    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
 
     for (i = 0; i < 16; i++)
     {
         yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
     }
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -204,11 +204,11 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
             expected_dc = 128;
         }
 
-        //vpx_memset(ypred_ptr, expected_dc, 256);
+        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
         for (r = 0; r < 16; r++)
         {
             vpx_memset(ypred_ptr, expected_dc, 16);
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -222,7 +222,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
             ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
             ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
             ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -233,7 +233,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
         {
 
             vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -256,7 +256,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
                 ypred_ptr[c] = pred;
             }
 
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -418,8 +418,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
     unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
     unsigned char vleft_col[20];
     unsigned char vtop_left = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
-    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
     int uv_stride = x->dst.uv_stride;
 
     int i, j;
@@ -472,14 +472,14 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         }
 
 
-        //vpx_memset(upred_ptr,expected_udc,64);
-        //vpx_memset(vpred_ptr,expected_vdc,64);
+        /*vpx_memset(upred_ptr,expected_udc,64);*/
+        /*vpx_memset(vpred_ptr,expected_vdc,64);*/
         for (i = 0; i < 8; i++)
         {
             vpx_memset(upred_ptr, expected_udc, 8);
             vpx_memset(vpred_ptr, expected_vdc, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
     break;
@@ -491,8 +491,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         {
             vpx_memcpy(upred_ptr, uabove_row, 8);
             vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -505,8 +505,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
         {
             vpx_memset(upred_ptr, uleft_col[i], 8);
             vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
 
@@ -538,8 +538,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
                 vpred_ptr[j] = predv;
             }
 
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
diff --git a/vp8/common/reconintra4x4.c b/vp8/common/reconintra4x4.c
index 315135184..db44fa190 100644
--- a/vp8/common/reconintra4x4.c
+++ b/vp8/common/reconintra4x4.c
@@ -56,7 +56,7 @@ void vp8_predict_intra4x4(BLOCKD *x,
     break;
     case B_TM_PRED:
     {
-        // prediction similar to true_motion prediction
+        /* prediction similar to true_motion prediction */
         for (r = 0; r < 4; r++)
         {
             for (c = 0; c < 4; c++)
@@ -295,8 +295,9 @@ void vp8_predict_intra4x4(BLOCKD *x,
 
     }
 }
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
+/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+ * to the right prediction have filled in pixels to use.
+ */
 void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
 {
     unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
diff --git a/vp8/common/setupintrarecon.c b/vp8/common/setupintrarecon.c
index 8647ae2aa..7976e252b 100644
--- a/vp8/common/setupintrarecon.c
+++ b/vp8/common/setupintrarecon.c
@@ -16,7 +16,7 @@ void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf)
 {
     int i;
 
-    // set up frame new frame for intra coded blocks
+    /* set up frame new frame for intra coded blocks */
     vpx_memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5);
     for (i = 0; i < ybf->y_height; i++)
         ybf->y_buffer[ybf->y_stride *i - 1] = (unsigned char) 129;
diff --git a/vp8/common/textblit.c b/vp8/common/textblit.c
index b7922d385..1756100a7 100644
--- a/vp8/common/textblit.c
+++ b/vp8/common/textblit.c
@@ -57,7 +57,7 @@ static void plot (const int x, const int y, unsigned char *image, const int pitc
     image [x+y*pitch] ^= 255;
 }
 
-// Bresenham line algorithm
+/* Bresenham line algorithm */
 void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch)
 {
     int steep = abs(y1 - y0) > abs(x1 - x0);
diff --git a/vp8/common/threading.h b/vp8/common/threading.h
index f9a257460..1929f7c4f 100644
--- a/vp8/common/threading.h
+++ b/vp8/common/threading.h
@@ -12,7 +12,7 @@
 #ifndef _PTHREAD_EMULATION
 #define _PTHREAD_EMULATION
 
-#define VPXINFINITE 10000       //10second.
+#define VPXINFINITE 10000       /* 10second. */
 
 /* Thread management macros */
 #ifdef _WIN32
@@ -72,11 +72,11 @@
 #define sem_wait(sem) (semaphore_wait(*sem) )
 #define sem_post(sem) semaphore_signal(*sem)
 #define sem_destroy(sem) semaphore_destroy(mach_task_self(),*sem)
-#define thread_sleep(nms) // { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
 #else
 #include <unistd.h>
 #include <sched.h>
-#define thread_sleep(nms) sched_yield();// {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);}
+#define thread_sleep(nms) sched_yield();/* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */
 #endif
 /* Not Windows. Assume pthreads */
 
diff --git a/vp8/common/treecoder.h b/vp8/common/treecoder.h
index 35e5be10c..ebf51c5ed 100644
--- a/vp8/common/treecoder.h
+++ b/vp8/common/treecoder.h
@@ -12,7 +12,7 @@
 #ifndef __INC_TREECODER_H
 #define __INC_TREECODER_H
 
-typedef unsigned char vp8bc_index_t; // probability index
+typedef unsigned char vp8bc_index_t; /* probability index */
 
 
 typedef unsigned char vp8_prob;
diff --git a/vp8/common/type_aliases.h b/vp8/common/type_aliases.h
index f2a370298..22b531a76 100644
--- a/vp8/common/type_aliases.h
+++ b/vp8/common/type_aliases.h
@@ -64,32 +64,32 @@ typedef signed char     INT8;
 #endif
 
 #ifndef TYPE_INT16
-//#define TYPE_INT16
+/*#define TYPE_INT16*/
 typedef signed short    INT16;
 #endif
 
 #ifndef TYPE_INT32
-//#define TYPE_INT32
+/*#define TYPE_INT32*/
 typedef signed int      INT32;
 #endif
 
 #ifndef TYPE_UINT8
-//#define TYPE_UINT8
+/*#define TYPE_UINT8*/
 typedef unsigned char   UINT8;
 #endif
 
 #ifndef TYPE_UINT32
-//#define TYPE_UINT32
+/*#define TYPE_UINT32*/
 typedef unsigned int    UINT32;
 #endif
 
 #ifndef TYPE_UINT16
-//#define TYPE_UINT16
+/*#define TYPE_UINT16*/
 typedef unsigned short  UINT16;
 #endif
 
 #ifndef TYPE_BOOL
-//#define TYPE_BOOL
+/*#define TYPE_BOOL*/
 typedef int             BOOL;
 #endif
 
@@ -101,7 +101,7 @@ typedef __int64 INT64;
 
 #ifndef TYPE_INT64
 #ifdef _TMS320C6X
-//for now we only have 40bits
+/* for now we only have 40bits */
 typedef long INT64;
 #else
 typedef long long INT64;
diff --git a/vp8/common/x86/loopfilter_x86.c b/vp8/common/x86/loopfilter_x86.c
index 3ff8c4e12..93107e179 100644
--- a/vp8/common/x86/loopfilter_x86.c
+++ b/vp8/common/x86/loopfilter_x86.c
@@ -40,7 +40,7 @@ extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2;
 extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2;
 
 #if HAVE_MMX
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -66,7 +66,7 @@ void vp8_loop_filter_mbhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -92,7 +92,7 @@ void vp8_loop_filter_mbvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -122,7 +122,7 @@ void vp8_loop_filter_bhs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
 }
 
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -153,7 +153,7 @@ void vp8_loop_filter_bvs_mmx(unsigned char *y_ptr, unsigned char *u_ptr, unsigne
 #endif
 
 
-// Horizontal MB filtering
+/* Horizontal MB filtering */
 #if HAVE_SSE2
 void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
@@ -177,7 +177,7 @@ void vp8_loop_filter_mbhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 }
 
 
-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -200,7 +200,7 @@ void vp8_loop_filter_mbvs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 }
 
 
-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -227,7 +227,7 @@ void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsign
 }
 
 
-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
diff --git a/vp8/common/x86/vp8_asm_stubs.c b/vp8/common/x86/vp8_asm_stubs.c
index 950d96262..8dd07c90d 100644
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -128,7 +128,7 @@ void vp8_sixtap_predict4x4_mmx
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 16*16);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
     HFilter = vp8_six_tap_mmx[xoffset];
     vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter);
@@ -149,7 +149,7 @@ void vp8_sixtap_predict16x16_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);  /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -181,7 +181,7 @@ void vp8_sixtap_predict8x8_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -207,7 +207,7 @@ void vp8_sixtap_predict8x4_mmx
 )
 {
 
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -253,7 +253,7 @@ void vp8_sixtap_predict16x16_sse2
 
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 24*24);    /* Temp data bufffer used in filtering */
 
     const short *HFilter, *VFilter;
 
@@ -268,14 +268,14 @@ void vp8_sixtap_predict16x16_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2,   src_pixels_per_line, 21, 32);
         vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr,   dst_pitch, 32, 16 , 16, dst_pitch, VFilter);
@@ -293,7 +293,7 @@ void vp8_sixtap_predict8x8_sse2
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
 
     if (xoffset)
@@ -307,14 +307,14 @@ void vp8_sixtap_predict8x8_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter);
     }
@@ -331,7 +331,7 @@ void vp8_sixtap_predict8x4_sse2
     int dst_pitch
 )
 {
-    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(16, unsigned short, FData2, 256);  /* Temp data bufffer used in filtering */
     const short *HFilter, *VFilter;
 
     if (xoffset)
@@ -345,14 +345,14 @@ void vp8_sixtap_predict8x4_sse2
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             HFilter = vp8_six_tap_mmx[xoffset];
             vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         VFilter = vp8_six_tap_mmx[yoffset];
         vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter);
     }
@@ -444,13 +444,13 @@ void vp8_sixtap_predict16x16_ssse3
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line) , src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset);
     }
 }
@@ -481,7 +481,7 @@ void vp8_sixtap_predict8x8_ssse3
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset);
     }
 }
@@ -508,13 +508,13 @@ void vp8_sixtap_predict8x4_ssse3
         }
         else
         {
-            // First-pass only
+            /* First-pass only */
             vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset);
         }
     }
     else
     {
-        // Second-pass only
+        /* Second-pass only */
         vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset);
     }
 }
diff --git a/vp8/common/x86/x86_systemdependent.c b/vp8/common/x86/x86_systemdependent.c
index b983bc27e..38500fd01 100644
--- a/vp8/common/x86/x86_systemdependent.c
+++ b/vp8/common/x86/x86_systemdependent.c
@@ -74,7 +74,7 @@ void vp8_arch_x86_common_init(VP8_COMMON *ctx)
 
 #if CONFIG_POSTPROC
         rtcd->postproc.down        = vp8_mbpost_proc_down_mmx;
-        //rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
+        /*rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;*/
         rtcd->postproc.downacross  = vp8_post_proc_down_and_across_mmx;
         rtcd->postproc.addnoise    = vp8_plane_add_noise_mmx;
 #endif
diff --git a/vp8/decoder/arm/arm_dsystemdependent.c b/vp8/decoder/arm/arm_dsystemdependent.c
index 77cff47db..e9741e286 100644
--- a/vp8/decoder/arm/arm_dsystemdependent.c
+++ b/vp8/decoder/arm/arm_dsystemdependent.c
@@ -35,7 +35,7 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
         pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_v6;
         pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_v6;
         pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_v6;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
         pbi->dboolhuff.start             = vp8dx_start_decode_c;
         pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
         pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
@@ -54,7 +54,7 @@ void vp8_arch_arm_decode_init(VP8D_COMP *pbi)
         pbi->dequant.dc_idct_add_y_block = vp8_dequant_dc_idct_add_y_block_neon;
         pbi->dequant.idct_add_y_block    = vp8_dequant_idct_add_y_block_neon;
         pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_neon;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
         pbi->dboolhuff.start             = vp8dx_start_decode_c;
         pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
         pbi->dboolhuff.debool            = vp8dx_decode_bool_c;
diff --git a/vp8/decoder/arm/dboolhuff_arm.h b/vp8/decoder/arm/dboolhuff_arm.h
index d2ebc71e8..985951c7c 100644
--- a/vp8/decoder/arm/dboolhuff_arm.h
+++ b/vp8/decoder/arm/dboolhuff_arm.h
@@ -11,7 +11,7 @@
  * to be useless. However, its been left (for now)
  * for reference.
  */
-/*
+#if 0
 #if HAVE_ARMV6
 #undef vp8_dbool_start
 #define vp8_dbool_start vp8dx_start_decode_v6
@@ -24,7 +24,7 @@
 
 #undef vp8_dbool_devalue
 #define vp8_dbool_devalue vp8_decode_value_v6
-#endif // HAVE_ARMV6
+#endif /* HAVE_ARMV6 */
 
 #if HAVE_ARMV7
 #undef vp8_dbool_start
@@ -38,6 +38,6 @@
 
 #undef vp8_dbool_devalue
 #define vp8_dbool_devalue vp8_decode_value_neon
-#endif // HAVE_ARMV7
-*/
-#endif // DBOOLHUFF_ARM_H
+#endif /* HAVE_ARMV7 */
+#endif
+#endif /* DBOOLHUFF_ARM_H */
diff --git a/vp8/decoder/dboolhuff.h b/vp8/decoder/dboolhuff.h
index c72bc0330..c851aa7e5 100644
--- a/vp8/decoder/dboolhuff.h
+++ b/vp8/decoder/dboolhuff.h
@@ -45,7 +45,7 @@ typedef struct
     const unsigned char *source, unsigned int source_sz)
 #define prototype_dbool_fill(sym) void sym(BOOL_DECODER *br)
 #define prototype_dbool_debool(sym) int sym(BOOL_DECODER *br, int probability)
-#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits);
+#define prototype_dbool_devalue(sym) int sym(BOOL_DECODER *br, int bits)
 
 #if ARCH_ARM
 #include "arm/dboolhuff_arm.h"
@@ -84,16 +84,17 @@ typedef struct vp8_dboolhuff_rtcd_vtable {
     vp8_dbool_devalue_fn_t devalue;
 } vp8_dboolhuff_rtcd_vtable_t;
 
-// There are no processor-specific versions of these
-// functions right now. Disable RTCD to avoid using
-// function pointers which gives a speed boost
-//#ifdef ENABLE_RUNTIME_CPU_DETECT
-//#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
-//#define IF_RTCD(x) (x)
-//#else
+/* There are no processor-specific versions of these
+ * functions right now. Disable RTCD to avoid using
+ * function pointers which gives a speed boost
+ */
+/*#ifdef ENABLE_RUNTIME_CPU_DETECT
+#define DBOOLHUFF_INVOKE(ctx,fn) (ctx)->fn
+#define IF_RTCD(x) (x)
+#else*/
 #define DBOOLHUFF_INVOKE(ctx,fn) vp8_dbool_##fn
 #define IF_RTCD(x) NULL
-//#endif
+/*#endif*/
 
 DECLARE_ALIGNED(16, extern const unsigned char, vp8dx_bitreader_norm[256]);
 
diff --git a/vp8/decoder/decodemv.c b/vp8/decoder/decodemv.c
index 106dbde06..203d72dd2 100644
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -51,10 +51,10 @@ static int vp8_read_uv_mode(vp8_reader *bc, const vp8_prob *p)
 
 static void vp8_read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x)
 {
-    // Is segmentation enabled
+    /* Is segmentation enabled */
     if (x->segmentation_enabled && x->update_mb_segmentation_map)
     {
-        // If so then read the segment id.
+        /* If so then read the segment id. */
         if (vp8_read(r, x->mb_segment_tree_probs[0]))
             mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2]));
         else
@@ -70,14 +70,15 @@ static void vp8_kfread_modes(VP8D_COMP *pbi, MODE_INFO *m, int mb_row, int mb_co
         {
             MB_PREDICTION_MODE y_mode;
 
-            // Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
-            // By default on a key frame reset all MBs to segment 0
+            /* Read the Macroblock segmentation map if it is being updated explicitly this frame (reset to 0 above by default)
+             * By default on a key frame reset all MBs to segment 0
+             */
             m->mbmi.segment_id = 0;
 
             if (pbi->mb.update_mb_segmentation_map)
                 vp8_read_mb_features(bc, &m->mbmi, &pbi->mb);
 
-            // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+            /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
             if (pbi->common.mb_no_coeff_skip)
                 m->mbmi.mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
             else
@@ -306,8 +307,9 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN;
 
     mbmi->need_to_clamp_mvs = 0;
-    // Distance of Mb to the various image edges.
-    // These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+    /* Distance of Mb to the various image edges.
+     * These specified to 8th pel as they are always compared to MV values that are in 1/8th pel units
+     */
     pbi->mb.mb_to_left_edge =
     mb_to_left_edge = -((mb_col * 16) << 3);
     mb_to_left_edge -= LEFT_TOP_MARGIN;
@@ -316,11 +318,11 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
     mb_to_right_edge = ((pbi->common.mb_cols - 1 - mb_col) * 16) << 3;
     mb_to_right_edge += RIGHT_BOTTOM_MARGIN;
 
-    // If required read in new segmentation data for this MB
+    /* If required read in new segmentation data for this MB */
     if (pbi->mb.update_mb_segmentation_map)
         vp8_read_mb_features(bc, mbmi, &pbi->mb);
 
-    // Read the macroblock coeff skip flag if this feature is in use, else default to 0
+    /* Read the macroblock coeff skip flag if this feature is in use, else default to 0 */
     if (pbi->common.mb_no_coeff_skip)
         mbmi->mb_skip_coeff = vp8_read(bc, pbi->prob_skip_false);
     else
@@ -362,7 +364,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
                 mv_contz = vp8_mv_cont(&(vp8_left_bmi(mi, k)->mv.as_mv), &(vp8_above_bmi(mi, k, mis)->mv.as_mv));
 
-                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) //pc->fc.sub_mv_ref_prob))
+                switch (bmi.mode = (B_PREDICTION_MODE) sub_mv_ref(bc, vp8_sub_mv_ref_prob2 [mv_contz])) /*pc->fc.sub_mv_ref_prob))*/
                 {
                 case NEW4X4:
                     read_mv(bc, mv, (const MV_CONTEXT *) mvc);
@@ -425,7 +427,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         case NEARMV:
             *mv = nearby;
-            // Clip "next_nearest" so that it does not extend to far out of image
+            /* Clip "next_nearest" so that it does not extend to far out of image */
             mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
             mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
             mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
@@ -434,7 +436,7 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         case NEARESTMV:
             *mv = nearest;
-            // Clip "next_nearest" so that it does not extend to far out of image
+            /* Clip "next_nearest" so that it does not extend to far out of image */
             mv->col = (mv->col < mb_to_left_edge) ? mb_to_left_edge : mv->col;
             mv->col = (mv->col > mb_to_right_edge) ? mb_to_right_edge : mv->col;
             mv->row = (mv->row < mb_to_top_edge) ? mb_to_top_edge : mv->row;
@@ -462,12 +464,12 @@ void vp8_read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi,
 
         propagate_mv:  /* same MV throughout */
             {
-                //int i=0;
-                //do
-                //{
-                //  mi->bmi[i].mv.as_mv = *mv;
-                //}
-                //while( ++i < 16);
+                /*int i=0;
+                do
+                {
+                  mi->bmi[i].mv.as_mv = *mv;
+                }
+                while( ++i < 16);*/
 
                 mi->bmi[0].mv.as_mv = *mv;
                 mi->bmi[1].mv.as_mv = *mv;
@@ -541,16 +543,16 @@ void vp8_decode_mode_mvs(VP8D_COMP *pbi)
 
         while (++mb_col < pbi->common.mb_cols)
         {
-//          vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);
+            /*vp8_read_mb_modes_mv(pbi, xd->mode_info_context, &xd->mode_info_context->mbmi, mb_row, mb_col);*/
             if(pbi->common.frame_type == KEY_FRAME)
                 vp8_kfread_modes(pbi, mi, mb_row, mb_col);
             else
                 vp8_read_mb_modes_mv(pbi, mi, &mi->mbmi, mb_row, mb_col);
 
-            mi++;       // next macroblock
+            mi++;       /* next macroblock */
         }
 
-        mi++;           // skip left predictor each row
+        mi++;           /* skip left predictor each row */
     }
 }
 
diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 273bdb694..1bdc3d946 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -50,7 +50,7 @@ void vp8cx_init_de_quantizer(VP8D_COMP *pbi)
         pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q);
         pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q);
 
-        // all the ac values = ;
+        /* all the ac values = ; */
         for (i = 1; i < 16; i++)
         {
             int rc = vp8_default_zig_zag1d[i];
@@ -69,24 +69,24 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
     MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi;
     VP8_COMMON *const pc = & pbi->common;
 
-    // Decide whether to use the default or alternate baseline Q value.
+    /* Decide whether to use the default or alternate baseline Q value. */
     if (xd->segmentation_enabled)
     {
-        // Abs Value
+        /* Abs Value */
         if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA)
             QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
 
-        // Delta Value
+        /* Delta Value */
         else
         {
             QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id];
-            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    // Clamp to valid range
+            QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0;    /* Clamp to valid range */
         }
     }
     else
         QIndex = pc->base_qindex;
 
-    // Set up the block level dequant pointers
+    /* Set up the block level dequant pointers */
     for (i = 0; i < 16; i++)
     {
         xd->block[i].dequant = pc->Y1dequant[QIndex];
@@ -107,8 +107,9 @@ void mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd)
 #define RTCD_VTABLE(x) NULL
 #endif
 
-//skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
-// to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+/* skip_recon_mb() is Modified: Instead of writing the result to predictor buffer and then copying it
+ *  to dst buffer, we can write the result directly to dst buffer. This eliminates unnecessary copy.
+ */
 static void skip_recon_mb(VP8D_COMP *pbi, MACROBLOCKD *xd)
 {
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
@@ -207,7 +208,7 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
-    // do prediction
+    /* do prediction */
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8_build_intra_predictors_mbuv(xd);
@@ -224,13 +225,13 @@ void vp8_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd)
         vp8_build_inter_predictors_mb(xd);
     }
 
-    // dequantization and idct
+    /* dequantization and idct */
     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
         if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
@@ -336,7 +337,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
     vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
     recon_yoffset = mb_row * recon_y_stride * 16;
     recon_uvoffset = mb_row * recon_uv_stride * 8;
-    // reset above block coeffs
+    /* reset above block coeffs */
 
     xd->above_context = pc->above_context;
     xd->up_available = (mb_row != 0);
@@ -356,8 +357,9 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
             }
         }
 
-        // Distance of Mb to the various image edges.
-        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+        /* Distance of Mb to the various image edges.
+         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+         */
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -367,7 +369,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
         xd->left_available = (mb_col != 0);
 
-        // Select the appropriate reference frame for this MB
+        /* Select the appropriate reference frame for this MB */
         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
             ref_fb_idx = pc->lst_fb_idx;
         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -399,7 +401,7 @@ void vp8_decode_mb_row(VP8D_COMP *pbi,
 
     }
 
-    // adjust to the next row of mbs
+    /* adjust to the next row of mbs */
     vp8_extend_mb_row(
         &pc->yv12_fb[dst_fb_idx],
         xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8
@@ -445,7 +447,7 @@ static void setup_token_decoder(VP8D_COMP *pbi,
     for (i = 0; i < num_part; i++)
     {
         const unsigned char *partition_size_ptr = cx_data + i * 3;
-        unsigned int         partition_size;
+        ptrdiff_t            partition_size;
 
         /* Calculate the length of this partition. The last partition
          * size is implicit.
@@ -496,7 +498,7 @@ static void init_frame(VP8D_COMP *pbi)
 
     if (pc->frame_type == KEY_FRAME)
     {
-        // Various keyframe initializations
+        /* Various keyframe initializations */
         vpx_memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context));
 
         vp8_init_mbmode_probs(pc);
@@ -504,22 +506,23 @@ static void init_frame(VP8D_COMP *pbi)
         vp8_default_coef_probs(pc);
         vp8_kf_default_bmode_probs(pc->kf_bmode_prob);
 
-        // reset the segment feature data to 0 with delta coding (Default state).
+        /* reset the segment feature data to 0 with delta coding (Default state). */
         vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
         xd->mb_segement_abs_delta = SEGMENT_DELTADATA;
 
-       // reset the mode ref deltasa for loop filter
+        /* reset the mode ref deltasa for loop filter */
         vpx_memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas));
         vpx_memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas));
 
-        // All buffers are implicitly updated on key frames.
+        /* All buffers are implicitly updated on key frames. */
         pc->refresh_golden_frame = 1;
         pc->refresh_alt_ref_frame = 1;
         pc->copy_buffer_to_gf = 0;
         pc->copy_buffer_to_arf = 0;
 
-        // Note that Golden and Altref modes cannot be used on a key frame so
-        // ref_frame_sign_bias[] is undefined and meaningless
+        /* Note that Golden and Altref modes cannot be used on a key frame so
+         * ref_frame_sign_bias[] is undefined and meaningless
+         */
         pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0;
         pc->ref_frame_sign_bias[ALTREF_FRAME] = 0;
     }
@@ -530,7 +533,7 @@ static void init_frame(VP8D_COMP *pbi)
         else
             pc->mcomp_filter_type = BILINEAR;
 
-        // To enable choice of different interploation filters
+        /* To enable choice of different interploation filters */
         if (pc->mcomp_filter_type == SIXTAP)
         {
             xd->subpixel_predict      = SUBPIX_INVOKE(RTCD_VTABLE(subpix), sixtap4x4);
@@ -561,7 +564,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     MACROBLOCKD *const xd  = & pbi->mb;
     const unsigned char *data = (const unsigned char *)pbi->Source;
     const unsigned char *const data_end = data + pbi->source_sz;
-    unsigned int first_partition_length_in_bytes;
+    ptrdiff_t first_partition_length_in_bytes;
 
     int mb_row;
     int i, j, k, l;
@@ -587,7 +590,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         const int Width = pc->Width;
         const int Height = pc->Height;
 
-        // vet via sync code
+        /* vet via sync code */
         if (data[0] != 0x9d || data[1] != 0x01 || data[2] != 0x2a)
             vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM,
                                "Invalid frame sync code");
@@ -643,12 +646,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         pc->clamp_type  = (CLAMP_TYPE)vp8_read_bit(bc);
     }
 
-    // Is segmentation enabled
+    /* Is segmentation enabled */
     xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc);
 
     if (xd->segmentation_enabled)
     {
-        // Signal whether or not the segmentation map is being explicitly updated this frame.
+        /* Signal whether or not the segmentation map is being explicitly updated this frame. */
         xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc);
         xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc);
 
@@ -658,12 +661,12 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
             vpx_memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data));
 
-            // For each segmentation feature (Quant and loop filter level)
+            /* For each segmentation feature (Quant and loop filter level) */
             for (i = 0; i < MB_LVL_MAX; i++)
             {
                 for (j = 0; j < MAX_MB_SEGMENTS; j++)
                 {
-                    // Frame level data
+                    /* Frame level data */
                     if (vp8_read_bit(bc))
                     {
                         xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]);
@@ -679,57 +682,57 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
         if (xd->update_mb_segmentation_map)
         {
-            // Which macro block level features are enabled
+            /* Which macro block level features are enabled */
             vpx_memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs));
 
-            // Read the probs used to decode the segment id for each macro block.
+            /* Read the probs used to decode the segment id for each macro block. */
             for (i = 0; i < MB_FEATURE_TREE_PROBS; i++)
             {
-                // If not explicitly set value is defaulted to 255 by memset above
+                /* If not explicitly set value is defaulted to 255 by memset above */
                 if (vp8_read_bit(bc))
                     xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8);
             }
         }
     }
 
-    // Read the loop filter level and type
+    /* Read the loop filter level and type */
     pc->filter_type = (LOOPFILTERTYPE) vp8_read_bit(bc);
     pc->filter_level = vp8_read_literal(bc, 6);
     pc->sharpness_level = vp8_read_literal(bc, 3);
 
-    // Read in loop filter deltas applied at the MB level based on mode or ref frame.
+    /* Read in loop filter deltas applied at the MB level based on mode or ref frame. */
     xd->mode_ref_lf_delta_update = 0;
     xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc);
 
     if (xd->mode_ref_lf_delta_enabled)
     {
-        // Do the deltas need to be updated
+        /* Do the deltas need to be updated */
         xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc);
 
         if (xd->mode_ref_lf_delta_update)
         {
-            // Send update
+            /* Send update */
             for (i = 0; i < MAX_REF_LF_DELTAS; i++)
             {
                 if (vp8_read_bit(bc))
                 {
-                    //sign = vp8_read_bit( bc );
+                    /*sign = vp8_read_bit( bc );*/
                     xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
 
-                    if (vp8_read_bit(bc))        // Apply sign
+                    if (vp8_read_bit(bc))        /* Apply sign */
                         xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1;
                 }
             }
 
-            // Send update
+            /* Send update */
             for (i = 0; i < MAX_MODE_LF_DELTAS; i++)
             {
                 if (vp8_read_bit(bc))
                 {
-                    //sign = vp8_read_bit( bc );
+                    /*sign = vp8_read_bit( bc );*/
                     xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6);
 
-                    if (vp8_read_bit(bc))        // Apply sign
+                    if (vp8_read_bit(bc))        /* Apply sign */
                         xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1;
                 }
             }
@@ -739,11 +742,11 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     setup_token_decoder(pbi, data + first_partition_length_in_bytes);
     xd->current_bc = &pbi->bc2;
 
-    // Read the default quantizers.
+    /* Read the default quantizers. */
     {
         int Q, q_update;
 
-        Q = vp8_read_literal(bc, 7);  // AC 1st order Q = default
+        Q = vp8_read_literal(bc, 7);  /* AC 1st order Q = default */
         pc->base_qindex = Q;
         q_update = 0;
         pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update);
@@ -755,20 +758,21 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         if (q_update)
             vp8cx_init_de_quantizer(pbi);
 
-        // MB level dequantizer setup
+        /* MB level dequantizer setup */
         mb_init_dequantizer(pbi, &pbi->mb);
     }
 
-    // Determine if the golden frame or ARF buffer should be updated and how.
-    // For all non key frames the GF and ARF refresh flags and sign bias
-    // flags must be set explicitly.
+    /* Determine if the golden frame or ARF buffer should be updated and how.
+     * For all non key frames the GF and ARF refresh flags and sign bias
+     * flags must be set explicitly.
+     */
     if (pc->frame_type != KEY_FRAME)
     {
-        // Should the GF or ARF be updated from the current frame
+        /* Should the GF or ARF be updated from the current frame */
         pc->refresh_golden_frame = vp8_read_bit(bc);
         pc->refresh_alt_ref_frame = vp8_read_bit(bc);
 
-        // Buffer to buffer copy flags.
+        /* Buffer to buffer copy flags. */
         pc->copy_buffer_to_gf = 0;
 
         if (!pc->refresh_golden_frame)
@@ -806,7 +810,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
 
     {
-        // read coef probability tree
+        /* read coef probability tree */
 
         for (i = 0; i < BLOCK_TYPES; i++)
             for (j = 0; j < COEF_BANDS; j++)
@@ -827,7 +831,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
     vpx_memcpy(&xd->pre, &pc->yv12_fb[pc->lst_fb_idx], sizeof(YV12_BUFFER_CONFIG));
     vpx_memcpy(&xd->dst, &pc->yv12_fb[pc->new_fb_idx], sizeof(YV12_BUFFER_CONFIG));
 
-    // set up frame new frame for intra coded blocks
+    /* set up frame new frame for intra coded blocks */
     if (!(pbi->b_multithreaded_rd) || pc->multi_token_partition == ONE_PARTITION || !(pc->filter_level))
         vp8_setup_intra_recon(&pc->yv12_fb[pc->new_fb_idx]);
 
@@ -835,10 +839,10 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     vp8_build_block_doffsets(xd);
 
-    // clear out the coeff buffer
+    /* clear out the coeff buffer */
     vpx_memset(xd->qcoeff, 0, sizeof(xd->qcoeff));
 
-    // Read the mb_no_coeff_skip flag
+    /* Read the mb_no_coeff_skip flag */
     pc->mb_no_coeff_skip = (int)vp8_read_bit(bc);
 
 
@@ -853,20 +857,20 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         vp8mt_decode_mb_rows(pbi, xd);
         if(pbi->common.filter_level)
         {
-            //vp8_mt_loop_filter_frame(pbi);   //cm, &pbi->mb, cm->filter_level);
+            /*vp8_mt_loop_filter_frame(pbi);*/ /*cm, &pbi->mb, cm->filter_level);*/
 
             pc->last_frame_type = pc->frame_type;
             pc->last_filter_type = pc->filter_type;
             pc->last_sharpness_level = pc->sharpness_level;
         }
-        vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]);    //cm->frame_to_show);
+        vp8_yv12_extend_frame_borders_ptr(&pc->yv12_fb[pc->new_fb_idx]);    /*cm->frame_to_show);*/
     }
     else
     {
         int ibc = 0;
         int num_part = 1 << pc->multi_token_partition;
 
-        // Decode the individual macro block
+        /* Decode the individual macro block */
         for (mb_row = 0; mb_row < pc->mb_rows; mb_row++)
         {
 
@@ -886,9 +890,9 @@ int vp8_decode_frame(VP8D_COMP *pbi)
 
     stop_token_decoder(pbi);
 
-    // vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos);
+    /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes  \n",bc->pos+pbi->bc2.pos); */
 
-    // If this was a kf or Gf note the Q used
+    /* If this was a kf or Gf note the Q used */
     if ((pc->frame_type == KEY_FRAME) ||
          pc->refresh_golden_frame || pc->refresh_alt_ref_frame)
     {
diff --git a/vp8/decoder/dequantize.c b/vp8/decoder/dequantize.c
index f5d576ac7..84a9fd943 100644
--- a/vp8/decoder/dequantize.c
+++ b/vp8/decoder/dequantize.c
@@ -45,7 +45,7 @@ void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *pred,
         input[i] = dq[i] * input[i];
     }
 
-    // the idct halves ( >> 1) the pitch
+    /* the idct halves ( >> 1) the pitch */
     vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
@@ -87,7 +87,7 @@ void vp8_dequant_dc_idct_add_c(short *input, short *dq, unsigned char *pred,
         input[i] = dq[i] * input[i];
     }
 
-    // the idct halves ( >> 1) the pitch
+    /* the idct halves ( >> 1) the pitch */
     vp8_short_idct4x4llm_c(input, output, 4 << 1);
 
     vpx_memset(input, 0, 32);
diff --git a/vp8/decoder/detokenize.c b/vp8/decoder/detokenize.c
index 65c7d5370..7d013d240 100644
--- a/vp8/decoder/detokenize.c
+++ b/vp8/decoder/detokenize.c
@@ -44,18 +44,18 @@ typedef struct
 
 DECLARE_ALIGNED(16, static const TOKENEXTRABITS, vp8d_token_extra_bits2[MAX_ENTROPY_TOKENS]) =
 {
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //ZERO_TOKEN
-    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //ONE_TOKEN
-    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //TWO_TOKEN
-    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //THREE_TOKEN
-    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   //FOUR_TOKEN
-    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  //DCT_VAL_CATEGORY1
-    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY2
-    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY3
-    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY4
-    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, //DCT_VAL_CATEGORY5
-    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, //DCT_VAL_CATEGORY6
-    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  // EOB TOKEN
+    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* ZERO_TOKEN */
+    {  1, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* ONE_TOKEN */
+    {  2, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* TWO_TOKEN */
+    {  3, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* THREE_TOKEN */
+    {  4, 0, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },   /* FOUR_TOKEN */
+    {  5, 0, { 159, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /* DCT_VAL_CATEGORY1 */
+    {  7, 1, { 145, 165, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY2 */
+    { 11, 2, { 140, 148, 173, 0,  0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY3 */
+    { 19, 3, { 135, 140, 155, 176, 0,  0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY4 */
+    { 35, 4, { 130, 134, 141, 157, 180, 0,  0,  0,  0,  0,  0,  0   } }, /* DCT_VAL_CATEGORY5 */
+    { 67, 10, { 129, 130, 133, 140, 153, 177, 196, 230, 243, 254, 254, 0   } }, /* DCT_VAL_CATEGORY6 */
+    {  0, -1, { 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0   } },  /*  EOB TOKEN */
 };
 
 
@@ -75,13 +75,14 @@ void vp8_reset_mb_tokens_context(MACROBLOCKD *x)
 }
 
 #if CONFIG_ARM_ASM_DETOK
-// mashup of vp8_block2left and vp8_block2above so we only need one pointer
-// for the assembly version.
+/* mashup of vp8_block2left and vp8_block2above so we only need one pointer
+ * for the assembly version.
+ */
 DECLARE_ALIGNED(16, const UINT8, vp8_block2leftabove[25*2]) =
 {
-    //vp8_block2left
+    /* vp8_block2left */
     0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8,
-    //vp8_block2above
+    /* vp8_block2above */
     0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8
 };
 
@@ -297,7 +298,7 @@ BLOCK_LOOP:
 
     c = (INT16)(!type);
 
-//    Dest = ((A)!=0) + ((B)!=0);
+    /*Dest = ((A)!=0) + ((B)!=0);*/
     VP8_COMBINEENTROPYCONTEXTS(v, *a, *l);
     Prob = coef_probs;
     Prob += v * ENTROPY_NODES;
@@ -387,7 +388,7 @@ ONE_CONTEXT_NODE_0_:
 
     qcoeff_ptr [ scan[15] ] = (INT16) v;
 BLOCK_FINISHED:
-    *a = *l = ((eobs[i] = c) != !type);   // any nonzero data?
+    *a = *l = ((eobs[i] = c) != !type);   /* any nonzero data? */
     eobtotal += c;
     qcoeff_ptr += 16;
 
@@ -422,4 +423,4 @@ BLOCK_FINISHED:
     return eobtotal;
 
 }
-#endif //!CONFIG_ASM_DETOK
+#endif /*!CONFIG_ASM_DETOK*/
diff --git a/vp8/decoder/generic/dsystemdependent.c b/vp8/decoder/generic/dsystemdependent.c
index 84de7af43..2e284729b 100644
--- a/vp8/decoder/generic/dsystemdependent.c
+++ b/vp8/decoder/generic/dsystemdependent.c
@@ -18,7 +18,7 @@ extern void vp8_arch_arm_decode_init(VP8D_COMP *pbi);
 
 void vp8_dmachine_specific_config(VP8D_COMP *pbi)
 {
-    // Pure C:
+    /* Pure C: */
 #if CONFIG_RUNTIME_CPU_DETECT
     pbi->mb.rtcd                     = &pbi->common.rtcd;
     pbi->dequant.block               = vp8_dequantize_b_c;
@@ -29,7 +29,7 @@ void vp8_dmachine_specific_config(VP8D_COMP *pbi)
     pbi->dequant.idct_add_uv_block   = vp8_dequant_idct_add_uv_block_c;
     pbi->dboolhuff.start             = vp8dx_start_decode_c;
     pbi->dboolhuff.fill              = vp8dx_bool_decoder_fill_c;
-#if 0 //For use with RTCD, when implemented
+#if 0 /*For use with RTCD, when implemented*/
     pbi->dboolhuff.debool = vp8dx_decode_bool_c;
     pbi->dboolhuff.devalue = vp8dx_decode_value_c;
 #endif
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index b5a6e3e85..6eda45e4a 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -113,12 +113,13 @@ VP8D_PTR vp8dx_create_decompressor(VP8D_CONFIG *oxcf)
     pbi->common.current_video_frame = 0;
     pbi->ready_for_new_data = 1;
 
-    pbi->CPUFreq = 0; //vp8_get_processor_freq();
+    pbi->CPUFreq = 0; /*vp8_get_processor_freq();*/
     pbi->max_threads = oxcf->max_threads;
     vp8_decoder_create_threads(pbi);
 
-    //vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
-    // unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+    /* vp8cx_init_de_quantizer() is first called here. Add check in frame_init_dequantizer() to avoid
+     *  unnecessary calling of vp8cx_init_de_quantizer() for every frame.
+     */
     vp8cx_init_de_quantizer(pbi);
 
     {
@@ -223,7 +224,7 @@ int vp8dx_set_reference(VP8D_PTR ptr, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_C
     return 0;
 }
 
-//For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.
+/*For ARM NEON, d8-d15 are callee-saved registers, and need to be saved by us.*/
 #if HAVE_ARMV7
 extern void vp8_push_neon(INT64 *store);
 extern void vp8_pop_neon(INT64 *store);
@@ -250,7 +251,7 @@ static void ref_cnt_fb (int *buf, int *idx, int new_idx)
     buf[new_idx]++;
 }
 
-// If any buffer copy / swapping is signalled it should be done here.
+/* If any buffer copy / swapping is signalled it should be done here. */
 static int swap_frame_buffers (VP8_COMMON *cm)
 {
     int fb_to_update_with, err = 0;
@@ -260,10 +261,11 @@ static int swap_frame_buffers (VP8_COMMON *cm)
     else
         fb_to_update_with = cm->new_fb_idx;
 
-    // The alternate reference frame or golden frame can be updated
-    //  using the new, last, or golden/alt ref frame.  If it
-    //  is updated using the newly decoded frame it is a refresh.
-    //  An update using the last or golden/alt ref frame is a copy.
+    /* The alternate reference frame or golden frame can be updated
+     *  using the new, last, or golden/alt ref frame.  If it
+     *  is updated using the newly decoded frame it is a refresh.
+     *  An update using the last or golden/alt ref frame is a copy.
+     */
     if (cm->copy_buffer_to_arf)
     {
         int new_fb = 0;
@@ -322,8 +324,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     int retcode = 0;
     struct vpx_usec_timer timer;
 
-//  if(pbi->ready_for_new_data == 0)
-//      return -1;
+    /*if(pbi->ready_for_new_data == 0)
+        return -1;*/
 
     if (ptr == 0)
     {
@@ -363,7 +365,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     vpx_usec_timer_start(&timer);
 
-    //cm->current_video_frame++;
+    /*cm->current_video_frame++;*/
     pbi->Source = source;
     pbi->source_sz = size;
 
@@ -423,7 +425,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
         {
             struct vpx_usec_timer lpftimer;
             vpx_usec_timer_start(&lpftimer);
-            // Apply the loop filter if appropriate.
+            /* Apply the loop filter if appropriate. */
 
             vp8_loop_filter_frame(cm, &pbi->mb, cm->filter_level);
 
@@ -438,8 +440,8 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     }
 
 #if 0
-    // DEBUG code
-    //vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);
+    /* DEBUG code */
+    /*vp8_recon_write_yuv_frame("recon.yuv", cm->frame_to_show);*/
     if (cm->current_video_frame <= 5)
         write_dx_frame_to_file(cm->frame_to_show, cm->current_video_frame);
 #endif
@@ -451,7 +453,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
 
     pbi->time_decoding += pbi->decode_microseconds;
 
-//  vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);
+    /*vp8_print_modes_and_motion_vectors( cm->mi, cm->mb_rows,cm->mb_cols, cm->current_video_frame);*/
 
     if (cm->show_frame)
         cm->current_video_frame++;
@@ -512,7 +514,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
     if (pbi->ready_for_new_data == 1)
         return ret;
 
-    // ie no raw frame to show!!!
+    /* ie no raw frame to show!!! */
     if (pbi->common.show_frame == 0)
         return ret;
 
@@ -538,7 +540,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
         ret = -1;
     }
 
-#endif //!CONFIG_POSTPROC
+#endif /*!CONFIG_POSTPROC*/
     vp8_clear_system_state();
     return ret;
 }
diff --git a/vp8/decoder/onyxd_int.h b/vp8/decoder/onyxd_int.h
index 522ac22d2..7593edf27 100644
--- a/vp8/decoder/onyxd_int.h
+++ b/vp8/decoder/onyxd_int.h
@@ -93,18 +93,18 @@ typedef struct VP8Decompressor
     int decoding_thread_count;
     int allocated_decoding_thread_count;
 
-    // variable for threading
+    /* variable for threading */
 #if CONFIG_MULTITHREAD
     int mt_baseline_filter_level[MAX_MB_SEGMENTS];
     int sync_range;
-    int *mt_current_mb_col;                  // Each row remembers its already decoded column.
+    int *mt_current_mb_col;                  /* Each row remembers its already decoded column. */
 
-    unsigned char **mt_yabove_row;           // mb_rows x width
+    unsigned char **mt_yabove_row;           /* mb_rows x width */
     unsigned char **mt_uabove_row;
     unsigned char **mt_vabove_row;
-    unsigned char **mt_yleft_col;            // mb_rows x 16
-    unsigned char **mt_uleft_col;            // mb_rows x 8
-    unsigned char **mt_vleft_col;            // mb_rows x 8
+    unsigned char **mt_yleft_col;            /* mb_rows x 16 */
+    unsigned char **mt_uleft_col;            /* mb_rows x 8 */
+    unsigned char **mt_vleft_col;            /* mb_rows x 8 */
 
     MB_ROW_DEC           *mb_row_di;
     DECODETHREAD_DATA    *de_thread_data;
@@ -112,7 +112,7 @@ typedef struct VP8Decompressor
     pthread_t           *h_decoding_thread;
     sem_t               *h_event_start_decoding;
     sem_t                h_event_end_decoding;
-    // end of threading data
+    /* end of threading data */
 #endif
 
     vp8_reader *mbc;
diff --git a/vp8/decoder/reconintra_mt.c b/vp8/decoder/reconintra_mt.c
index 4d395629d..ad4324b27 100644
--- a/vp8/decoder/reconintra_mt.c
+++ b/vp8/decoder/reconintra_mt.c
@@ -15,16 +15,17 @@
 #include "vpx_mem/vpx_mem.h"
 #include "onyxd_int.h"
 
-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
 
 void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yabove_row;    /* = x->dst.y_buffer - x->dst.y_stride; */
     unsigned char *yleft_col;
     unsigned char yleft_buf[16];
-    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char ytop_left;      /* = yabove_row[-1]; */
     unsigned char *ypred_ptr = x->predictor;
     int r, c, i;
 
@@ -43,7 +44,7 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
 
     ytop_left = yabove_row[-1];
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -156,15 +157,15 @@ void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
 void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *yabove_row;    // = x->dst.y_buffer - x->dst.y_stride;
+    unsigned char *yabove_row;    /* = x->dst.y_buffer - x->dst.y_stride; */
     unsigned char *yleft_col;
     unsigned char yleft_buf[16];
-    unsigned char ytop_left;      // = yabove_row[-1];
+    unsigned char ytop_left;      /* = yabove_row[-1]; */
     unsigned char *ypred_ptr = x->predictor;
     int r, c, i;
 
     int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; //x->predictor;
+    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/
 
     if (pbi->common.filter_level)
     {
@@ -181,7 +182,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
 
     ytop_left = yabove_row[-1];
 
-    // for Y
+    /* for Y */
     switch (x->mode_info_context->mbmi.mode)
     {
     case DC_PRED:
@@ -222,11 +223,11 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
             expected_dc = 128;
         }
 
-        //vpx_memset(ypred_ptr, expected_dc, 256);
+        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
         for (r = 0; r < 16; r++)
         {
             vpx_memset(ypred_ptr, expected_dc, 16);
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -240,7 +241,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
             ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
             ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
             ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
         }
     }
     break;
@@ -251,7 +252,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
         {
 
             vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -274,7 +275,7 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
                 ypred_ptr[c] = pred;
             }
 
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
         }
 
     }
@@ -299,14 +300,14 @@ void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_r
 void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *uabove_row;   // = x->dst.u_buffer - x->dst.uv_stride;
-    unsigned char *uleft_col;    //[16];
+    unsigned char *uabove_row;   /* = x->dst.u_buffer - x->dst.uv_stride; */
+    unsigned char *uleft_col;    /*[16];*/
     unsigned char uleft_buf[8];
-    unsigned char utop_left;     // = uabove_row[-1];
-    unsigned char *vabove_row;   // = x->dst.v_buffer - x->dst.uv_stride;
-    unsigned char *vleft_col;    //[20];
+    unsigned char utop_left;     /* = uabove_row[-1]; */
+    unsigned char *vabove_row;   /* = x->dst.v_buffer - x->dst.uv_stride; */
+    unsigned char *vleft_col;    /*[20];*/
     unsigned char vleft_buf[8];
-    unsigned char vtop_left;     // = vabove_row[-1];
+    unsigned char vtop_left;     /* = vabove_row[-1]; */
     unsigned char *upred_ptr = &x->predictor[256];
     unsigned char *vpred_ptr = &x->predictor[320];
     int i, j;
@@ -462,16 +463,16 @@ void vp8mt_build_intra_predictors_mbuv(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_ro
 void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *uabove_row;  // = x->dst.u_buffer - x->dst.uv_stride;
-    unsigned char *uleft_col;   //[16];
+    unsigned char *uabove_row;  /* = x->dst.u_buffer - x->dst.uv_stride; */
+    unsigned char *uleft_col;   /*[16];*/
     unsigned char uleft_buf[8];
-    unsigned char utop_left;    // = uabove_row[-1];
-    unsigned char *vabove_row;  // = x->dst.v_buffer - x->dst.uv_stride;
-    unsigned char *vleft_col;   //[20];
+    unsigned char utop_left;    /* = uabove_row[-1]; */
+    unsigned char *vabove_row;  /* = x->dst.v_buffer - x->dst.uv_stride; */
+    unsigned char *vleft_col;   /*[20];*/
     unsigned char vleft_buf[8];
-    unsigned char vtop_left;    // = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
-    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    unsigned char vtop_left;    /* = vabove_row[-1]; */
+    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
     int uv_stride = x->dst.uv_stride;
     int i, j;
 
@@ -539,14 +540,14 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         }
 
 
-        //vpx_memset(upred_ptr,expected_udc,64);
-        //vpx_memset(vpred_ptr,expected_vdc,64);
+        /*vpx_memset(upred_ptr,expected_udc,64);
+        vpx_memset(vpred_ptr,expected_vdc,64);*/
         for (i = 0; i < 8; i++)
         {
             vpx_memset(upred_ptr, expected_udc, 8);
             vpx_memset(vpred_ptr, expected_vdc, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
     break;
@@ -558,8 +559,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         {
             vpx_memcpy(upred_ptr, uabove_row, 8);
             vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -572,8 +573,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
         {
             vpx_memset(upred_ptr, uleft_col[i], 8);
             vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
     }
 
@@ -605,8 +606,8 @@ void vp8mt_build_intra_predictors_mbuv_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_
                 vpred_ptr[j] = predv;
             }
 
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
         }
 
     }
@@ -640,13 +641,13 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
 #if CONFIG_MULTITHREAD
     int i, r, c;
 
-    unsigned char *Above;   // = *(x->base_dst) + x->dst - x->dst_stride;
+    unsigned char *Above;   /* = *(x->base_dst) + x->dst - x->dst_stride; */
     unsigned char Left[4];
-    unsigned char top_left; // = Above[-1];
+    unsigned char top_left; /* = Above[-1]; */
 
     BLOCKD *x = &xd->block[num];
 
-    //Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).
+    /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 above-right).*/
     if (num < 4 && pbi->common.filter_level)
         Above = pbi->mt_yabove_row[mb_row] + mb_col*16 + num*4 + 32;
     else
@@ -696,7 +697,7 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
     break;
     case B_TM_PRED:
     {
-        // prediction similar to true_motion prediction
+        /* prediction similar to true_motion prediction */
         for (r = 0; r < 4; r++)
         {
             for (c = 0; c < 4; c++)
@@ -945,12 +946,13 @@ void vp8mt_predict_intra4x4(VP8D_COMP *pbi,
 #endif
 }
 
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
+/* copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
+ * to the right prediction have filled in pixels to use.
+ */
 void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col)
 {
 #if CONFIG_MULTITHREAD
-    unsigned char *above_right;   // = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
+    unsigned char *above_right;   /* = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16; */
     unsigned int *src_ptr;
     unsigned int *dst_ptr0;
     unsigned int *dst_ptr1;
@@ -962,9 +964,9 @@ void vp8mt_intra_prediction_down_copy(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row
         above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
 
     src_ptr = (unsigned int *)above_right;
-    //dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
-    //dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
-    //dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
+    /*dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
+    dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
+    dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);*/
     dst_ptr0 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 3 * x->block[0].dst_stride);
     dst_ptr1 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 7 * x->block[0].dst_stride);
     dst_ptr2 = (unsigned int *)(*(x->block[0].base_dst) + x->block[0].dst + 16 + 11 * x->block[0].dst_stride);
diff --git a/vp8/decoder/reconintra_mt.h b/vp8/decoder/reconintra_mt.h
index 5c42f819f..d401295b2 100644
--- a/vp8/decoder/reconintra_mt.h
+++ b/vp8/decoder/reconintra_mt.h
@@ -12,7 +12,7 @@
 #ifndef __INC_RECONINTRA_MT_H
 #define __INC_RECONINTRA_MT_H
 
-// reconintra functions used in multi-threaded decoder
+/* reconintra functions used in multi-threaded decoder */
 #if CONFIG_MULTITHREAD
 extern void vp8mt_build_intra_predictors_mby(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
 extern void vp8mt_build_intra_predictors_mby_s(VP8D_COMP *pbi, MACROBLOCKD *x, int mb_row, int mb_col);
diff --git a/vp8/decoder/threading.c b/vp8/decoder/threading.c
index 2d7f7b9b8..fc2fad516 100644
--- a/vp8/decoder/threading.c
+++ b/vp8/decoder/threading.c
@@ -69,12 +69,12 @@ void vp8_setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC
         mbd->mb_segement_abs_delta     = xd->mb_segement_abs_delta;
         vpx_memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data));
 
-        //signed char ref_lf_deltas[MAX_REF_LF_DELTAS];
+        /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/
         vpx_memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas));
-        //signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];
+        /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/
         vpx_memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas));
-        //unsigned char mode_ref_lf_delta_enabled;
-        //unsigned char mode_ref_lf_delta_update;
+        /*unsigned char mode_ref_lf_delta_enabled;
+        unsigned char mode_ref_lf_delta_update;*/
         mbd->mode_ref_lf_delta_enabled    = xd->mode_ref_lf_delta_enabled;
         mbd->mode_ref_lf_delta_update    = xd->mode_ref_lf_delta_update;
 
@@ -113,7 +113,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
         eobtotal = vp8_decode_mb_tokens(pbi, xd);
     }
 
-    // Perform temporary clamping of the MV to be used for prediction
+    /* Perform temporary clamping of the MV to be used for prediction */
     if (do_clamp)
     {
         clamp_mvs(xd);
@@ -125,7 +125,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
     {
         xd->mode_info_context->mbmi.dc_diff = 0;
 
-        //mt_skip_recon_mb(pbi, xd, mb_row, mb_col);
+        /*mt_skip_recon_mb(pbi, xd, mb_row, mb_col);*/
         if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
         {
             vp8mt_build_intra_predictors_mbuv_s(pbi, xd, mb_row, mb_col);
@@ -141,7 +141,7 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
     if (xd->segmentation_enabled)
         mb_init_dequantizer(pbi, xd);
 
-    // do prediction
+    /* do prediction */
     if (xd->frame_type == KEY_FRAME  ||  xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME)
     {
         vp8mt_build_intra_predictors_mbuv(pbi, xd, mb_row, mb_col);
@@ -158,13 +158,13 @@ void vp8mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, int mb_row, int mb
         vp8_build_inter_predictors_mb(xd);
     }
 
-    // dequantization and idct
+    /* dequantization and idct */
     if (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)
     {
         BLOCKD *b = &xd->block[24];
         DEQUANT_INVOKE(&pbi->dequant, block)(b);
 
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */
         if (xd->eobs[24] > 1)
         {
             IDCT_INVOKE(RTCD_VTABLE(idct), iwalsh16)(&b->dqcoeff[0], b->diff);
@@ -244,7 +244,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
         if (pbi->b_multithreaded_rd == 0)
             break;
 
-        //if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)
+        /*if(WaitForSingleObject(pbi->h_event_start_decoding[ithread], INFINITE) == WAIT_OBJECT_0)*/
         if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0)
         {
             if (pbi->b_multithreaded_rd == 0)
@@ -281,7 +281,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     recon_yoffset = mb_row * recon_y_stride * 16;
                     recon_uvoffset = mb_row * recon_uv_stride * 8;
-                    // reset above block coeffs
+                    /* reset above block coeffs */
 
                     xd->above_context = pc->above_context;
                     xd->left_context = &mb_row_left_context;
@@ -313,17 +313,19 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         if(pbi->common.filter_level)
                         {
-                            //update loopfilter info
+                            /*update loopfilter info*/
                             Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                             filter_level = pbi->mt_baseline_filter_level[Segment];
-                            // Distance of Mb to the various image edges.
-                            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                            // Apply any context driven MB level adjustment
+                            /* Distance of Mb to the various image edges.
+                             * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                             * Apply any context driven MB level adjustment
+                             */
                             vp8_adjust_mb_lf_value(xd, &filter_level);
                         }
 
-                        // Distance of Mb to the various image edges.
-                        // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                        /* Distance of Mb to the various image edges.
+                         * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                         */
                         xd->mb_to_left_edge = -((mb_col * 16) << 3);
                         xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -333,7 +335,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         xd->left_available = (mb_col != 0);
 
-                        // Select the appropriate reference frame for this MB
+                        /* Select the appropriate reference frame for this MB */
                         if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                             ref_fb_idx = pc->lst_fb_idx;
                         else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -352,13 +354,13 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                         {
                             if( mb_row != pc->mb_rows-1 )
                             {
-                                //Save decoded MB last row data for next-row decoding
+                                /* Save decoded MB last row data for next-row decoding */
                                 vpx_memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
                                 vpx_memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8);
                                 vpx_memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                             }
 
-                            //save left_col for next MB decoding
+                            /* save left_col for next MB decoding */
                             if(mb_col != pc->mb_cols-1)
                             {
                                 MODE_INFO *next = xd->mode_info_context +1;
@@ -375,7 +377,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                 }
                             }
 
-                          // loopfilter on this macroblock.
+                          /* loopfilter on this macroblock. */
                             if (filter_level)
                             {
                                 if (mb_col > 0)
@@ -384,7 +386,7 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
                                 if (xd->mode_info_context->mbmi.dc_diff > 0)
                                     pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
-                                // don't apply across umv border
+                                /* don't apply across umv border */
                                 if (mb_row > 0)
                                     pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
@@ -400,11 +402,11 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                         xd->above_context++;
 
-                        //pbi->mb_row_di[ithread].current_mb_col = mb_col;
+                        /*pbi->mb_row_di[ithread].current_mb_col = mb_col;*/
                         pbi->mt_current_mb_col[mb_row] = mb_col;
                     }
 
-                    // adjust to the next row of mbs
+                    /* adjust to the next row of mbs */
                     if (pbi->common.filter_level)
                     {
                         if(mb_row != pc->mb_rows-1)
@@ -424,15 +426,15 @@ THREAD_FUNCTION vp8_thread_decoding_proc(void *p_data)
 
                     ++xd->mode_info_context;      /* skip prediction column */
 
-                    // since we have multithread
+                    /* since we have multithread */
                     xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
                 }
             }
         }
-        //  add this to each frame
+        /*  add this to each frame */
         if ((mbrd->mb_row == pbi->common.mb_rows-1) || ((mbrd->mb_row == pbi->common.mb_rows-2) && (pbi->common.mb_rows % (pbi->decoding_thread_count+1))==1))
         {
-            //SetEvent(pbi->h_event_end_decoding);
+            /*SetEvent(pbi->h_event_end_decoding);*/
             sem_post(&pbi->h_event_end_decoding);
         }
     }
@@ -502,7 +504,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
             pbi->mt_current_mb_col = NULL ;
         }
 
-        // Free above_row buffers.
+        /* Free above_row buffers. */
         if (pbi->mt_yabove_row)
         {
             for (i=0; i< mb_rows; i++)
@@ -545,7 +547,7 @@ void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows)
             pbi->mt_vabove_row = NULL ;
         }
 
-        // Free left_col buffers.
+        /* Free left_col buffers. */
         if (pbi->mt_yleft_col)
         {
             for (i=0; i< mb_rows; i++)
@@ -605,7 +607,7 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
     {
         vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows);
 
-        // our internal buffers are always multiples of 16
+        /* our internal buffers are always multiples of 16 */
         if ((width & 0xf) != 0)
             width += 16 - (width & 0xf);
 
@@ -616,10 +618,10 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
 
         uv_width = width >>1;
 
-        // Allocate an int for each mb row.
+        /* Allocate an int for each mb row. */
         CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(int) * pc->mb_rows));
 
-        // Allocate memory for above_row buffers.
+        /* Allocate memory for above_row buffers. */
         CHECK_MEM_ERROR(pbi->mt_yabove_row, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_calloc(sizeof(unsigned char) * (width + (VP8BORDERINPIXELS<<1)), 1));
@@ -632,7 +634,7 @@ int vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows)
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_calloc(sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS), 1));
 
-        // Allocate memory for left_col buffers.
+        /* Allocate memory for left_col buffers. */
         CHECK_MEM_ERROR(pbi->mt_yleft_col, vpx_malloc(sizeof(unsigned char *) * pc->mb_rows));
         for (i=0; i< pc->mb_rows; i++)
             CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1));
@@ -657,14 +659,14 @@ void vp8_decoder_remove_threads(VP8D_COMP *pbi)
 {
 #if CONFIG_MULTITHREAD
 
-    //shutdown MB Decoding thread;
+    /* shutdown MB Decoding thread; */
     if (pbi->b_multithreaded_rd)
     {
         int i;
 
         pbi->b_multithreaded_rd = 0;
 
-        // allow all threads to exit
+        /* allow all threads to exit */
         for (i = 0; i < pbi->allocated_decoding_thread_count; i++)
         {
             sem_post(&pbi->h_event_start_decoding[i]);
@@ -713,32 +715,32 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
 #if CONFIG_MULTITHREAD
     VP8_COMMON *cm  = &pbi->common;
     MACROBLOCKD *mbd = &pbi->mb;
-    //YV12_BUFFER_CONFIG *post = &cm->new_frame;  //frame_to_show;
+    /*YV12_BUFFER_CONFIG *post = &cm->new_frame;*/  /*frame_to_show;*/
     loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;
 
-    //int mb_row;
-    //int mb_col;
-    //int baseline_filter_level[MAX_MB_SEGMENTS];
+    /*int mb_row;
+    int mb_col;
+    int baseline_filter_level[MAX_MB_SEGMENTS];*/
     int filter_level;
     int alt_flt_enabled = mbd->segmentation_enabled;
 
     int i;
-    //unsigned char *y_ptr, *u_ptr, *v_ptr;
+    /*unsigned char *y_ptr, *u_ptr, *v_ptr;*/
 
-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
     if (alt_flt_enabled)
     {
         for (i = 0; i < MAX_MB_SEGMENTS; i++)
         {
-            // Abs value
+            /* Abs value */
             if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                 pbi->mt_baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
             else
             {
                 pbi->mt_baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                pbi->mt_baseline_filter_level[i] = (pbi->mt_baseline_filter_level[i] >= 0) ? ((pbi->mt_baseline_filter_level[i] <= MAX_LOOP_FILTER) ? pbi->mt_baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
             }
         }
     }
@@ -748,7 +750,7 @@ void vp8mt_lpf_init( VP8D_COMP *pbi, int default_filt_lvl)
             pbi->mt_baseline_filter_level[i] = default_filt_lvl;
     }
 
-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
     if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
         vp8_init_loop_filter(cm);
     else if (frame_type != cm->last_frame_type)
@@ -779,7 +781,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
     if(pbi->common.filter_level)
     {
-        //Set above_row buffer to 127 for decoding first MB row
+        /* Set above_row buffer to 127 for decoding first MB row */
         vpx_memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS-1, 127, pc->yv12_fb[pc->lst_fb_idx].y_width + 5);
         vpx_memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
         vpx_memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS>>1)-1, 127, (pc->yv12_fb[pc->lst_fb_idx].y_width>>1) +5);
@@ -791,7 +793,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
             vpx_memset(pbi->mt_vabove_row[i] + (VP8BORDERINPIXELS>>1)-1, (unsigned char)129, 1);
         }
 
-        //Set left_col to 129 initially
+        /* Set left_col to 129 initially */
         for (i=0; i<pc->mb_rows; i++)
         {
             vpx_memset(pbi->mt_yleft_col[i], (unsigned char)129, 16);
@@ -812,7 +814,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
         xd->current_bc = &pbi->mbc[mb_row%num_part];
 
-        //vp8_decode_mb_row(pbi, pc, mb_row, xd);
+        /* vp8_decode_mb_row(pbi, pc, mb_row, xd); */
         {
             int i;
             int recon_yoffset, recon_uvoffset;
@@ -822,14 +824,14 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
             int recon_y_stride = pc->yv12_fb[ref_fb_idx].y_stride;
             int recon_uv_stride = pc->yv12_fb[ref_fb_idx].uv_stride;
 
-           // volatile int *last_row_current_mb_col = NULL;
+           /* volatile int *last_row_current_mb_col = NULL; */
             if (mb_row > 0)
                 last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row -1];
 
             vpx_memset(&pc->left_context, 0, sizeof(pc->left_context));
             recon_yoffset = mb_row * recon_y_stride * 16;
             recon_uvoffset = mb_row * recon_uv_stride * 8;
-            // reset above block coeffs
+            /* reset above block coeffs */
 
             xd->above_context = pc->above_context;
             xd->up_available = (mb_row != 0);
@@ -858,17 +860,19 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 if(pbi->common.filter_level)
                 {
-                    //update loopfilter info
+                    /* update loopfilter info */
                     Segment = (alt_flt_enabled) ? xd->mode_info_context->mbmi.segment_id : 0;
                     filter_level = pbi->mt_baseline_filter_level[Segment];
-                    // Distance of Mb to the various image edges.
-                    // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-                    // Apply any context driven MB level adjustment
+                    /* Distance of Mb to the various image edges.
+                     * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                     * Apply any context driven MB level adjustment
+                     */
                     vp8_adjust_mb_lf_value(xd, &filter_level);
                 }
 
-                // Distance of Mb to the various image edges.
-                // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                /* Distance of Mb to the various image edges.
+                 * These are specified to 8th pel as they are always compared to values that are in 1/8th pel units
+                 */
                 xd->mb_to_left_edge = -((mb_col * 16) << 3);
                 xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
 
@@ -878,7 +882,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 xd->left_available = (mb_col != 0);
 
-                // Select the appropriate reference frame for this MB
+                /* Select the appropriate reference frame for this MB */
                 if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)
                     ref_fb_idx = pc->lst_fb_idx;
                 else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME)
@@ -895,7 +899,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
 
                 if (pbi->common.filter_level)
                 {
-                    //Save decoded MB last row data for next-row decoding
+                    /* Save decoded MB last row data for next-row decoding */
                     if(mb_row != pc->mb_rows-1)
                     {
                         vpx_memcpy((pbi->mt_yabove_row[mb_row +1] + 32 + mb_col*16), (xd->dst.y_buffer + 15 * recon_y_stride), 16);
@@ -903,7 +907,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         vpx_memcpy((pbi->mt_vabove_row[mb_row +1] + 16 + mb_col*8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8);
                     }
 
-                    //save left_col for next MB decoding
+                    /* save left_col for next MB decoding */
                     if(mb_col != pc->mb_cols-1)
                     {
                         MODE_INFO *next = xd->mode_info_context +1;
@@ -920,7 +924,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         }
                     }
 
-                    // loopfilter on this macroblock.
+                    /* loopfilter on this macroblock. */
                     if (filter_level)
                     {
                         if (mb_col > 0)
@@ -929,7 +933,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                         if (xd->mode_info_context->mbmi.dc_diff > 0)
                             pc->lf_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
-                        // don't apply across umv border
+                        /* don't apply across umv border */
                         if (mb_row > 0)
                             pc->lf_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi[filter_level], pc->simpler_lpf);
 
@@ -948,7 +952,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
                 pbi->mt_current_mb_col[mb_row] = mb_col;
             }
 
-            // adjust to the next row of mbs
+            /* adjust to the next row of mbs */
             if (pbi->common.filter_level)
             {
                 if(mb_row != pc->mb_rows-1)
@@ -971,7 +975,7 @@ void vp8mt_decode_mb_rows( VP8D_COMP *pbi, MACROBLOCKD *xd)
         xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count;
     }
 
-    sem_wait(&pbi->h_event_end_decoding);   // add back for each frame
+    sem_wait(&pbi->h_event_end_decoding);   /* add back for each frame */
 #else
     (void) pbi;
     (void) xd;
diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 691aee0a6..8a94fa369 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -291,7 +291,7 @@ void vp8_output_stats(const VP8_COMP            *cpi,
 
 
         fpfile = fopen("fpmotionmap.stt", "a");
-        fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile);
+        if(fwrite(cpi->fp_motion_map, 1, cpi->common.MBs, fpfile));
         fclose(fpfile);
     }
 #endif
@@ -879,7 +879,7 @@ void vp8_first_pass(VP8_COMP *cpi)
         else
             recon_file = fopen(filename, "ab");
 
-        fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
+        if(fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file));
         fclose(recon_file);
     }
 
@@ -2586,7 +2586,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
         if (0)
         {
             FILE *f = fopen("Subsamle.stt", "a");
-            fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n",  cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+            fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n",  cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
             fclose(f);
         }
 
@@ -2644,7 +2644,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
                 if (0)
                 {
                     FILE *f = fopen("Subsamle.stt", "a");
-                    fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n",  kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, cpi->kf_group_bits / cpi->frames_to_key, new_height, new_width);
+                    fprintf(f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n",  kf_q, cpi->common.horiz_scale, cpi->common.vert_scale,  kf_group_err / cpi->frames_to_key, (int)(cpi->kf_group_bits / cpi->frames_to_key), new_height, new_width);
                     fclose(f);
                 }
             }
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index f19cb9a30..a85cad1b4 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -41,7 +41,7 @@ typedef enum
     VP8_SEG_ALG_PRIV     = 256,
     VP8_SEG_MAX
 } mem_seg_id_t;
-#define NELEMENTS(x) (sizeof(x)/sizeof(x[0]))
+#define NELEMENTS(x) ((int)(sizeof(x)/sizeof(x[0])))
 
 static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);
 
@@ -170,7 +170,7 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
     }
 }
 
-static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, int id)
+static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
 {
     int i;
 
@@ -269,14 +269,14 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t         *data,
             const uint8_t *c = data + 3;
             si->is_kf = 1;
 
-            // vet via sync code
+            /* vet via sync code */
             if (c[0] != 0x9d || c[1] != 0x01 || c[2] != 0x2a)
                 res = VPX_CODEC_UNSUP_BITSTREAM;
 
             si->w = swap2(*(const unsigned short *)(c + 3)) & 0x3fff;
             si->h = swap2(*(const unsigned short *)(c + 5)) & 0x3fff;
 
-            //printf("w=%d, h=%d\n", si->w, si->h);
+            /*printf("w=%d, h=%d\n", si->w, si->h);*/
             if (!(si->h | si->w))
                 res = VPX_CODEC_UNSUP_BITSTREAM;
         }
@@ -670,7 +670,14 @@ CODEC_INTERFACE(vpx_codec_vp8_dx) =
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
     },
-    {NOT_IMPLEMENTED} /* encoder functions */
+    { /* encoder functions */
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED
+    }
 };
 
 /*
@@ -693,5 +700,12 @@ vpx_codec_iface_t vpx_codec_vp8_algo =
         vp8_decode,       /* vpx_codec_decode_fn_t     decode; */
         vp8_get_frame,    /* vpx_codec_frame_get_fn_t  frame_get; */
     },
-    {NOT_IMPLEMENTED} /* encoder functions */
+    { /* encoder functions */
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED,
+        NOT_IMPLEMENTED
+    }
 };
diff --git a/vpx/src/vpx_encoder.c b/vpx/src/vpx_encoder.c
index ddbd65484..10929590b 100644
--- a/vpx/src/vpx_encoder.c
+++ b/vpx/src/vpx_encoder.c
@@ -303,7 +303,7 @@ const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get(struct vpx_codec_pkt_list *list
 
     pkt = (const void *) * iter;
 
-    if (pkt - list->pkts < list->cnt)
+    if ((size_t)(pkt - list->pkts) < list->cnt)
         *iter = pkt + 1;
     else
         pkt = NULL;
diff --git a/vpx/vp8.h b/vpx/vp8.h
index c7553ec22..d7ed8d8c1 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -53,7 +53,7 @@ enum vp8_postproc_level
     VP8_NOFILTERING    = 0,
     VP8_DEBLOCK        = 1,
     VP8_DEMACROBLOCK   = 2,
-    VP8_ADDNOISE       = 4,
+    VP8_ADDNOISE       = 4
 };
 
 /*!\brief post process flags
diff --git a/vpx/vpx_codec.h b/vpx/vpx_codec.h
index 371df00c3..899b27cca 100644
--- a/vpx/vpx_codec.h
+++ b/vpx/vpx_codec.h
@@ -62,7 +62,7 @@ extern "C" {
     /*!\brief Decorator indicating a function is potentially unused */
 #ifdef UNUSED
 #elif __GNUC__
-#define UNUSED __attribute__ ((unused));
+#define UNUSED __attribute__ ((unused))
 #else
 #define UNUSED
 #endif
@@ -128,7 +128,7 @@ extern "C" {
         /*!\brief An iterator reached the end of list.
          *
          */
-        VPX_CODEC_LIST_END,
+        VPX_CODEC_LIST_END
 
     }
     vpx_codec_err_t;
diff --git a/vpx/vpx_decoder_compat.h b/vpx/vpx_decoder_compat.h
index 9e1e49222..ca6f61849 100644
--- a/vpx/vpx_decoder_compat.h
+++ b/vpx/vpx_decoder_compat.h
@@ -78,7 +78,7 @@ extern "C" {
         /*!\brief An iterator reached the end of list.
          *
          */
-        VPX_DEC_LIST_END = VPX_CODEC_LIST_END,
+        VPX_DEC_LIST_END = VPX_CODEC_LIST_END
 
     }
     vpx_dec_err_t;
diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h
index f894cd871..3acb19945 100644
--- a/vpx/vpx_encoder.h
+++ b/vpx/vpx_encoder.h
@@ -171,7 +171,7 @@ extern "C" {
     {
         VPX_RC_ONE_PASS,   /**< Single pass mode */
         VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */
-        VPX_RC_LAST_PASS,  /**< Final pass of multi-pass mode */
+        VPX_RC_LAST_PASS   /**< Final pass of multi-pass mode */
     };
 
 
diff --git a/vpx/vpx_image.h b/vpx/vpx_image.h
index 4506dd3b2..dcb8f31bc 100644
--- a/vpx/vpx_image.h
+++ b/vpx/vpx_image.h
@@ -55,7 +55,7 @@ extern "C" {
         VPX_IMG_FMT_YV12    = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */
         VPX_IMG_FMT_I420    = VPX_IMG_FMT_PLANAR | 2,
         VPX_IMG_FMT_VPXYV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 3, /** < planar 4:2:0 format with vpx color space */
-        VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4,  /** < planar 4:2:0 format with vpx color space */
+        VPX_IMG_FMT_VPXI420 = VPX_IMG_FMT_PLANAR | 4   /** < planar 4:2:0 format with vpx color space */
     }
     vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */
 
diff --git a/vpx_mem/include/vpx_mem_intrnl.h b/vpx_mem/include/vpx_mem_intrnl.h
index 00f9c90e1..6e261ba7f 100644
--- a/vpx_mem/include/vpx_mem_intrnl.h
+++ b/vpx_mem/include/vpx_mem_intrnl.h
@@ -15,24 +15,24 @@
 
 #ifndef CONFIG_MEM_MANAGER
 # if defined(VXWORKS)
-#  define CONFIG_MEM_MANAGER  1 //include heap manager functionality,
-//default: enabled on vxworks
+#  define CONFIG_MEM_MANAGER  1 /*include heap manager functionality,*/
+/*default: enabled on vxworks*/
 # else
-#  define CONFIG_MEM_MANAGER  0 //include heap manager functionality
+#  define CONFIG_MEM_MANAGER  0 /*include heap manager functionality*/
 # endif
 #endif /*CONFIG_MEM_MANAGER*/
 
 #ifndef CONFIG_MEM_TRACKER
-# define CONFIG_MEM_TRACKER     1 //include xvpx_* calls in the lib
+# define CONFIG_MEM_TRACKER     1 /*include xvpx_* calls in the lib*/
 #endif
 
 #ifndef CONFIG_MEM_CHECKS
-# define CONFIG_MEM_CHECKS      0 //include some basic safety checks in
-//vpx_memcpy, _memset, and _memmove
+# define CONFIG_MEM_CHECKS      0 /*include some basic safety checks in
+vpx_memcpy, _memset, and _memmove*/
 #endif
 
 #ifndef USE_GLOBAL_FUNCTION_POINTERS
-# define USE_GLOBAL_FUNCTION_POINTERS   0  //use function pointers instead of compiled functions.
+# define USE_GLOBAL_FUNCTION_POINTERS   0  /*use function pointers instead of compiled functions.*/
 #endif
 
 #if CONFIG_MEM_TRACKER
@@ -46,9 +46,9 @@
 
 #ifndef DEFAULT_ALIGNMENT
 # if defined(VXWORKS)
-#  define DEFAULT_ALIGNMENT        32        //default addr alignment to use in
-//calls to vpx_* functions other
-//than vpx_memalign
+#  define DEFAULT_ALIGNMENT        32        /*default addr alignment to use in
+                                               calls to vpx_* functions other
+                                               than vpx_memalign*/
 # else
 #  define DEFAULT_ALIGNMENT        1
 # endif
@@ -59,24 +59,24 @@
 #endif
 
 #if CONFIG_MEM_TRACKER
-# define TRY_BOUNDS_CHECK         1         //when set to 1 pads each allocation,
-//integrity can be checked using
-//vpx_memory_tracker_check_integrity
-//or on free by defining
-//TRY_BOUNDS_CHECK_ON_FREE
+# define TRY_BOUNDS_CHECK         1        /*when set to 1 pads each allocation,
+                                             integrity can be checked using
+                                             vpx_memory_tracker_check_integrity
+                                             or on free by defining*/
+/*TRY_BOUNDS_CHECK_ON_FREE*/
 #else
 # define TRY_BOUNDS_CHECK         0
 #endif /*CONFIG_MEM_TRACKER*/
 
 #if TRY_BOUNDS_CHECK
-# define TRY_BOUNDS_CHECK_ON_FREE 0          //checks mem integrity on every
-//free, very expensive
-# define BOUNDS_CHECK_VALUE       0xdeadbeef //value stored before/after ea.
-//mem addr for bounds checking
-# define BOUNDS_CHECK_PAD_SIZE    32         //size of the padding before and
-//after ea allocation to be filled
-//with BOUNDS_CHECK_VALUE.
-//this should be a multiple of 4
+# define TRY_BOUNDS_CHECK_ON_FREE 0          /*checks mem integrity on every
+                                               free, very expensive*/
+# define BOUNDS_CHECK_VALUE       0xdeadbeef /*value stored before/after ea.
+                                               mem addr for bounds checking*/
+# define BOUNDS_CHECK_PAD_SIZE    32         /*size of the padding before and
+                                               after ea allocation to be filled
+                                               with BOUNDS_CHECK_VALUE.
+                                               this should be a multiple of 4*/
 #else
 # define BOUNDS_CHECK_VALUE       0
 # define BOUNDS_CHECK_PAD_SIZE    0
diff --git a/vpx_mem/vpx_mem.c b/vpx_mem/vpx_mem.c
index 85b05ab9f..eade43222 100644
--- a/vpx_mem/vpx_mem.c
+++ b/vpx_mem/vpx_mem.c
@@ -31,7 +31,7 @@ static unsigned long g_alloc_count = 0;
 # include "hmm_intrnl.h"
 
 # define SHIFT_HMM_ADDR_ALIGN_UNIT 5
-# define TOTAL_MEMORY_TO_ALLOCATE  20971520 // 20 * 1024 * 1024
+# define TOTAL_MEMORY_TO_ALLOCATE  20971520 /* 20 * 1024 * 1024 */
 
 # define MM_DYNAMIC_MEMORY 1
 # if MM_DYNAMIC_MEMORY
@@ -48,7 +48,7 @@ static int g_mng_memory_allocated = 0;
 
 static int vpx_mm_create_heap_memory();
 static void *vpx_mm_realloc(void *memblk, size_t size);
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
 #if USE_GLOBAL_FUNCTION_POINTERS
 struct GLOBAL_FUNC_POINTERS
@@ -75,7 +75,7 @@ struct GLOBAL_FUNC_POINTERS
 # define VPX_MEMCPY_L  memcpy
 # define VPX_MEMSET_L  memset
 # define VPX_MEMMOVE_L memmove
-#endif // USE_GLOBAL_FUNCTION_POINTERS
+#endif /* USE_GLOBAL_FUNCTION_POINTERS */
 
 unsigned int vpx_mem_get_version()
 {
@@ -130,7 +130,7 @@ void *vpx_memalign(size_t align, size_t size)
     addr = hmm_alloc(&hmm_d, number_aau);
 #else
     addr = VPX_MALLOC_L(size + align - 1 + ADDRESS_STORAGE_SIZE);
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
     if (addr)
     {
@@ -269,7 +269,7 @@ void *xvpx_memalign(size_t align, size_t size, char *file, int line)
     }
 #else
     x = vpx_memalign(align, size);
-#endif //TRY_BOUNDS_CHECK
+#endif /*TRY_BOUNDS_CHECK*/
 
     g_alloc_count++;
 
@@ -332,9 +332,10 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line)
     vpx_memory_tracker_check_integrity(file, line);
 #endif
 
-    //have to do this regardless of success, because
-    //the memory that does get realloc'd may change
-    //the bounds values of this block
+    /* have to do this regardless of success, because
+     * the memory that does get realloc'd may change
+     * the bounds values of this block
+     */
     vpx_memory_tracker_remove((size_t)memblk);
 
 #if TRY_BOUNDS_CHECK
@@ -364,7 +365,7 @@ void *xvpx_realloc(void *memblk, size_t size, char *file, int line)
     }
 #else
     x = vpx_realloc(memblk, size);
-#endif //TRY_BOUNDS_CHECK
+#endif /*TRY_BOUNDS_CHECK*/
 
     if (!memblk) ++g_alloc_count;
 
@@ -380,7 +381,7 @@ void xvpx_free(void *p_address, char *file, int line)
 {
 #if TRY_BOUNDS_CHECK
     unsigned char *p_bounds_address = (unsigned char *)p_address;
-    //p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;
+    /*p_bounds_address -= BOUNDS_CHECK_PAD_SIZE;*/
 #endif
 
 #if !TRY_BOUNDS_CHECK_ON_FREE
@@ -394,8 +395,9 @@ void xvpx_free(void *p_address, char *file, int line)
         vpx_memory_tracker_check_integrity(file, line);
 #endif
 
-        //if the addr isn't found in the list, assume it was allocated via
-        //vpx_ calls not xvpx_, therefore it does not contain any padding
+        /* if the addr isn't found in the list, assume it was allocated via
+         * vpx_ calls not xvpx_, therefore it does not contain any padding
+         */
         if (vpx_memory_tracker_remove((size_t)p_address) == -2)
         {
             p_bounds_address = p_address;
@@ -421,7 +423,7 @@ void xvpx_free(void *p_address, char *file, int line)
 
 #if CONFIG_MEM_CHECKS
 #if defined(VXWORKS)
-#include <task_lib.h> //for task_delay()
+#include <task_lib.h> /*for task_delay()*/
 /* This function is only used to get a stack trace of the player
 object so we can se where we are having a problem. */
 static int get_my_tt(int task)
@@ -627,7 +629,7 @@ static void *vpx_mm_realloc(void *memblk, size_t size)
 
     return p_ret;
 }
-#endif //CONFIG_MEM_MANAGER
+#endif /*CONFIG_MEM_MANAGER*/
 
 #if USE_GLOBAL_FUNCTION_POINTERS
 # if CONFIG_MEM_TRACKER
@@ -639,7 +641,7 @@ extern int vpx_memory_tracker_set_functions(g_malloc_func g_malloc_l
         , g_memset_func g_memset_l
         , g_memmove_func g_memmove_l);
 # endif
-#endif //USE_GLOBAL_FUNCTION_POINTERS
+#endif /*USE_GLOBAL_FUNCTION_POINTERS*/
 int vpx_mem_set_functions(g_malloc_func g_malloc_l
                           , g_calloc_func g_calloc_l
                           , g_realloc_func g_realloc_l
diff --git a/vpx_mem/vpx_mem.h b/vpx_mem/vpx_mem.h
index 31f8f9c60..749eaa42e 100644
--- a/vpx_mem/vpx_mem.h
+++ b/vpx_mem/vpx_mem.h
@@ -26,15 +26,15 @@
 /* end - vpx_mem version info */
 
 #ifndef VPX_TRACK_MEM_USAGE
-# define VPX_TRACK_MEM_USAGE       0  //enable memory tracking/integrity checks
+# define VPX_TRACK_MEM_USAGE       0  /* enable memory tracking/integrity checks */
 #endif
 #ifndef VPX_CHECK_MEM_FUNCTIONS
-# define VPX_CHECK_MEM_FUNCTIONS   0  //enable basic safety checks in _memcpy,
-//_memset, and _memmove
+# define VPX_CHECK_MEM_FUNCTIONS   0  /* enable basic safety checks in _memcpy,
+                                         _memset, and _memmove */
 #endif
 #ifndef REPLACE_BUILTIN_FUNCTIONS
-# define REPLACE_BUILTIN_FUNCTIONS 0  //replace builtin functions with their
-//vpx_ equivalents
+# define REPLACE_BUILTIN_FUNCTIONS 0  /* replace builtin functions with their
+                                         vpx_ equivalents */
 #endif
 
 #include <stdlib.h>
@@ -74,7 +74,7 @@ extern "C" {
     void *vpx_memset(void *dest, int val, size_t length);
     void *vpx_memmove(void *dest, const void *src, size_t count);
 
-// special memory functions
+    /* special memory functions */
     void *vpx_mem_alloc(int id, size_t size, size_t align);
     void vpx_mem_free(int id, void *mem, size_t size);
 
diff --git a/vpx_scale/generic/gen_scalers.c b/vpx_scale/generic/gen_scalers.c
index b084e817b..b54e334cb 100644
--- a/vpx_scale/generic/gen_scalers.c
+++ b/vpx_scale/generic/gen_scalers.c
@@ -116,7 +116,7 @@ void vp8cx_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest_pitc
         des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
 
-        // First line in next band
+        /* First line in next band */
         a = des [dest_pitch * 5];
         des [dest_pitch * 4] = (unsigned char)((d * 205 + 51 * a + 128) >> 8);
 
@@ -163,7 +163,7 @@ void vp8cx_last_vertical_band_4_5_scale_c(unsigned char *dest, unsigned int dest
         des [dest_pitch*2] = (unsigned char)((b * 102 + c + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((c + 102 * d + 128) >> 8);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des[dest_pitch*4] = (unsigned char) d;
 
         des++;
@@ -401,7 +401,7 @@ void vp8cx_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest_pitc
         des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
 
-        // First line in next band...
+        /* First line in next band... */
         a = des [dest_pitch * 5];
         des [dest_pitch * 4] = (unsigned char)((c * 154 + a * 102 + 128) >> 8);
 
@@ -446,7 +446,7 @@ void vp8cx_last_vertical_band_3_5_scale_c(unsigned char *dest, unsigned int dest
         des [dest_pitch*2] = (unsigned char)((b * 205 + c * 51 + 128) >> 8);
         des [dest_pitch*3] = (unsigned char)((b * 51 + c * 205 + 128) >> 8);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des [ dest_pitch * 4 ] = (unsigned char)(c) ;
 
         des++;
@@ -549,7 +549,7 @@ void vp8cx_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest_pitc
         c = des[dest_pitch*2];
         des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
 
-        // First line in next band...
+        /* First line in next band... */
         a = des [dest_pitch*4];
         des [dest_pitch*3] = (unsigned char)((c * 192 + a * 64 + 128) >> 8);
 
@@ -593,7 +593,7 @@ void vp8cx_last_vertical_band_3_4_scale_c(unsigned char *dest, unsigned int dest
         c = des[dest_pitch*2];
         des [dest_pitch*2] = (unsigned char)((b + c + 1) >> 1);
 
-        // No other line for interplation of this line, so ..
+        /* No other line for interplation of this line, so .. */
         des [dest_pitch*3] = (unsigned char)(c);
 
         des++;
diff --git a/vpx_scale/generic/vpxscale.c b/vpx_scale/generic/vpxscale.c
index e1c80281f..13c9122f0 100644
--- a/vpx_scale/generic/vpxscale.c
+++ b/vpx_scale/generic/vpxscale.c
@@ -279,9 +279,9 @@ void scale1d_c
 
     (void) source_length;
 
-    // These asserts are needed if there are boundary issues...
-    //assert ( dest_scale > source_scale );
-    //assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );
+    /* These asserts are needed if there are boundary issues... */
+    /*assert ( dest_scale > source_scale );*/
+    /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale );*/
 
     for (i = 0; i < dest_length * dest_step; i += dest_step)
     {
@@ -334,7 +334,7 @@ void scale1d_c
 static
 void Scale2D
 (
-    //const
+    /*const*/
     unsigned char *source,
     int source_pitch,
     unsigned int source_width,
@@ -352,7 +352,7 @@ void Scale2D
     unsigned int interlaced
 )
 {
-    //unsigned
+    /*unsigned*/
     int i, j, k;
     int bands;
     int dest_band_height;
@@ -370,7 +370,7 @@ void Scale2D
     int ratio_scalable = 1;
     int interpolation = 0;
 
-    unsigned char *source_base; // = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch)));
+    unsigned char *source_base; /* = (unsigned char *) ((source_pitch >= 0) ? source : (source + ((source_height-1) * source_pitch))); */
     unsigned char *line_src;
 
 
@@ -386,24 +386,24 @@ void Scale2D
         source_base += offset;
     }
 
-    // find out the ratio for each direction
+    /* find out the ratio for each direction */
     switch (hratio * 10 / hscale)
     {
     case 8:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_5_4_scale;
         break;
     case 6:
-        // 3-5 Scale in Width direction
+        /* 3-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_5_3_scale;
         break;
     case 5:
-        // 1-2 Scale in Width direction
+        /* 1-2 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_2_1_scale;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -411,30 +411,30 @@ void Scale2D
     switch (vratio * 10 / vscale)
     {
     case 8:
-        // 4-5 Scale in vertical direction
+        /* 4-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_5_4_scale;
         source_band_height  = 5;
         dest_band_height    = 4;
         break;
     case 6:
-        // 3-5 Scale in vertical direction
+        /* 3-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_5_3_scale;
         source_band_height  = 5;
         dest_band_height    = 3;
         break;
     case 5:
-        // 1-2 Scale in vertical direction
+        /* 1-2 Scale in vertical direction */
 
         if (interlaced)
         {
-            //if the content is interlaced, point sampling is used
+            /* if the content is interlaced, point sampling is used */
             vert_band_scale     = vp8_vertical_band_2_1_scale;
         }
         else
         {
 
             interpolation = 1;
-            //if the content is progressive, interplo
+            /* if the content is progressive, interplo */
             vert_band_scale     = vp8_vertical_band_2_1_scale_i;
 
         }
@@ -443,8 +443,8 @@ void Scale2D
         dest_band_height    = 1;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -453,7 +453,7 @@ void Scale2D
     {
         if (source_height == dest_height)
         {
-            // for each band of the image
+            /* for each band of the image */
             for (k = 0; k < (int)dest_height; k++)
             {
                 horiz_line_scale(source, source_width, dest, dest_width);
@@ -474,10 +474,10 @@ void Scale2D
 
         for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++)
         {
-            // scale one band horizontally
+            /* scale one band horizontally */
             for (i = 0; i < source_band_height; i++)
             {
-                // Trap case where we could read off the base of the source buffer
+                /* Trap case where we could read off the base of the source buffer */
 
                 line_src = (unsigned char *)source + i * source_pitch;
 
@@ -488,13 +488,13 @@ void Scale2D
                                  temp_area + (i + 1)*dest_pitch, dest_width);
             }
 
-            // Vertical scaling is in place
+            /* Vertical scaling is in place */
             vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width);
 
             if (interpolation)
                 vpx_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_width);
 
-            // Next band...
+            /* Next band... */
             source += (unsigned long) source_band_height  * source_pitch;
             dest   += (unsigned long) dest_band_height * dest_pitch;
         }
@@ -515,7 +515,7 @@ void Scale2D
 
     if (source_height == dest_height)
     {
-        // for each band of the image
+        /* for each band of the image */
         for (k = 0; k < (int)dest_height; k++)
         {
             Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width);
@@ -537,15 +537,15 @@ void Scale2D
         dest_band_height   = source_band_height * vratio / vscale;
     }
 
-    // first row needs to be done so that we can stay one row ahead for vertical zoom
+    /* first row needs to be done so that we can stay one row ahead for vertical zoom */
     Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width);
 
-    // for each band of the image
+    /* for each band of the image */
     bands = (dest_height + dest_band_height - 1) / dest_band_height;
 
     for (k = 0; k < bands; k++)
     {
-        // scale one band horizontally
+        /* scale one band horizontally */
         for (i = 1; i < source_band_height + 1; i++)
         {
             if (k * source_band_height + i < (int) source_height)
@@ -553,24 +553,24 @@ void Scale2D
                 Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1,
                          temp_area + i * dest_pitch, 1, hratio, dest_width);
             }
-            else  //  Duplicate the last row
+            else  /*  Duplicate the last row */
             {
-                // copy temp_area row 0 over from last row in the past
+                /* copy temp_area row 0 over from last row in the past */
                 duck_memcpy(temp_area + i * dest_pitch, temp_area + (i - 1)*dest_pitch, dest_pitch);
             }
         }
 
-        // scale one band vertically
+        /* scale one band vertically */
         for (j = 0; j < (int)dest_width; j++)
         {
             Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1,
                      &dest[j], dest_pitch, vratio, dest_band_height);
         }
 
-        // copy temp_area row 0 over from last row in the past
+        /* copy temp_area row 0 over from last row in the past */
         duck_memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch);
 
-        // move to the next band
+        /* move to the next band */
         source += source_band_height * source_pitch;
         dest   += dest_band_height * dest_pitch;
     }
@@ -617,7 +617,7 @@ void vp8_scale_frame
     int dw = (hscale - 1 + src->y_width * hratio) / hscale;
     int dh = (vscale - 1 + src->y_height * vratio) / vscale;
 
-    // call our internal scaling routines!!
+    /* call our internal scaling routines!! */
     Scale2D((unsigned char *) src->y_buffer, src->y_stride, src->y_width, src->y_height,
             (unsigned char *) dst->y_buffer, dst->y_stride, dw, dh,
             temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced);
@@ -696,13 +696,13 @@ int any_ratio_2d_scale
     unsigned int src_band_height  = 0;
     unsigned int dest_band_height = 0;
 
-    // suggested scale factors
+    /* suggested scale factors */
     int hs = si->HScale;
     int hr = si->HRatio;
     int vs = si->VScale;
     int vr = si->VRatio;
 
-    // assume the ratios are scalable instead of should be centered
+    /* assume the ratios are scalable instead of should be centered */
     int ratio_scalable = 1;
 
     const unsigned char *source_base = ((source_pitch >= 0) ? source : (source + ((source_height - 1) * source_pitch)));
@@ -714,37 +714,37 @@ int any_ratio_2d_scale
 
     (void) si;
 
-    // find out the ratio for each direction
+    /* find out the ratio for each direction */
     switch (hr * 30 / hs)
     {
     case 24:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_4_5_scale;
         break;
     case 22:
-        // 3-4 Scale in Width direction
+        /* 3-4 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_3_4_scale;
         break;
 
     case 20:
-        // 4-5 Scale in Width direction
+        /* 4-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_2_3_scale;
         break;
     case 18:
-        // 3-5 Scale in Width direction
+        /* 3-5 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_3_5_scale;
         break;
     case 15:
-        // 1-2 Scale in Width direction
+        /* 1-2 Scale in Width direction */
         horiz_line_scale = vp8_horizontal_line_1_2_scale;
         break;
     case 30:
-        // no scale in Width direction
+        /* no scale in Width direction */
         horiz_line_scale = horizontal_line_copy;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -752,50 +752,50 @@ int any_ratio_2d_scale
     switch (vr * 30 / vs)
     {
     case 24:
-        // 4-5 Scale in vertical direction
+        /* 4-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_4_5_scale;
         last_vert_band_scale = vp8_last_vertical_band_4_5_scale;
         src_band_height     = 4;
         dest_band_height    = 5;
         break;
     case 22:
-        // 3-4 Scale in vertical direction
+        /* 3-4 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_3_4_scale;
         last_vert_band_scale = vp8_last_vertical_band_3_4_scale;
         src_band_height     = 3;
         dest_band_height    = 4;
         break;
     case 20:
-        // 2-3 Scale in vertical direction
+        /* 2-3 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_2_3_scale;
         last_vert_band_scale = vp8_last_vertical_band_2_3_scale;
         src_band_height     = 2;
         dest_band_height    = 3;
         break;
     case 18:
-        // 3-5 Scale in vertical direction
+        /* 3-5 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_3_5_scale;
         last_vert_band_scale = vp8_last_vertical_band_3_5_scale;
         src_band_height     = 3;
         dest_band_height    = 5;
         break;
     case 15:
-        // 1-2 Scale in vertical direction
+        /* 1-2 Scale in vertical direction */
         vert_band_scale     = vp8_vertical_band_1_2_scale;
         last_vert_band_scale = vp8_last_vertical_band_1_2_scale;
         src_band_height     = 1;
         dest_band_height    = 2;
         break;
     case 30:
-        // no scale in Width direction
+        /* no scale in Width direction */
         vert_band_scale     = null_scale;
         last_vert_band_scale = null_scale;
         src_band_height     = 4;
         dest_band_height    = 4;
         break;
     default:
-        // The ratio is not acceptable now
-        // throw("The ratio is not acceptable for now!");
+        /* The ratio is not acceptable now */
+        /* throw("The ratio is not acceptable for now!"); */
         ratio_scalable = 0;
         break;
     }
@@ -805,13 +805,13 @@ int any_ratio_2d_scale
 
     horiz_line_scale(source, source_width, dest, dest_width);
 
-    // except last band
+    /* except last band */
     for (k = 0; k < (dest_height + dest_band_height - 1) / dest_band_height - 1; k++)
     {
-        // scale one band horizontally
+        /* scale one band horizontally */
         for (i = 1; i < src_band_height; i++)
         {
-            // Trap case where we could read off the base of the source buffer
+            /* Trap case where we could read off the base of the source buffer */
             line_src = source + i * source_pitch;
 
             if (line_src < source_base)
@@ -821,8 +821,8 @@ int any_ratio_2d_scale
                              dest + i * dest_pitch, dest_width);
         }
 
-        // first line of next band
-        // Trap case where we could read off the base of the source buffer
+        /* first line of next band */
+        /* Trap case where we could read off the base of the source buffer */
         line_src = source + src_band_height * source_pitch;
 
         if (line_src < source_base)
@@ -832,18 +832,18 @@ int any_ratio_2d_scale
                          dest + dest_band_height * dest_pitch,
                          dest_width);
 
-        // Vertical scaling is in place
+        /* Vertical scaling is in place */
         vert_band_scale(dest, dest_pitch, dest_width);
 
-        // Next band...
+        /* Next band... */
         source += src_band_height  * source_pitch;
         dest   += dest_band_height * dest_pitch;
     }
 
-    // scale one band horizontally
+    /* scale one band horizontally */
     for (i = 1; i < src_band_height; i++)
     {
-        // Trap case where we could read off the base of the source buffer
+        /* Trap case where we could read off the base of the source buffer */
         line_src = source + i * source_pitch;
 
         if (line_src < source_base)
@@ -854,7 +854,7 @@ int any_ratio_2d_scale
                          dest_width);
     }
 
-    // Vertical scaling is in place
+    /* Vertical scaling is in place */
     last_vert_band_scale(dest, dest_pitch, dest_width);
 
     return ratio_scalable;
@@ -885,7 +885,7 @@ int any_ratio_frame_scale(SCALE_VARS *scale_vars, int YOffset, int UVOffset)
     int ew;
     int eh;
 
-    // suggested scale factors
+    /* suggested scale factors */
     int hs = scale_vars->HScale;
     int hr = scale_vars->HRatio;
     int vs = scale_vars->VScale;
@@ -968,11 +968,11 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
     unsigned char *src_data_pointer;
     unsigned char *dst_data_pointer;
 
-    // center values
+    /* center values */
     row_offset = (dst_yuv_config->y_height - src_yuv_config->y_height) / 2;
     col_offset = (dst_yuv_config->y_width - src_yuv_config->y_width) / 2;
 
-    // Y's
+    /* Y's */
     src_data_pointer = src_yuv_config->y_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->y_buffer + (row_offset * dst_yuv_config->y_stride) + col_offset;
 
@@ -986,7 +986,7 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
     row_offset /= 2;
     col_offset /= 2;
 
-    // U's
+    /* U's */
     src_data_pointer = src_yuv_config->u_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->u_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
 
@@ -997,7 +997,7 @@ center_image(YV12_BUFFER_CONFIG *src_yuv_config, YV12_BUFFER_CONFIG *dst_yuv_con
         src_data_pointer += src_yuv_config->uv_stride;
     }
 
-    // V's
+    /* V's */
     src_data_pointer = src_yuv_config->v_buffer;
     dst_data_pointer = (unsigned char *)dst_yuv_config->v_buffer + (row_offset * dst_yuv_config->uv_stride) + col_offset;
 
@@ -1040,8 +1040,8 @@ vp8_yv12_scale_or_center
     int VRatio
 )
 {
-//    if ( ppi->post_processing_level )
-    //      update_umvborder ( ppi, frame_buffer );
+    /*if ( ppi->post_processing_level )
+          update_umvborder ( ppi, frame_buffer );*/
 
 
     switch (scaling_mode)
@@ -1050,12 +1050,12 @@ vp8_yv12_scale_or_center
     case MAINTAIN_ASPECT_RATIO:
     {
         SCALE_VARS scale_vars;
-        // center values
+        /* center values */
 #if 1
         int row = (dst_yuv_config->y_height - expanded_frame_height) / 2;
         int col = (dst_yuv_config->y_width  - expanded_frame_width) / 2;
-//        int YOffset  = row * dst_yuv_config->y_width + col;
-//        int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);
+        /*int YOffset  = row * dst_yuv_config->y_width + col;
+        int UVOffset = (row>>1) * dst_yuv_config->uv_width + (col>>1);*/
         int YOffset  = row * dst_yuv_config->y_stride + col;
         int UVOffset = (row >> 1) * dst_yuv_config->uv_stride + (col >> 1);
 #else
@@ -1074,7 +1074,7 @@ vp8_yv12_scale_or_center
         scale_vars.expanded_frame_width = expanded_frame_width;
         scale_vars.expanded_frame_height = expanded_frame_height;
 
-        // perform center and scale
+        /* perform center and scale */
         any_ratio_frame_scale(&scale_vars, YOffset, UVOffset);
 
         break;
diff --git a/vpx_scale/generic/yv12config.c b/vpx_scale/generic/yv12config.c
index 3034cc3a7..d9d228551 100644
--- a/vpx_scale/generic/yv12config.c
+++ b/vpx_scale/generic/yv12config.c
@@ -45,7 +45,7 @@ vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf)
 int
 vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border)
 {
-//NOTE:
+/*NOTE:*/
 
     int yplane_size = (height + 2 * border) * (width + 2 * border);
     int uvplane_size = ((1 + height) / 2 + border) * ((1 + width) / 2 + border);
@@ -65,9 +65,10 @@ vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int
         ybf->border = border;
         ybf->frame_size = yplane_size + 2 * uvplane_size;
 
-        // Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
-        // when we have a large motion vector in V on the last v block.
-        // Note : We never use these pixels anyway so this doesn't hurt.
+        /* Added 2 extra lines to framebuffer so that copy12x12 doesn't fail
+         * when we have a large motion vector in V on the last v block.
+         * Note : We never use these pixels anyway so this doesn't hurt.
+         */
         ybf->buffer_alloc = (unsigned char *) duck_memalign(32,  ybf->frame_size + (ybf->y_stride * 2) + 32, 0);
 
         if (ybf->buffer_alloc == NULL)
diff --git a/vpx_scale/generic/yv12extend.c b/vpx_scale/generic/yv12extend.c
index 39d2fa854..e58aa1fb2 100644
--- a/vpx_scale/generic/yv12extend.c
+++ b/vpx_scale/generic/yv12extend.c
@@ -40,7 +40,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     plane_height = ybf->y_height;
     plane_width = ybf->y_width;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->y_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -56,7 +56,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->y_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -79,7 +79,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     plane_width = ybf->uv_width;
     Border /= 2;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->u_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -95,7 +95,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->u_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -113,7 +113,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
     /* V Plane */
     /***********/
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->v_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -129,7 +129,7 @@ vp8_yv12_extend_frame_borders(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->v_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
@@ -165,7 +165,7 @@ vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
     plane_height = ybf->y_height;
     plane_width = ybf->y_width;
 
-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
     src_ptr1 = ybf->y_buffer;
     src_ptr2 = src_ptr1 + plane_width - 1;
     dest_ptr1 = src_ptr1 - Border;
@@ -181,7 +181,7 @@ vp8_yv12_extend_frame_borders_yonly(YV12_BUFFER_CONFIG *ybf)
         dest_ptr2 += plane_stride;
     }
 
-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
     src_ptr1 = ybf->y_buffer - Border;
     src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride;
     dest_ptr1 = src_ptr1 - (Border * plane_stride);
diff --git a/vpx_scale/yv12config.h b/vpx_scale/yv12config.h
index 50d6e3b3a..5dcee818a 100644
--- a/vpx_scale/yv12config.h
+++ b/vpx_scale/yv12config.h
@@ -32,8 +32,8 @@ extern "C"
     ************************************/
     typedef enum
     {
-        REG_YUV = 0,    // Regular yuv
-        INT_YUV = 1     // The type of yuv that can be tranfer to and from RGB through integer transform
+        REG_YUV = 0,    /* Regular yuv */
+        INT_YUV = 1     /* The type of yuv that can be tranfer to and from RGB through integer transform */
               }
               YUV_TYPE;
 
@@ -42,12 +42,12 @@ extern "C"
         int   y_width;
         int   y_height;
         int   y_stride;
-//    int   yinternal_width;
+/*    int   yinternal_width; */
 
         int   uv_width;
         int   uv_height;
         int   uv_stride;
-//    int   uvinternal_width;
+/*    int   uvinternal_width; */
 
         unsigned char *y_buffer;
         unsigned char *u_buffer;
@@ -68,4 +68,4 @@ extern "C"
 #endif
 
 
-#endif //YV12_CONFIG_H
+#endif /*YV12_CONFIG_H*/
diff --git a/vpxdec.c b/vpxdec.c
index 48a36cd65..e8e4d5f12 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -336,7 +336,7 @@ void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
     }
     else
     {
-        fwrite(buf, 1, len, out);
+        if(fwrite(buf, 1, len, out));
     }
 }
 
@@ -461,11 +461,13 @@ nestegg_read_cb(void *buffer, size_t length, void *userdata)
 {
     FILE *f = userdata;
 
-    fread(buffer, 1, length, f);
-    if (ferror(f))
-        return -1;
-    if (feof(f))
-        return 0;
+    if(fread(buffer, 1, length, f) < length)
+    {
+        if (ferror(f))
+            return -1;
+        if (feof(f))
+            return 0;
+    }
     return 1;
 }
 
diff --git a/vpxenc.c b/vpxenc.c
index fc0b779ad..f95657d37 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -213,7 +213,7 @@ void stats_write(stats_io_t *stats, const void *pkt, size_t len)
 {
     if (stats->file)
     {
-        fwrite(pkt, 1, len, stats->file);
+        if(fwrite(pkt, 1, len, stats->file));
     }
     else
     {
@@ -259,10 +259,11 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
                       y4m_input *y4m, struct detect_buffer *detect)
 {
     int plane = 0;
+    int shortread = 0;
 
     if (file_type == FILE_TYPE_Y4M)
     {
-        if (y4m_input_fetch_frame(y4m, f, img) < 0)
+        if (y4m_input_fetch_frame(y4m, f, img) < 1)
            return 0;
     }
     else
@@ -275,7 +276,7 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
              * write_ivf_frame_header() for documentation on the frame header
              * layout.
              */
-            fread(junk, 1, IVF_FRAME_HDR_SZ, f);
+            if(fread(junk, 1, IVF_FRAME_HDR_SZ, f));
         }
 
         for (plane = 0; plane < 3; plane++)
@@ -306,18 +307,18 @@ static int read_frame(FILE *f, vpx_image_t *img, unsigned int file_type,
                 if (detect->valid)
                 {
                     memcpy(ptr, detect->buf, 4);
-                    fread(ptr+4, 1, w-4, f);
+                    shortread |= fread(ptr+4, 1, w-4, f) < w-4;
                     detect->valid = 0;
                 }
                 else
-                    fread(ptr, 1, w, f);
+                    shortread |= fread(ptr, 1, w, f) < w;
 
                 ptr += img->stride[plane];
             }
         }
     }
 
-    return !feof(f);
+    return !shortread;
 }
 
 
@@ -396,7 +397,7 @@ static void write_ivf_file_header(FILE *outfile,
     mem_put_le32(header + 24, frame_cnt);         /* length */
     mem_put_le32(header + 28, 0);                 /* unused */
 
-    fwrite(header, 1, 32, outfile);
+    if(fwrite(header, 1, 32, outfile));
 }
 
 
@@ -414,7 +415,7 @@ static void write_ivf_frame_header(FILE *outfile,
     mem_put_le32(header + 4, pts & 0xFFFFFFFF);
     mem_put_le32(header + 8, pts >> 32);
 
-    fwrite(header, 1, 12, outfile);
+    if(fwrite(header, 1, 12, outfile));
 }
 
 
@@ -462,7 +463,7 @@ struct EbmlGlobal
 
 void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
 {
-    fwrite(buffer_in, 1, len, glob->stream);
+    if(fwrite(buffer_in, 1, len, glob->stream));
 }
 
 
@@ -1329,6 +1330,7 @@ int main(int argc, const char **argv_)
     {
         int frames_in = 0, frames_out = 0;
         unsigned long nbytes = 0;
+        size_t detect_bytes;
         struct detect_buffer detect;
 
         /* Parse certain options from the input file, if possible */
@@ -1340,10 +1342,15 @@ int main(int argc, const char **argv_)
             return EXIT_FAILURE;
         }
 
-        fread(detect.buf, 1, 4, infile);
+        /* For RAW input sources, these bytes will applied on the first frame
+         *  in read_frame().
+         * We can always read 4 bytes because the minimum supported frame size
+         *  is 2x2.
+         */
+        detect_bytes = fread(detect.buf, 1, 4, infile);
         detect.valid = 0;
 
-        if (file_is_y4m(infile, &y4m, detect.buf))
+        if (detect_bytes == 4 && file_is_y4m(infile, &y4m, detect.buf))
         {
             if (y4m_input_open(&y4m, infile, detect.buf, 4) >= 0)
             {
@@ -1368,7 +1375,8 @@ int main(int argc, const char **argv_)
                 return EXIT_FAILURE;
             }
         }
-        else if (file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
+        else if (detect_bytes == 4 &&
+                 file_is_ivf(infile, &fourcc, &cfg.g_w, &cfg.g_h, detect.buf))
         {
             file_type = FILE_TYPE_IVF;
             switch (fourcc)
@@ -1572,7 +1580,8 @@ int main(int argc, const char **argv_)
                     else
                     {
                         write_ivf_frame_header(outfile, pkt);
-                        fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile);
+                        if(fwrite(pkt->data.frame.buf, 1,
+                                  pkt->data.frame.sz, outfile));
                     }
                     nbytes += pkt->data.raw.sz;
                     break;
diff --git a/y4minput.c b/y4minput.c
index 3eaec4ed3..449afe858 100644
--- a/y4minput.c
+++ b/y4minput.c
@@ -877,5 +877,5 @@ int y4m_input_fetch_frame(y4m_input *_y4m,FILE *_fin,vpx_image_t *_img){
   _img->planes[PLANE_Y]=_y4m->dst_buf;
   _img->planes[PLANE_U]=_y4m->dst_buf+pic_sz;
   _img->planes[PLANE_V]=_y4m->dst_buf+pic_sz+c_sz;
-  return 0;
+  return 1;
 }

From 686b217ed7fa3d77ac4b7c7754edaecbd2acc1f4 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 27 Oct 2010 16:27:56 -0400
Subject: [PATCH 281/307] Update CHANGELOG for v0.9.5 (Aylesbury) release

Change-Id: Ic9f05dbbe90480d5b172233c87eaf1d4e2f1b48e
---
 CHANGELOG | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

diff --git a/CHANGELOG b/CHANGELOG
index 2b2803740..d3d1e5a2a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,111 @@
+2010-09-02 v0.9.5 "Aylesbury"
+  Our first named release, focused on a faster decoder, and a better encoder.
+
+  - Upgrading:
+    This release incorporates backwards-incompatible changes to the
+    ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.
+
+    vpxdec
+      * the -q (quiet) option has been removed, and replaced with
+        -v (verbose). the output is quiet by default. Use -v to see
+        the version number of the binary.
+
+      * The default behavior is now to write output to a single file
+        instead of individual frames. The -y option has been removed.
+        Y4M output is the default.
+
+      * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
+        options must be specified.
+
+          $ ivfdec -o OUTPUT INPUT
+          $ vpxdec --i420 -o OUTPUT INPUT
+
+      * If an output file is not specified, the default is to write
+        Y4M to stdout. This makes piping more natural.
+
+          $ ivfdec -y -o - INPUT | ...
+          $ vpxdec INPUT | ...
+
+      * The output file has additional flexibility for formatting the
+        filename. It supports escape characters for constructing a
+        filename from the width, height, and sequence number. This
+        replaces the -p option. To get the equivalent:
+
+          $ ivfdec -p frame INPUT
+          $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT
+
+    vpxenc
+      * The output file must be specified with -o, rather than as the
+        last argument.
+
+          $ ivfenc <options> INPUT OUTPUT
+          $ vpxenc <options> -o OUTPUT INPUT
+
+      * The output defaults to webm. To get IVF output, use the --ivf
+        option.
+
+          $ ivfenc <options> INPUT OUTPUT.ivf
+          $ vpxenc <options> -o OUTPUT.ivf --ivf INPUT
+
+
+  - Enhancements:
+      ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
+      vpxdec supports .webm input
+      vpxdec writes .y4m by default
+      vpxenc writes .webm output by default
+      vpxenc --psnr now shows the average/overall PSNR at the end
+      ARM platforms now support runtime cpu detection
+      vpxdec visualizations added for motion vectors, block modes, references
+      vpxdec now silent by default
+      vpxdec --progress shows frame-by-frame timing information
+      vpxenc supports the distinction between --fps and --timebase
+      NASM is now a supported assembler
+      configure: enable PIC for shared libs by default
+      configure: add --enable-small
+      configure: support for ppc32-linux-gcc
+      configure: support for sparc-solaris-gcc
+
+  - Bugs:
+      Improve handling of invalid frames
+      Fix valgrind errors in the NEON loop filters.
+      Fix loopfilter delta zero transitions
+      Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
+      Build fixes for darwin-icc
+
+  - Speed:
+      20-40% (average 28%) improvement in libvpx decoder speed,
+      including:
+        Rewrite vp8_short_walsh4x4_sse2()
+        Optimizations on the loopfilters.
+        Miscellaneous improvements for Atom
+        Add 4-tap version of 2nd-pass ARMv6 MC filter.
+        Improved multithread utilization
+        Better instruction choices on x86
+        reorder data to use wider instructions
+        Update NEON wide idcts
+        Make block access to frame buffer sequential
+        Improved subset block search
+        Bilinear subpixel optimizations for ssse3.
+        Decrease memory footprint
+
+      Encoder speed improvements (percentage gain not measured):
+        Skip unnecessary search of identical frames
+        Add SSE2 subtract functions
+        Improve bounds checking in vp8_diamond_search_sadx4()
+        Added vp8_fast_quantize_b_sse2
+
+  - Quality:
+      Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
+      encoding mode, and up to 60% improvement on very noisy, still
+      or slow moving source video
+
+        Motion compensated temporal filter for Alt-Ref Noise Reduction
+        Improved use of trellis quantization on 2nd order Y blocks
+        Tune effect of motion on KF/GF boost in two pass
+        Allow coefficient optimization for good quality speed 0.
+        Improved control of active min quantizer for two pass.
+        Enable ARFs for non-lagged compress
+
 2010-09-02 v0.9.2
   - Enhancements:
       Disable frame dropping by default

From cad2164911eb48ec608cfddfb858821083c9e99b Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 28 Oct 2010 09:14:14 -0400
Subject: [PATCH 282/307] CHANGELOG: correct date

Change-Id: I146a7f241efad4f0684cf8613c7fa42bd5cf42f3
---
 CHANGELOG | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG b/CHANGELOG
index d3d1e5a2a..b8da8f8e3 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,4 +1,4 @@
-2010-09-02 v0.9.5 "Aylesbury"
+2010-10-28 v0.9.5 "Aylesbury"
   Our first named release, focused on a faster decoder, and a better encoder.
 
   - Upgrading:

From 7e3a1e7361a7ddd8de3b0766669392faaa3086a3 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 28 Oct 2010 15:26:58 -0400
Subject: [PATCH 283/307] Fix full-search SAD function crash in Visual Studio

Unlike GCC, Visual Studio compiler doesn't allocate SAD output
array 16-byte aligned, which causes crash in visual studio.

Change-Id: Ia755cf5a807f12929bda8db94032bb3c9d0c2362
---
 vp8/encoder/x86/sad_sse4.asm | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vp8/encoder/x86/sad_sse4.asm b/vp8/encoder/x86/sad_sse4.asm
index 03ecec4b3..21e2e5007 100644
--- a/vp8/encoder/x86/sad_sse4.asm
+++ b/vp8/encoder/x86/sad_sse4.asm
@@ -186,7 +186,7 @@ sym(vp8_sad16x16x8_sse4):
         PROCESS_16X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqa          XMMWORD PTR [rdi],    xmm1
+        movdqu          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -224,7 +224,7 @@ sym(vp8_sad16x8x8_sse4):
         PROCESS_16X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqa          XMMWORD PTR [rdi],    xmm1
+        movdqu          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -262,7 +262,7 @@ sym(vp8_sad8x8x8_sse4):
         PROCESS_8X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqa          XMMWORD PTR [rdi],    xmm1
+        movdqu          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -303,7 +303,7 @@ sym(vp8_sad8x16x8_sse4):
         PROCESS_8X2X8 0
         PROCESS_8X2X8 0
         mov             rdi,        arg(4)           ;Results
-        movdqa          XMMWORD PTR [rdi],    xmm1
+        movdqu          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi
@@ -339,7 +339,7 @@ sym(vp8_sad4x4x8_sse4):
         PROCESS_4X2X8 0
 
         mov             rdi,        arg(4)           ;Results
-        movdqa          XMMWORD PTR [rdi],    xmm1
+        movdqu          XMMWORD PTR [rdi],    xmm1
 
     ; begin epilog
     pop         rdi

From 6614563b8f851adebdb25cc9ffca28b871e34616 Mon Sep 17 00:00:00 2001
From: Yunqing Wang <yunqingwang@google.com>
Date: Thu, 28 Oct 2010 16:59:03 -0400
Subject: [PATCH 284/307] Save XMM registers in asm functions

XMM6/7 are used in these functions, and need to be saved.

Change-Id: I3dfaddaf2a69cd4bf8e8735c7064b17bac5a14e5
---
 vp8/encoder/x86/fwalsh_sse2.asm   | 2 ++
 vp8/encoder/x86/subtract_sse2.asm | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/vp8/encoder/x86/fwalsh_sse2.asm b/vp8/encoder/x86/fwalsh_sse2.asm
index 145b59d45..39439f0d8 100644
--- a/vp8/encoder/x86/fwalsh_sse2.asm
+++ b/vp8/encoder/x86/fwalsh_sse2.asm
@@ -17,6 +17,7 @@ sym(vp8_short_walsh4x4_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 3
+    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
@@ -143,6 +144,7 @@ sym(vp8_short_walsh4x4_sse2):
     pop rdi
     pop rsi
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret
diff --git a/vp8/encoder/x86/subtract_sse2.asm b/vp8/encoder/x86/subtract_sse2.asm
index 27661e07f..3fb23d097 100644
--- a/vp8/encoder/x86/subtract_sse2.asm
+++ b/vp8/encoder/x86/subtract_sse2.asm
@@ -77,6 +77,7 @@ sym(vp8_subtract_mby_sse2):
     push        rbp
     mov         rbp, rsp
     SHADOW_ARGS_TO_STACK 4
+    SAVE_XMM
     GET_GOT     rbx
     push rsi
     push rdi
@@ -138,6 +139,7 @@ submby_loop:
     pop rsi
     ; begin epilog
     RESTORE_GOT
+    RESTORE_XMM
     UNSHADOW_ARGS
     pop         rbp
     ret

From dcee88ea37b1a448732b277dced5343c268a3654 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Wed, 27 Oct 2010 14:38:33 -0400
Subject: [PATCH 285/307] Finding first label

Using tables for the label count and label offset.

Change-Id: Iac3d5b292c37341a881be0af282f5cac3b3e01eb
---
 vp8/encoder/rdopt.c | 66 +++++++++------------------------------------
 1 file changed, 12 insertions(+), 54 deletions(-)

diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 8f406b9b7..8a753fd44 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -918,21 +918,6 @@ void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, MV *mv)
 }
 
 #if !(CONFIG_REALTIME_ONLY)
-int vp8_count_labels(int const *labelings)
-{
-    int i;
-    int count = 0;
-
-    for (i = 0; i < 16; i++)
-    {
-        if (labelings[i] > count)
-            count = labelings[i];
-    }
-
-    return count + 1;
-}
-
-
 static int labels2mode(
     MACROBLOCK *x,
     int const *labelings, int which_label,
@@ -1112,15 +1097,19 @@ static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion, const vp
     *Rate = vp8_rdcost_mby(mb);
 }
 
+unsigned char vp8_mbsplit_offset2[4][16] = {
+    { 0,  8,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  0,  0,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  2,  8, 10,  0,  0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  0},
+    { 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14, 15}
+};
 static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int compressor_speed, int *mvcost[2], int mvthresh, int fullpixel)
 {
     int i, segmentation;
     B_PREDICTION_MODE this_mode;
     MACROBLOCKD *xc = &x->e_mbd;
-    BLOCK *b = &x->block[0];
-    BLOCKD *d = &x->e_mbd.block[0];
-    BLOCK *c = &x->block[0];
-    BLOCKD *e = &x->e_mbd.block[0];
+    BLOCK *c;
+    BLOCKD *e;
     int const *labels;
     int best_segment_rd = INT_MAX;
     int best_seg = 0;
@@ -1179,7 +1168,7 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
         v_fn_ptr = &cpi->fn_ptr[segmentation];
         sseshift = segmentation_to_sseshift[segmentation];
         labels = vp8_mbsplits[segmentation];
-        label_count = vp8_count_labels(labels);
+        label_count = vp8_mbsplit_count[segmentation];
 
         // 64 makes this threshold really big effectively
         // making it so that we very rarely check mvs on
@@ -1203,14 +1192,9 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
             int j;
             int bestlabelyrate = 0;
 
-            b = &x->block[0];
-            d = &x->e_mbd.block[0];
-
 
             // find first label
-            for (j = 0; j < 16; j++)
-                if (labels[j] == i)
-                    break;
+            j = vp8_mbsplit_offset2[segmentation][i];
 
             c = &x->block[j];
             e = &x->e_mbd.block[j];
@@ -1378,46 +1362,20 @@ static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, MV *bes
         bd->eob = beobs[i];
     }
 
-    // Trap cases where the best split mode has all vectors coded 0,0 (or all the same)
-    if (FALSE)
-    {
-        int allsame = 1;
-
-        for (i = 1; i < 16; i++)
-        {
-            if ((bmvs[i].col != bmvs[i-1].col) || (bmvs[i].row != bmvs[i-1].row))
-            {
-                allsame = 0;
-                break;
-            }
-        }
-
-        if (allsame)
-        {
-            best_segment_rd = INT_MAX;
-        }
-    }
-
     *returntotrate = bsr;
     *returndistortion = bsd;
     *returnyrate = bestsegmentyrate;
 
-
-
     // save partitions
     labels = vp8_mbsplits[best_seg];
     x->e_mbd.mode_info_context->mbmi.partitioning = best_seg;
-    x->partition_info->count = vp8_count_labels(labels);
+    x->partition_info->count = vp8_mbsplit_count[best_seg];
 
     for (i = 0; i < x->partition_info->count; i++)
     {
         int j;
 
-        for (j = 0; j < 16; j++)
-        {
-            if (labels[j] == i)
-                break;
-        }
+        j = vp8_mbsplit_offset2[best_seg][i];
 
         x->partition_info->bmi[i].mode = x->e_mbd.block[j].bmi.mode;
         x->partition_info->bmi[i].mv.as_mv = x->e_mbd.block[j].bmi.mv.as_mv;

From 0684c647ef38e81396567720e6fe5a1190e3c103 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Fri, 29 Oct 2010 11:07:31 -0400
Subject: [PATCH 286/307] cosmetic: remove alt_ref from vpxenc usage message

Undo an automatic transform.

Change-Id: Ie730a6a31b4680b34e54b61691d67c4b3ed2f2aa
---
 vpxenc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index f95657d37..7e0595207 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -978,11 +978,11 @@ static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1,
 static const arg_def_t auto_altref = ARG_DEF(NULL, "auto-alt-ref", 1,
                                      "Enable automatic alt reference frames");
 static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1,
-                                        "alt_ref Max Frames");
+                                        "AltRef Max Frames");
 static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1,
-                                       "alt_ref Strength");
+                                       "AltRef Strength");
 static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1,
-                                   "alt_ref Type");
+                                   "AltRef Type");
 
 static const arg_def_t *vp8_args[] =
 {

From ff4a71f4c20b7b2047d17872747aed6e98586b13 Mon Sep 17 00:00:00 2001
From: Scott LaVarnway <slavarnway@google.com>
Date: Mon, 1 Nov 2010 16:24:15 -0400
Subject: [PATCH 287/307] SSSE3 version of fast quantizer

(test clip: tulip)
For good quality mode with speed=1, this gave the encoder
a small (2 - 3%) performance boost.

Change-Id: I8a1d4269465944ac0819986c2f0be4b0a2ee0b35
---
 vp8/encoder/x86/quantize_ssse3.asm     | 114 +++++++++++++++++++++++++
 vp8/encoder/x86/x86_csystemdependent.c |  22 +++++
 vp8/vp8cx.mk                           |   1 +
 3 files changed, 137 insertions(+)
 create mode 100755 vp8/encoder/x86/quantize_ssse3.asm

diff --git a/vp8/encoder/x86/quantize_ssse3.asm b/vp8/encoder/x86/quantize_ssse3.asm
new file mode 100755
index 000000000..2f33199e5
--- /dev/null
+++ b/vp8/encoder/x86/quantize_ssse3.asm
@@ -0,0 +1,114 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license and patent
+;  grant that can be found in the LICENSE file in the root of the source
+;  tree. All contributing project authors may be found in the AUTHORS
+;  file in the root of the source tree.
+;
+
+
+%include "vpx_ports/x86_abi_support.asm"
+
+
+;int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr
+;               short *qcoeff_ptr,short *dequant_ptr,
+;               short *round_ptr,
+;               short *quant_ptr, short *dqcoeff_ptr);
+;
+global sym(vp8_fast_quantize_b_impl_ssse3)
+sym(vp8_fast_quantize_b_impl_ssse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 6
+    GET_GOT     rbx
+    push        rsi
+    push        rdi
+    ; end prolog
+
+    mov         rdx, arg(0)                 ;coeff_ptr
+    mov         rdi, arg(3)                 ;round_ptr
+    mov         rsi, arg(4)                 ;quant_ptr
+
+    movdqa      xmm0, [rdx]
+    movdqa      xmm4, [rdx + 16]
+
+    movdqa      xmm2, [rdi]                 ;round lo
+    movdqa      xmm3, [rdi + 16]            ;round hi
+
+    movdqa      xmm1, xmm0
+    movdqa      xmm5, xmm4
+
+    psraw       xmm0, 15                    ;sign of z (aka sz)
+    psraw       xmm4, 15                    ;sign of z (aka sz)
+
+    pabsw       xmm1, xmm1
+    pabsw       xmm5, xmm5
+
+    paddw       xmm1, xmm2
+    paddw       xmm5, xmm3
+
+    pmulhw      xmm1, [rsi]
+    pmulhw      xmm5, [rsi + 16]
+
+    mov         rdi, arg(1)                 ;qcoeff_ptr
+    mov         rcx, arg(2)                 ;dequant_ptr
+    mov         rsi, arg(5)                 ;dqcoeff_ptr
+
+    pxor        xmm1, xmm0
+    pxor        xmm5, xmm4
+    psubw       xmm1, xmm0
+    psubw       xmm5, xmm4
+
+    movdqa      [rdi], xmm1
+    movdqa      [rdi + 16], xmm5
+
+    movdqa      xmm2, [rcx]
+    movdqa      xmm3, [rcx + 16]
+
+    pxor        xmm4, xmm4
+    pmullw      xmm2, xmm1
+    pmullw      xmm3, xmm5
+
+    pcmpeqw     xmm1, xmm4                  ;non zero mask
+    pcmpeqw     xmm5, xmm4                  ;non zero mask
+    packsswb    xmm1, xmm5
+    pshufb      xmm1, [ GLOBAL(zz_shuf)]
+
+    pmovmskb    edx, xmm1
+
+;    xor         ecx, ecx
+;    mov         eax, -1
+;find_eob_loop:
+;    shr         edx, 1
+;    jc          fq_skip
+;    mov         eax, ecx
+;fq_skip:
+;    inc         ecx
+;    cmp         ecx, 16
+;    jne         find_eob_loop
+    xor         rdi, rdi
+    mov         eax, -1
+    xor         dx, ax                      ;flip the bits for bsr
+    bsr         eax, edx
+
+    movdqa      [rsi], xmm2                 ;store dqcoeff
+    movdqa      [rsi + 16], xmm3            ;store dqcoeff
+
+    sub         edi, edx                    ;check for all zeros in bit mask
+    sar         edi, 31                     ;0 or -1
+    add         eax, 1
+    and         eax, edi                    ;if the bit mask was all zero,
+                                            ;then eob = 0
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
+
+SECTION_RODATA
+align 16
+zz_shuf:
+    db 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index ed0e71ed0..fb1b37ccb 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -179,6 +179,25 @@ void vp8_subtract_b_sse2(BLOCK *be, BLOCKD *bd, int pitch)
 
 #endif
 
+#if HAVE_SSSE3
+int vp8_fast_quantize_b_impl_ssse3(short *coeff_ptr,
+                                 short *qcoeff_ptr, short *dequant_ptr,
+                                 short *round_ptr,
+                                 short *quant_ptr, short *dqcoeff_ptr);
+void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d)
+{
+    d->eob = vp8_fast_quantize_b_impl_ssse3(
+                    b->coeff,
+                    d->qcoeff,
+                    d->dequant,
+                    b->round,
+                    b->quant,
+                    d->dqcoeff
+               );
+}
+#endif
+
+
 void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
 {
 #if CONFIG_RUNTIME_CPU_DETECT
@@ -333,6 +352,9 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
     {
         cpi->rtcd.variance.sad16x16x3            = vp8_sad16x16x3_ssse3;
         cpi->rtcd.variance.sad16x8x3             = vp8_sad16x8x3_ssse3;
+
+        cpi->rtcd.quantize.fastquantb            = vp8_fast_quantize_b_ssse3;
+
     }
 #endif
 
diff --git a/vp8/vp8cx.mk b/vp8/vp8cx.mk
index f693f865b..683d785e6 100644
--- a/vp8/vp8cx.mk
+++ b/vp8/vp8cx.mk
@@ -109,6 +109,7 @@ VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/quantize_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/subtract_sse2.asm
 VP8_CX_SRCS-$(HAVE_SSE3) += encoder/x86/sad_sse3.asm
 VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/sad_ssse3.asm
+VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/quantize_ssse3.asm
 VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/sad_sse4.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/quantize_mmx.asm
 VP8_CX_SRCS-$(ARCH_X86)$(ARCH_X86_64) += encoder/x86/encodeopt.asm

From 9f61a83bf9201be73083dd09fbb67f93955f4ac2 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Fri, 29 Oct 2010 17:09:33 -0700
Subject: [PATCH 288/307] postproc : Added SPLITMV visualization, fix line
 constrain.

Now draw 16 vectors for SPLITMV mode.

Fixed constrain line to block divide by zero issues.

Blend block was not centering the shaded area correctly.

Change-Id: I1edabd8b4e553aac8d980f7b45c80159e9202434
---
 vp8/common/postproc.c | 42 +++++++++++++++++++++++++++++++++++-------
 1 file changed, 35 insertions(+), 7 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 6f419435a..d7044fd57 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -484,7 +484,7 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
     int u1_const = u1*((1<<16)-alpha);
     int v1_const = v1*((1<<16)-alpha);
 
-    y += stride + 2;
+    y += stride + 1;
     for (i = 0; i < 14; i++)
     {
         for (j = 0; j < 14; j++)
@@ -522,7 +522,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
         dy = *y1 - y0;
 
         *x1 = width;
-        if (dy)
+        if (dx)
             *y1 = ((width-x0)*dy)/dx + y0;
     }
     if (*x1 < 0)
@@ -531,7 +531,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
         dy = *y1 - y0;
 
         *x1 = 0;
-        if (dy)
+        if (dx)
             *y1 = ((0-x0)*dy)/dx + y0;
     }
     if (*y1 > height)
@@ -540,7 +540,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
         dy = *y1 - y0;
 
         *y1 = height;
-        if (dx)
+        if (dy)
             *x1 = ((height-y0)*dx)/dy + x0;
     }
     if (*y1 < 0)
@@ -549,7 +549,7 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
         dy = *y1 - y0;
 
         *y1 = 0;
-        if (dx)
+        if (dy)
             *x1 = ((0-y0)*dx)/dy + x0;
     }
 }
@@ -753,8 +753,36 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         {
             for (x0 = 8; x0 < (width + 8); x0 += 16)
             {
-               int x1, y1;
-               if (mi->mbmi.mode >= NEARESTMV)
+                int x1, y1;
+
+                if (mi->mbmi.mode == SPLITMV)
+                {
+                    int bx0, by0;
+
+                    B_MODE_INFO *bmi = mi->bmi;
+                    MV *mv = &bmi->mv.as_mv;
+
+                    for (by0 = (y0-8); by0 < y0+8; by0 += 4)
+                    {
+                        for (bx0 = (x0-8); bx0 < x0+8; bx0 += 4)
+                        {
+
+                            x1 = bx0 + (mv->col >> 3);
+                            y1 = by0 + (mv->row >> 3);
+
+                            if (x1 != bx0 && y1 != by0)
+                            {
+                                constrain_line (bx0, &x1, by0, &y1, width, height);
+                                vp8_blit_line  (bx0,  x1, by0,  y1, y_buffer, y_stride);
+                            }
+                            else
+                                vp8_blit_line  (bx0,  x1, by0,  y1, y_buffer, y_stride);
+
+                            mv++;
+                        }
+                    }
+                }
+                else if (mi->mbmi.mode >= NEARESTMV)
                 {
                     MV *mv = &mi->mbmi.mv.as_mv;
 

From c377bf0eec4ab3862e41f7db534e9d3070b0bed7 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Tue, 2 Nov 2010 09:11:57 -0400
Subject: [PATCH 289/307] fix pipe support on windows

STDIO streams are opened in text mode by default on Windows. This patch
changes the stdin/stdout streams to be in binary mode if they are being
used for I/O from the vpxenc or vpxdec tools.

Fixes issue #216. Thanks to mw AT hesotech.de for the fix.

Change-Id: I34525b3ce2a4a031d5a48d36df4667589372225b
---
 examples.mk    |  2 ++
 tools_common.c | 24 ++++++++++++++++++++++++
 tools_common.h | 16 ++++++++++++++++
 vpxdec.c       |  6 ++++--
 vpxenc.c       |  7 +++++--
 5 files changed, 51 insertions(+), 4 deletions(-)
 create mode 100644 tools_common.c
 create mode 100644 tools_common.h

diff --git a/examples.mk b/examples.mk
index 9340e23bb..a30205d31 100644
--- a/examples.mk
+++ b/examples.mk
@@ -17,6 +17,7 @@ vpxdec.SRCS                 += md5_utils.c md5_utils.h
 vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
 vpxdec.SRCS                 += args.c args.h vpx_ports/config.h
+vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += nestegg/halloc/halloc.h
 vpxdec.SRCS                 += nestegg/halloc/src/align.h
 vpxdec.SRCS                 += nestegg/halloc/src/halloc.c
@@ -28,6 +29,7 @@ vpxdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
 vpxdec.DESCRIPTION           = Full featured decoder
 UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
 vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+vpxenc.SRCS                 += tools_common.c tools_common.h
 vpxenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
 vpxenc.SRCS                 += libmkv/EbmlIDs.h
diff --git a/tools_common.c b/tools_common.c
new file mode 100644
index 000000000..d188bbe20
--- /dev/null
+++ b/tools_common.c
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stdio.h>
+#include "tools_common.h"
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+FILE* set_binary_mode(FILE *stream)
+{
+    (void)stream;
+#ifdef _WIN32
+    _setmode(_fileno(stream), _O_BINARY);
+#endif
+    return stream;
+}
diff --git a/tools_common.h b/tools_common.h
new file mode 100644
index 000000000..80c974732
--- /dev/null
+++ b/tools_common.h
@@ -0,0 +1,16 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TOOLS_COMMON_H
+#define TOOLS_COMMON_H
+
+/* Sets a stdio stream into binary mode */
+FILE* set_binary_mode(FILE *stream);
+
+#endif
diff --git a/vpxdec.c b/vpxdec.c
index e8e4d5f12..85ab20693 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -35,6 +35,7 @@
 #if CONFIG_MD5
 #include "md5_utils.h"
 #endif
+#include "tools_common.h"
 #include "nestegg/include/nestegg/nestegg.h"
 
 #ifndef PATH_MAX
@@ -314,7 +315,8 @@ void *out_open(const char *out_fn, int do_md5)
     }
     else
     {
-        FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
+        FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb")
+                                                  : set_binary_mode(stdout);
 
         if (!outfile)
         {
@@ -805,7 +807,7 @@ int main(int argc, const char **argv_)
         usage_exit();
 
     /* Open file */
-    infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
+    infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin);
 
     if (!infile)
     {
diff --git a/vpxenc.c b/vpxenc.c
index 7e0595207..4cdadb614 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -35,6 +35,7 @@
 #include "vpx/vp8cx.h"
 #include "vpx_ports/mem_ops.h"
 #include "vpx_ports/vpx_timer.h"
+#include "tools_common.h"
 #include "y4minput.h"
 #include "libmkv/EbmlWriter.h"
 #include "libmkv/EbmlIDs.h"
@@ -1334,7 +1335,8 @@ int main(int argc, const char **argv_)
         struct detect_buffer detect;
 
         /* Parse certain options from the input file, if possible */
-        infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb") : stdin;
+        infile = strcmp(in_fn, "-") ? fopen(in_fn, "rb")
+                                    : set_binary_mode(stdin);
 
         if (!infile)
         {
@@ -1449,7 +1451,8 @@ int main(int argc, const char **argv_)
                               cfg.g_w, cfg.g_h, 1);
         }
 
-        outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb") : stdout;
+        outfile = strcmp(out_fn, "-") ? fopen(out_fn, "wb")
+                                      : set_binary_mode(stdout);
 
         if (!outfile)
         {

From 0a29bd9793743533f7991994cf8880ee582afe24 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Tue, 2 Nov 2010 13:29:13 -0700
Subject: [PATCH 290/307] postproc : Fix display of motion vectors.

Split motion vectors were all being treated as 4x4
blocks.  Now correctly handle 16x8, 8x16, 8x8, 4x4
blocks.

Change-Id: Icf345c5e69b5e374e12456877ed7c41213ad88cc
---
 vp8/common/postproc.c | 132 +++++++++++++++++++++++++++++++++---------
 1 file changed, 106 insertions(+), 26 deletions(-)

diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index d7044fd57..453cb47f4 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -749,56 +749,136 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         MODE_INFO *mi = oci->mi;
         int x0, y0;
 
-        for (y0 = 8; y0 < (height + 8); y0 += 16)
+        for (y0 = 0; y0 < height; y0 += 16)
         {
-            for (x0 = 8; x0 < (width + 8); x0 += 16)
+            for (x0 = 0; x0 < width; x0 += 16)
             {
                 int x1, y1;
 
                 if (mi->mbmi.mode == SPLITMV)
                 {
-                    int bx0, by0;
-
-                    B_MODE_INFO *bmi = mi->bmi;
-                    MV *mv = &bmi->mv.as_mv;
-
-                    for (by0 = (y0-8); by0 < y0+8; by0 += 4)
+                    switch (mi->mbmi.partitioning)
                     {
-                        for (bx0 = (x0-8); bx0 < x0+8; bx0 += 4)
+                        case 0 :    /* mv_top_bottom */
                         {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
 
-                            x1 = bx0 + (mv->col >> 3);
-                            y1 = by0 + (mv->row >> 3);
+                            x1 = x0 + 8 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
 
-                            if (x1 != bx0 && y1 != by0)
+                            constrain_line (x0+8, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+8,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[8];
+
+                            x1 = x0 + 8 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+8, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+8,  x1, y0+12,  y1, y_buffer, y_stride);
+
+                            break;
+                        }
+                        case 1 :    /* mv_left_right */
+                        {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 + 8 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+8, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+8,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[2];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 + 8 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+8, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+8,  y1, y_buffer, y_stride);
+
+                            break;
+                        }
+                        case 2 :    /* mv_quarters   */
+                        {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[2];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[8];
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+12,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[10];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+12,  y1, y_buffer, y_stride);
+                            break;
+                        }
+                        default :
+                        {
+                            B_MODE_INFO *bmi = mi->bmi;
+                            int bx0, by0;
+
+                            for (by0 = y0; by0 < (y0+16); by0 += 4)
                             {
-                                constrain_line (bx0, &x1, by0, &y1, width, height);
-                                vp8_blit_line  (bx0,  x1, by0,  y1, y_buffer, y_stride);
-                            }
-                            else
-                                vp8_blit_line  (bx0,  x1, by0,  y1, y_buffer, y_stride);
+                                for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
+                                {
+                                    MV *mv = &bmi->mv.as_mv;
 
-                            mv++;
+                                    x1 = bx0 + 2 + (mv->col >> 3);
+                                    y1 = by0 + 2 + (mv->row >> 3);
+
+                                    constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
+                                    vp8_blit_line  (bx0+2,  x1, by0+2,  y1, y_buffer, y_stride);
+
+                                    bmi++;
+                                }
+                            }
                         }
                     }
                 }
                 else if (mi->mbmi.mode >= NEARESTMV)
                 {
                     MV *mv = &mi->mbmi.mv.as_mv;
+                    const int lx0 = x0 + 8;
+                    const int ly0 = y0 + 8;
 
-                    x1 = x0 + (mv->col >> 3);
-                    y1 = y0 + (mv->row >> 3);
+                    x1 = lx0 + (mv->col >> 3);
+                    y1 = ly0 + (mv->row >> 3);
 
-                    if (x1 != x0 && y1 != y0)
+                    if (x1 != lx0 && y1 != ly0)
                     {
-                        constrain_line (x0, &x1, y0-1, &y1, width, height);
-                        vp8_blit_line  (x0,  x1, y0-1,  y1, y_buffer, y_stride);
+                        constrain_line (lx0, &x1, ly0-1, &y1, width, height);
+                        vp8_blit_line  (lx0,  x1, ly0-1,  y1, y_buffer, y_stride);
 
-                        constrain_line (x0, &x1, y0+1, &y1, width, height);
-                        vp8_blit_line  (x0,  x1, y0+1,  y1, y_buffer, y_stride);
+                        constrain_line (lx0, &x1, ly0+1, &y1, width, height);
+                        vp8_blit_line  (lx0,  x1, ly0+1,  y1, y_buffer, y_stride);
                     }
                     else
-                        vp8_blit_line  (x0,  x1, y0,  y1, y_buffer, y_stride);
+                        vp8_blit_line  (lx0,  x1, ly0,  y1, y_buffer, y_stride);
                 }
                 mi++;
             }

From 77e6b4504b97724fb3dbfdcf26365b68c3cf5f3c Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Wed, 3 Nov 2010 13:58:40 -0400
Subject: [PATCH 291/307] vpxenc: require width and height for raw streams

Defaulting to 320x240 for raw streams is arbitrary and error-prone.
Instead, require that the width and height be set manually if they
can't be parsed from the input file.

Change-Id: Ic61979857e372eed0779c2677247e894f9fd6160
---
 vpxenc.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/vpxenc.c b/vpxenc.c
index 4cdadb614..45ec7e5c9 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1190,6 +1190,12 @@ int main(int argc, const char **argv_)
      */
     cfg.g_timebase.den = 1000;
 
+    /* Never use the library's default resolution, require it be parsed
+     * from the file or set on the command line.
+     */
+    cfg.g_w = 0;
+    cfg.g_h = 0;
+
     /* Now parse the remainder of the parameters. */
     for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
     {
@@ -1399,6 +1405,14 @@ int main(int argc, const char **argv_)
             file_type = FILE_TYPE_RAW;
             detect.valid = 1;
         }
+
+        if(!cfg.g_w || !cfg.g_h)
+        {
+            fprintf(stderr, "Specify stream dimensions with --width (-w) "
+                            " and --height (-h).\n");
+            return EXIT_FAILURE;
+        }
+
 #define SHOW(field) fprintf(stderr, "    %-28s = %d\n", #field, cfg.field)
 
         if (verbose && pass == 0)

From a5397dbaf13c87516bbf404dea35fe49bfe805e3 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 3 Nov 2010 12:56:31 -0700
Subject: [PATCH 292/307] Increase the resolution of default timebase

The old value 1000 was too low, which caused the effective duration and
frame rate calculation to have an 1% error for typical 30 frame/second
inputs. Symptom of the issue has been that most 2 pass encodings were
undershooting target bit rate by 1% or so for 30 fps input.

Change-Id: Ice204e86f844ceb9ce973456f2b995cc095283cf
---
 vpxenc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vpxenc.c b/vpxenc.c
index 45ec7e5c9..032e2e8d8 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1188,7 +1188,7 @@ int main(int argc, const char **argv_)
     /* Change the default timebase to a high enough value so that the encoder
      * will always create strictly increasing timestamps.
      */
-    cfg.g_timebase.den = 1000;
+    cfg.g_timebase.den = 100000;
 
     /* Never use the library's default resolution, require it be parsed
      * from the file or set on the command line.

From 0e7b60617fe7cd2724a0bab055b738b73db49acd Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 4 Nov 2010 10:35:02 -0700
Subject: [PATCH 293/307] postproc : Update visualizations.

Change color reference frame to blend the macro block edge.
This helps with layering of visualizations.

Add block coloring for intra prediction modes.

Change-Id: Icefe0e189e26719cd6937cebd6727efac0b4d278
---
 vp8/common/generic/systemdependent.c |  12 +-
 vp8/common/postproc.c                | 187 ++++++++++++++++++++++++---
 vp8/common/postproc.h                |  42 ++++--
 vp8/common/ppflags.h                 |  22 ++--
 4 files changed, 217 insertions(+), 46 deletions(-)

diff --git a/vp8/common/generic/systemdependent.c b/vp8/common/generic/systemdependent.c
index 7b1a84b70..b3eadaf27 100644
--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -65,11 +65,13 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
     rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
 
 #if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-    rtcd->postproc.blend_mb    = vp8_blend_mb_c;
+    rtcd->postproc.down             = vp8_mbpost_proc_down_c;
+    rtcd->postproc.across           = vp8_mbpost_proc_across_ip_c;
+    rtcd->postproc.downacross       = vp8_post_proc_down_and_across_c;
+    rtcd->postproc.addnoise         = vp8_plane_add_noise_c;
+    rtcd->postproc.blend_mb_inner   = vp8_blend_mb_inner_c;
+    rtcd->postproc.blend_mb_outer   = vp8_blend_mb_outer_c;
+    rtcd->postproc.blend_b          = vp8_blend_b_c;
 #endif
 
 #endif
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index 453cb47f4..e797e1036 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -41,6 +41,24 @@ static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
     { RGB_TO_YUV(0xFF0000) }    /* Red */
 };
 
+static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x6633ff) },   /* Purple */
+    { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
+    { RGB_TO_YUV(0xff33cc) },   /* Pink */
+    { RGB_TO_YUV(0xff3366) },   /* Coral */
+    { RGB_TO_YUV(0x3366ff) },   /* Blue */
+    { RGB_TO_YUV(0xed00f5) },   /* Dark Blue */
+    { RGB_TO_YUV(0x2e00b8) },   /* Dark Purple */
+    { RGB_TO_YUV(0xff6633) },   /* Orange */
+    { RGB_TO_YUV(0x33ccff) },   /* Light Blue */
+    { RGB_TO_YUV(0x8ab800) },   /* Green */
+    { RGB_TO_YUV(0xffcc33) },   /* Light Orange */
+    { RGB_TO_YUV(0x33ffcc) },   /* Aqua */
+    { RGB_TO_YUV(0x66ff33) },   /* Light Green */
+    { RGB_TO_YUV(0xccff33) },   /* Yellow */
+};
+
 static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
 {
     { RGB_TO_YUV(0x00ff00) },   /* Blue */
@@ -476,7 +494,7 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
  * edges unblended to give distinction to macro blocks in areas
  * filled with the same color block.
  */
-void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
+void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
                         int y1, int u1, int v1, int alpha, int stride)
 {
     int i, j;
@@ -484,10 +502,10 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
     int u1_const = u1*((1<<16)-alpha);
     int v1_const = v1*((1<<16)-alpha);
 
-    y += stride + 1;
-    for (i = 0; i < 14; i++)
+    y += 2*stride + 2;
+    for (i = 0; i < 12; i++)
     {
-        for (j = 0; j < 14; j++)
+        for (j = 0; j < 12; j++)
         {
             y[j] = (y[j]*alpha + y1_const)>>16;
         }
@@ -511,6 +529,104 @@ void vp8_blend_mb_c (unsigned char *y, unsigned char *u, unsigned char *v,
     }
 }
 
+/* Blend only the edge of the macro block.  Leave center
+ * unblended to allow for other visualizations to be layered.
+ */
+void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    for (i = 0; i < 12; i++)
+    {
+        y[0]  = (y[0]*alpha  + y1_const)>>16;
+        y[1]  = (y[1]*alpha  + y1_const)>>16;
+        y[14] = (y[14]*alpha + y1_const)>>16;
+        y[15] = (y[15]*alpha + y1_const)>>16;
+        y += stride;
+    }
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    stride >>= 1;
+
+    for (j = 0; j < 8; j++)
+    {
+        u[j] = (u[j]*alpha + u1_const)>>16;
+        v[j] = (v[j]*alpha + v1_const)>>16;
+    }
+    u += stride;
+    v += stride;
+
+    for (i = 0; i < 6; i++)
+    {
+        u[0] = (u[0]*alpha + u1_const)>>16;
+        v[0] = (v[0]*alpha + v1_const)>>16;
+
+        u[7] = (u[7]*alpha + u1_const)>>16;
+        v[7] = (v[7]*alpha + v1_const)>>16;
+
+        u += stride;
+        v += stride;
+    }
+
+    for (j = 0; j < 8; j++)
+    {
+        u[j] = (u[j]*alpha + u1_const)>>16;
+        v[j] = (v[j]*alpha + v1_const)>>16;
+    }
+}
+
+void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    for (i = 0; i < 4; i++)
+    {
+        for (j = 0; j < 4; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    stride >>= 1;
+
+    for (i = 0; i < 2; i++)
+    {
+        for (j = 0; j < 2; j++)
+        {
+            u[j] = (u[j]*alpha + u1_const)>>16;
+            v[j] = (v[j]*alpha + v1_const)>>16;
+        }
+        u += stride;
+        v += stride;
+    }
+}
+
 static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
 {
     int dx;
@@ -738,7 +854,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     /* Draw motion vectors */
-    if (flags & VP8D_DEBUG_LEVEL5)
+    if (flags & VP8D_DEBUG_DRAW_MV)
     {
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
         int width  = post->y_width;
@@ -887,9 +1003,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     /* Color in block modes */
-    if (flags & VP8D_DEBUG_LEVEL6)
+    if (flags & VP8D_DEBUG_CLR_BLK_MODES)
     {
-        int i, j;
+        int y, x;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
         int width  = post->y_width;
         int height = post->y_height;
@@ -899,19 +1015,50 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         int y_stride = oci->post_proc_buffer.y_stride;
         MODE_INFO *mi = oci->mi;
 
-        for (i = 0; i < height; i += 16)
+        for (y = 0; y < height; y += 16)
         {
-            for (j = 0; j < width; j += 16)
+            for (x = 0; x < width; x += 16)
             {
                 int Y = 0, U = 0, V = 0;
 
-                Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
-                U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
-                V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+                if (mi->mbmi.mode == B_PRED)
+                {
+                    int by, bx;
+                    unsigned char *yl, *ul, *vl;
+                    B_MODE_INFO *bmi = mi->bmi;
 
-                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
-                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
+                    yl = y_ptr + x;
+                    ul = u_ptr + (x>>1);
+                    vl = v_ptr + (x>>1);
 
+                    for (by = 0; by < 16; by += 4)
+                    {
+                        for (bx = 0; bx < 16; bx += 4)
+                        {
+                            Y = B_PREDICTION_MODE_colors[bmi->mode][0];
+                            U = B_PREDICTION_MODE_colors[bmi->mode][1];
+                            V = B_PREDICTION_MODE_colors[bmi->mode][2];
+
+                            POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
+                                (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
+
+                            bmi++;
+                        }
+
+                        yl += y_stride*4;
+                        ul += y_stride*1;
+                        vl += y_stride*1;
+                    }
+                }
+                else
+                {
+                    Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
+                    U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
+                    V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+
+                    POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
+                        (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+                }
                 mi++;
             }
             y_ptr += y_stride*16;
@@ -923,9 +1070,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     /* Color in frame reference blocks */
-    if (flags & VP8D_DEBUG_LEVEL7)
+    if (flags & VP8D_DEBUG_CLR_FRM_REF_BLKS)
     {
-        int i, j;
+        int y, x;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
         int width  = post->y_width;
         int height = post->y_height;
@@ -935,9 +1082,9 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         int y_stride = oci->post_proc_buffer.y_stride;
         MODE_INFO *mi = oci->mi;
 
-        for (i = 0; i < height; i += 16)
+        for (y = 0; y < height; y += 16)
         {
-            for (j = 0; j < width; j +=16)
+            for (x = 0; x < width; x +=16)
             {
                 int Y = 0, U = 0, V = 0;
 
@@ -945,8 +1092,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                 U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
                 V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
 
-                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb)
-                    (&y_ptr[j], &u_ptr[j>>1], &v_ptr[j>>1], Y, U, V, 0xc000, y_stride);
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
+                    (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
 
                 mi++;
             }
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 4a4493802..7485135bf 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -24,7 +24,15 @@
               char whiteclamp[16], char bothclamp[16],\
               unsigned int w, unsigned int h, int pitch)
 
-#define prototype_postproc_blend_mb(sym)\
+#define prototype_postproc_blend_mb_inner(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)
+
+#define prototype_postproc_blend_mb_outer(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)
+
+#define prototype_postproc_blend_b(sym)\
     void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
               int y1, int u1, int v1, int alpha, int stride)
 
@@ -52,22 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
 #endif
 extern prototype_postproc_addnoise(vp8_postproc_addnoise);
 
-#ifndef vp8_postproc_blend_mb
-#define vp8_postproc_blend_mb vp8_blend_mb_c
+#ifndef vp8_postproc_blend_mb_inner
+#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
 #endif
-extern prototype_postproc_blend_mb(vp8_postproc_blend_mb);
+extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
+
+#ifndef vp8_postproc_blend_mb_outer
+#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
+#endif
+extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
+
+#ifndef vp8_postproc_blend_b
+#define vp8_postproc_blend_b vp8_blend_b_c
+#endif
+extern prototype_postproc_blend_b(vp8_postproc_blend_b);
 
 typedef prototype_postproc((*vp8_postproc_fn_t));
 typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
 typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
-typedef prototype_postproc_blend_mb((*vp8_postproc_blend_mb_fn_t));
+typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
+typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
+typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
 typedef struct
 {
-    vp8_postproc_inplace_fn_t   down;
-    vp8_postproc_inplace_fn_t   across;
-    vp8_postproc_fn_t           downacross;
-    vp8_postproc_addnoise_fn_t  addnoise;
-    vp8_postproc_blend_mb_fn_t  blend_mb;
+    vp8_postproc_inplace_fn_t           down;
+    vp8_postproc_inplace_fn_t           across;
+    vp8_postproc_fn_t                   downacross;
+    vp8_postproc_addnoise_fn_t          addnoise;
+    vp8_postproc_blend_mb_inner_fn_t    blend_mb_inner;
+    vp8_postproc_blend_mb_outer_fn_t    blend_mb_outer;
+    vp8_postproc_blend_b_fn_t           blend_b;
 } vp8_postproc_rtcd_vtable_t;
 
 #if CONFIG_RUNTIME_CPU_DETECT
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index 6a51d2cd6..b8d713cf0 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -13,17 +13,17 @@
 #define __INC_PPFLAGS_H
 enum
 {
-    VP8D_NOFILTERING    = 0,
-    VP8D_DEBLOCK        = 1<<0,
-    VP8D_DEMACROBLOCK   = 1<<1,
-    VP8D_ADDNOISE       = 1<<2,
-    VP8D_DEBUG_LEVEL1   = 1<<3,
-    VP8D_DEBUG_LEVEL2   = 1<<4,
-    VP8D_DEBUG_LEVEL3   = 1<<5,
-    VP8D_DEBUG_LEVEL4   = 1<<6,
-    VP8D_DEBUG_LEVEL5   = 1<<7,
-    VP8D_DEBUG_LEVEL6   = 1<<8,
-    VP8D_DEBUG_LEVEL7   = 1<<9
+    VP8D_NOFILTERING            = 0,
+    VP8D_DEBLOCK                = 1<<0,
+    VP8D_DEMACROBLOCK           = 1<<1,
+    VP8D_ADDNOISE               = 1<<2,
+    VP8D_DEBUG_LEVEL1           = 1<<3,
+    VP8D_DEBUG_LEVEL2           = 1<<4,
+    VP8D_DEBUG_LEVEL3           = 1<<5,
+    VP8D_DEBUG_LEVEL4           = 1<<6,
+    VP8D_DEBUG_DRAW_MV          = 1<<7,
+    VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
+    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
 };
 
 #endif

From bd05d9e480623536e35b76cf3cfd6b8884d254ba Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 4 Nov 2010 14:54:51 -0400
Subject: [PATCH 294/307] vpxdec: report parse errors from
 webm_guess_framerate()

If this function fails silently, the nestegg context is destroyed and
future nestegg calls will segfault.

Change-Id: Ie6a0ea284ab9ddfa97b1843ef8030a953937c8cd
---
 vpxdec.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/vpxdec.c b/vpxdec.c
index 85ab20693..9b565b022 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -542,6 +542,7 @@ webm_guess_framerate(struct input_ctx *input,
     *fps_den = tstamp / 1000;
     return 0;
 fail:
+    nestegg_destroy(input->nestegg_ctx);
     input->nestegg_ctx = NULL;
     rewind(input->infile);
     return 1;
@@ -878,7 +879,13 @@ int main(int argc, const char **argv_)
         }
 
         if(input.kind == WEBM_FILE)
-            webm_guess_framerate(&input, &fps_den, &fps_num);
+            if(webm_guess_framerate(&input, &fps_den, &fps_num))
+            {
+                fprintf(stderr, "Failed to guess framerate -- error parsing "
+                                "webm file?\n");
+                return EXIT_FAILURE;
+            }
+
 
         /*Note: We can't output an aspect ratio here because IVF doesn't
            store one, and neither does VP8.

From f7e187d3626f029490b8b619fb80da1b0e77dcfe Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 4 Nov 2010 15:05:45 -0400
Subject: [PATCH 295/307] improve average framerate calculation

Change Ice204e86 identified a problem with bitrate undershoot due to
low precision in the timestamps passed to the library. This patch
takes a different approach by calculating the duration of this frame
and passing it to the library, rather than using a fixed duration
and letting the library average it out with higher precision
timestamps. This part of the fix only applies to vpxenc.

This patch also attempts to fix the problem for generic applications
that may have made the same mistake vpxenc did. Instead of
calculating this frame's duration by the difference of this frame's
and the last frame's start time, we use the end times instead. This
allows the framerate calculation to scavenge "unclaimed" time from
the last frame. For instance:

  start |  end  | calculated duration
  ======+=======+====================
    0ms    33ms   33ms
   33ms    66ms   33ms
   66ms    99ms   33ms
  100ms   133ms   34ms

Change-Id: I92be4b3518e0bd530e97f90e69e75330a4c413fc
---
 vp8/encoder/onyx_if.c  | 13 +++++++------
 vp8/encoder/onyx_int.h |  1 +
 vpxenc.c               | 10 ++++++----
 3 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 5f02a5a02..bceb58602 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -4972,17 +4972,16 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
 
     *frame_flags = cpi->source_frame_flags;
 
-#if CONFIG_PSNR
-
     if (cpi->source_time_stamp < cpi->first_time_stamp_ever)
+    {
         cpi->first_time_stamp_ever = cpi->source_time_stamp;
-
-#endif
+        cpi->last_end_time_stamp_seen = cpi->source_time_stamp;
+    }
 
     // adjust frame rates based on timestamps given
     if (!cm->refresh_alt_ref_frame)
     {
-        if (cpi->last_time_stamp_seen == 0)
+        if (cpi->source_time_stamp == cpi->first_time_stamp_ever)
         {
             double this_fps = 10000000.000 / (cpi->source_end_time_stamp - cpi->source_time_stamp);
 
@@ -4990,7 +4989,8 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
         }
         else
         {
-            long long nanosecs = cpi->source_time_stamp - cpi->last_time_stamp_seen;
+            long long nanosecs = cpi->source_end_time_stamp
+                - cpi->last_end_time_stamp_seen;
             double this_fps = 10000000.000 / nanosecs;
 
             vp8_new_frame_rate(cpi, (7 * cpi->oxcf.frame_rate + this_fps) / 8);
@@ -4998,6 +4998,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
         }
 
         cpi->last_time_stamp_seen = cpi->source_time_stamp;
+        cpi->last_end_time_stamp_seen = cpi->source_end_time_stamp;
     }
 
     if (cpi->compressor_speed == 2)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 81e32f031..134e03075 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -399,6 +399,7 @@ typedef struct
     int inter_frame_target;
     double output_frame_rate;
     long long last_time_stamp_seen;
+    long long last_end_time_stamp_seen;
     long long first_time_stamp_ever;
 
     int ni_av_qi;
diff --git a/vpxenc.c b/vpxenc.c
index 032e2e8d8..b139c6829 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -1188,7 +1188,7 @@ int main(int argc, const char **argv_)
     /* Change the default timebase to a high enough value so that the encoder
      * will always create strictly increasing timestamps.
      */
-    cfg.g_timebase.den = 100000;
+    cfg.g_timebase.den = 1000;
 
     /* Never use the library's default resolution, require it be parsed
      * from the file or set on the command line.
@@ -1544,7 +1544,7 @@ int main(int argc, const char **argv_)
             vpx_codec_iter_t iter = NULL;
             const vpx_codec_cx_pkt_t *pkt;
             struct vpx_usec_timer timer;
-            int64_t frame_start;
+            int64_t frame_start, next_frame_start;
 
             if (!arg_limit || frames_in < arg_limit)
             {
@@ -1565,9 +1565,11 @@ int main(int argc, const char **argv_)
 
             frame_start = (cfg.g_timebase.den * (int64_t)(frames_in - 1)
                           * arg_framerate.den) / cfg.g_timebase.num / arg_framerate.num;
+            next_frame_start = (cfg.g_timebase.den * (int64_t)(frames_in)
+                                * arg_framerate.den)
+                                / cfg.g_timebase.num / arg_framerate.num;
             vpx_codec_encode(&encoder, frame_avail ? &raw : NULL, frame_start,
-                             cfg.g_timebase.den * arg_framerate.den
-                             / cfg.g_timebase.num / arg_framerate.num,
+                             next_frame_start - frame_start,
                              0, arg_deadline);
             vpx_usec_timer_mark(&timer);
             cx_time += vpx_usec_timer_elapsed(&timer);

From 9fb80f7170ec48e23c3c7b477149eeb37081c699 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 4 Nov 2010 16:59:26 -0400
Subject: [PATCH 296/307] fix integer promotion bug in partition size check

The check '(user_data_end - partition < partition_size)' must be
evaluated as a signed comparison, but because partition_size was
unsigned, the LHS was promoted to unsigned, causing an incorrect
result on 32-bit. Instead, check the upper and lower bounds of
the segment separately.

Change-Id: I6266aba7fd7de084268712a3d2a81424ead7aa06
---
 vp8/decoder/decodframe.c |  6 ++++--
 vp8/vp8_dx_iface.c       | 10 ++++++++--
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/vp8/decoder/decodframe.c b/vp8/decoder/decodframe.c
index 2d81d61d5..f5e49a147 100644
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -462,7 +462,8 @@ static void setup_token_decoder(VP8D_COMP *pbi,
             partition_size = user_data_end - partition;
         }
 
-        if (user_data_end - partition < partition_size)
+        if (partition + partition_size > user_data_end
+            || partition + partition_size < partition)
             vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                                "Truncated packet or corrupt partition "
                                "%d length", i + 1);
@@ -580,7 +581,8 @@ int vp8_decode_frame(VP8D_COMP *pbi)
         (data[0] | (data[1] << 8) | (data[2] << 16)) >> 5;
     data += 3;
 
-    if (data_end - data < first_partition_length_in_bytes)
+    if (data + first_partition_length_in_bytes > data_end
+        || data + first_partition_length_in_bytes < data)
         vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME,
                            "Truncated packet or corrupt partition 0 length");
     vp8_setup_version(pc);
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index e7e535638..f0adf5bcb 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -253,8 +253,11 @@ static vpx_codec_err_t vp8_peek_si(const uint8_t         *data,
                                    unsigned int           data_sz,
                                    vpx_codec_stream_info_t *si)
 {
-
     vpx_codec_err_t res = VPX_CODEC_OK;
+
+    if(data + data_sz <= data)
+        res = VPX_CODEC_INVALID_PARAM;
+    else
     {
         /* Parse uncompresssed part of key frame header.
          * 3 bytes:- including version, frame type and an offset
@@ -331,7 +334,10 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
 
     ctx->img_avail = 0;
 
-    /* Determine the stream parameters */
+    /* Determine the stream parameters. Note that we rely on peek_si to
+     * validate that we have a buffer that does not wrap around the top
+     * of the heap.
+     */
     if (!ctx->si.h)
         res = ctx->base.iface->dec.peek_si(data, data_sz, &ctx->si);
 

From 6adbe09058f05a42ff1360ab55e3916f45e39a0c Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 8 Nov 2010 15:28:54 +0000
Subject: [PATCH 297/307] Tuning for the more exact quantizer.

Small changes to the default zero bin and rounding tables.
Though the tables are currently the same for the Y1 and Y2 cases
I have left them as separate tables in case we want to tune this later.

There is now some adjustment of the zbin based on the prediction mode.
Previously this was restricted to an adjustment for gf/arf 0,0 MV.

The exact quantizer now marginal outperforms and is the default.

The overall average gain is about 0.5%

Change-Id: I5e4353f3d5326dde4e86823684b236a1e9ea7f47
---
 vp8/encoder/encodeframe.c | 61 +++++++++++++++++++++++++--------------
 vp8/encoder/onyx_if.c     | 12 +++++++-
 vp8/encoder/onyx_int.h    |  2 ++
 vp8/encoder/quantize.c    |  2 +-
 vp8/encoder/rdopt.c       | 18 ++++++++++--
 5 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index 85e121be3..b67edd39f 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -62,7 +62,6 @@ unsigned int b_modes[14]  = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
 static const int qrounding_factors[129] =
 {
-    56, 56, 56, 56, 48, 48, 56, 56,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
@@ -78,12 +77,18 @@ static const int qrounding_factors[129] =
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
-    48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48
 };
 
 static const int qzbin_factors[129] =
 {
-    72, 72, 72, 72, 80, 80, 72, 72,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
@@ -94,17 +99,11 @@ static const int qzbin_factors[129] =
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80,
+    80
 };
 
 static const int qrounding_factors_y2[129] =
 {
-    56, 56, 56, 56, 48, 48, 56, 56,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
@@ -120,12 +119,18 @@ static const int qrounding_factors_y2[129] =
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
     48, 48, 48, 48, 48, 48, 48, 48,
-    48,
+    48, 48, 48, 48, 48, 48, 48, 48,
+    48
 };
 
 static const int qzbin_factors_y2[129] =
 {
-    72, 72, 72, 72, 80, 80, 72, 72,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
+    84, 84, 84, 84, 84, 84, 84, 84,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
@@ -136,15 +141,10 @@ static const int qzbin_factors_y2[129] =
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
     80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80, 80, 80, 80, 80, 80, 80, 80,
-    80,
+    80
 };
 
-//#define EXACT_QUANT
+#define EXACT_QUANT
 #ifdef EXACT_QUANT
 static void vp8cx_invert_quant(short *quant, short *shift, short d)
 {
@@ -351,6 +351,9 @@ void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x)
 
 void vp8cx_frame_init_quantizer(VP8_COMP *cpi)
 {
+    // Clear Zbin mode boost for default case
+    cpi->zbin_mode_boost = 0;
+
     // vp8cx_init_quantizer() is first called in vp8_create_compressor(). A check is added here so that vp8cx_init_quantizer() is only called
     // when these values are not all zero.
     if (cpi->common.y1dc_delta_q | cpi->common.y2dc_delta_q | cpi->common.uvdc_delta_q | cpi->common.y2ac_delta_q | cpi->common.uvac_delta_q)
@@ -1214,11 +1217,25 @@ int vp8cx_encode_inter_macroblock
         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
         if (cpi->zbin_mode_boost_enabled)
         {
-            if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME))
-                cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+            if ( xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME )
+                 cpi->zbin_mode_boost = 0;
             else
-                cpi->zbin_mode_boost = 0;
+            {
+                if (xd->mode_info_context->mbmi.mode == ZEROMV)
+                {
+                    if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME)
+                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+                    else
+                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+                }
+                else if (xd->mode_info_context->mbmi.mode == SPLITMV)
+                    cpi->zbin_mode_boost = 0;
+                else
+                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+            }
         }
+        else
+            cpi->zbin_mode_boost = 0;
 
         vp8cx_mb_init_quantizer(cpi,  x);
     }
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index bceb58602..93e13d139 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -3494,8 +3494,18 @@ static void encode_frame_to_data_rate
     cpi->zbin_over_quant = 0;
     cpi->zbin_mode_boost = 0;
 
-    // Enable mode based tweaking of the zbin
+    // Enable or disable mode based tweaking of the zbin
+    // For 2 Pass Only used where GF/ARF prediction quality
+    // is above a threshold
+    cpi->zbin_mode_boost = 0;
     cpi->zbin_mode_boost_enabled = TRUE;
+    if (cpi->pass == 2)
+    {
+        if ( cpi->gfu_boost <= 400 )
+        {
+            cpi->zbin_mode_boost_enabled = FALSE;
+        }
+    }
 
     // Current default encoder behaviour for the altref sign bias
     if (cpi->source_alt_ref_active)
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index 134e03075..be5b00de8 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -46,6 +46,8 @@
 #define MAX_THRESHMULT  512
 
 #define GF_ZEROMV_ZBIN_BOOST 24
+#define LF_ZEROMV_ZBIN_BOOST 12
+#define MV_ZBIN_BOOST        4
 #define ZBIN_OQ_MAX 192
 
 #define VP8_TEMPORAL_ALT_REF 1
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index 5e65fadb3..c2c0351c0 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -16,7 +16,7 @@
 #include "entropy.h"
 #include "predictdc.h"
 
-//#define EXACT_QUANT
+#define EXACT_QUANT
 #ifdef EXACT_QUANT
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
diff --git a/vp8/encoder/rdopt.c b/vp8/encoder/rdopt.c
index 8a753fd44..d291b9e6f 100644
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -1626,10 +1626,22 @@ int vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int
         // Experimental code. Special case for gf and arf zeromv modes. Increase zbin size to supress noise
         if (cpi->zbin_mode_boost_enabled)
         {
-            if ((vp8_mode_order[mode_index] == ZEROMV) && (vp8_ref_frame_order[mode_index] != LAST_FRAME))
-                cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
-            else
+            if ( vp8_ref_frame_order[mode_index] == INTRA_FRAME )
                 cpi->zbin_mode_boost = 0;
+            else
+            {
+                if (vp8_mode_order[mode_index] == ZEROMV)
+                {
+                    if (vp8_ref_frame_order[mode_index] != LAST_FRAME)
+                        cpi->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST;
+                    else
+                        cpi->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST;
+                }
+                else if (vp8_ref_frame_order[mode_index] == SPLITMV)
+                    cpi->zbin_mode_boost = 0;
+                else
+                    cpi->zbin_mode_boost = MV_ZBIN_BOOST;
+            }
 
             vp8cx_mb_init_quantizer(cpi, x);
         }

From 513f8e681415eb8fc96fdf862ffa17a26b1a5bb2 Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Wed, 10 Nov 2010 10:09:45 +0000
Subject: [PATCH 298/307] Relax rate control for last few frames

VBR rate control can become very noisy for the last few frames.
If there are a few bits to spare or a small overshoot then the
target rate and hence quantizer may start to fluctuate wildly.

This patch prevents further adjustment of the active Q limits for
the last few frames.

Patch also removes some redundant variables and makes one small bug fix.

Change-Id: Ic167831bec79acc9f0d7e4698bcc4bb188840c45
---
 vp8/encoder/firstpass.c | 33 ++++++++++-----------------------
 vp8/encoder/onyx_int.h  |  3 ---
 2 files changed, 10 insertions(+), 26 deletions(-)

diff --git a/vp8/encoder/firstpass.c b/vp8/encoder/firstpass.c
index 8a94fa369..a7f5ce44c 100644
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -1439,7 +1439,7 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         // Boost for arf frame
         Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100);
-        Boost += (cpi->baseline_gf_interval * 50);
+        Boost += (i * 50);
         allocation_chunks = (i * 100) + Boost;
 
         // Normalize Altboost and allocations chunck down to prevent overflow
@@ -1738,16 +1738,6 @@ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         vp8_avg_stats(&sectionstats);
 
-        if (sectionstats.pcnt_motion < .17)
-            cpi->section_is_low_motion = 1;
-        else
-            cpi->section_is_low_motion = 0;
-
-        if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
-            cpi->section_is_fast_motion = 1;
-        else
-            cpi->section_is_fast_motion = 0;
-
         cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
 
         Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
@@ -1980,7 +1970,14 @@ void vp8_second_pass(VP8_COMP *cpi)
             cpi->ni_av_qi                     = cpi->worst_quality;
         }
     }
-    else
+    // The last few frames of a clip almost always have to few or too many
+    // bits and for the sake of over exact rate control we dont want to make
+    // radical adjustments to the allowed quantizer range just to use up a
+    // few surplus bits or get beneath the target rate.
+    else if ( (cpi->common.current_video_frame <
+                  (((unsigned int)cpi->total_stats->count * 255)>>8)) &&
+              ((cpi->common.current_video_frame + cpi->baseline_gf_interval) <
+                  (unsigned int)cpi->total_stats->count) )
     {
         if (frames_left < 1)
             frames_left = 1;
@@ -2344,17 +2341,7 @@ void vp8_find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
 
         vp8_avg_stats(&sectionstats);
 
-        if (sectionstats.pcnt_motion < .17)
-            cpi->section_is_low_motion = 1;
-        else
-            cpi->section_is_low_motion = 0;
-
-        if (sectionstats.mvc_abs + sectionstats.mvr_abs > 45)
-            cpi->section_is_fast_motion = 1;
-        else
-            cpi->section_is_fast_motion = 0;
-
-        cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
+         cpi->section_intra_rating = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
 
         Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error);
         // if( (Ratio > 11) ) //&& (sectionstats.pcnt_second_ref < .20) )
diff --git a/vp8/encoder/onyx_int.h b/vp8/encoder/onyx_int.h
index be5b00de8..a9eedf399 100644
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -614,9 +614,6 @@ typedef struct
     unsigned int tempdata2;
 
     int base_skip_false_prob[128];
-    unsigned int section_is_low_motion;
-    unsigned int section_benefits_from_aggresive_q;
-    unsigned int section_is_fast_motion;
     unsigned int section_intra_rating;
 
     double section_max_qfactor;

From 647df00f3058838cb37a1c257ab0d95c185084a9 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 4 Nov 2010 16:03:36 -0700
Subject: [PATCH 299/307] postproc : Re-work posproc calling to allow more
 flags.

Debugging in postproc needs more flags to allow for specific
block types to be turned on or off in the visualizations.

Must be enabled with --enable-postproc-visualizer during
configuration time.

Change-Id: Ia74f357ddc3ad4fb8082afd3a64f62384e4fcb2d
---
 configure              |  8 +++-
 vp8/common/onyx.h      |  2 +-
 vp8/common/onyxd.h     |  2 +-
 vp8/common/postproc.c  | 99 +++++++++++++++++++-----------------------
 vp8/common/postproc.h  |  2 +-
 vp8/common/ppflags.h   | 19 ++++++--
 vp8/decoder/onyxd_if.c |  4 +-
 vp8/encoder/onyx_if.c  |  4 +-
 vp8/vp8_cx_iface.c     | 10 ++++-
 vp8/vp8_dx_iface.c     | 67 +++++++++++++++++++++++-----
 vpx/vp8.h              | 34 ++++++++++-----
 vpxdec.c               | 77 +++++++++++++++++++++++++++++++-
 12 files changed, 237 insertions(+), 91 deletions(-)

diff --git a/configure b/configure
index 11e086e9c..60308b028 100755
--- a/configure
+++ b/configure
@@ -41,6 +41,7 @@ Advanced options:
   ${toggle_shared}                shared library support
   ${toggle_small}                 favor smaller size over speed
   ${toggle_arm_asm_detok}         assembly version of the detokenizer (ARM platforms only)
+  ${toggle_postproc_visualizer}   macro block / block level visualizers
 
 Codecs:
   Codecs can be selectively enabled or disabled individually, or by family:
@@ -250,6 +251,7 @@ CONFIG_LIST="
     shared
     small
     arm_asm_detok
+    postproc_visualizer
 "
 CMDLINE_SELECT="
     extra_warnings
@@ -289,6 +291,7 @@ CMDLINE_SELECT="
     shared
     small
     arm_asm_detok
+    postproc_visualizer
 "
 
 process_cmdline() {
@@ -325,8 +328,6 @@ post_process_cmdline() {
     for c in ${CODECS}; do
         enabled ${c} && enable ${c##*_}s
     done
-
-
 }
 
 
@@ -536,6 +537,9 @@ process_toolchain() {
 
     # Other toolchain specific defaults
     case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
+
+    enabled postproc_visualizer && {enabled postproc \
+        || die "postproc_visualizer requires postproc to be enabled"}
 }
 
 
diff --git a/vp8/common/onyx.h b/vp8/common/onyx.h
index a006306db..3c199d1c2 100644
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -204,7 +204,7 @@ extern "C"
 // and not just a copy of the pointer..
     int vp8_receive_raw_frame(VP8_PTR comp, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, INT64 time_stamp, INT64 end_time_stamp);
     int vp8_get_compressed_data(VP8_PTR comp, unsigned int *frame_flags, unsigned long *size, unsigned char *dest, INT64 *time_stamp, INT64 *time_end, int flush);
-    int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags);
+    int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags);
 
     int vp8_use_as_reference(VP8_PTR comp, int ref_frame_flags);
     int vp8_update_reference(VP8_PTR comp, int ref_frame_flags);
diff --git a/vp8/common/onyxd.h b/vp8/common/onyxd.h
index 00a97d97d..e53bc3138 100644
--- a/vp8/common/onyxd.h
+++ b/vp8/common/onyxd.h
@@ -51,7 +51,7 @@ extern "C"
     int vp8dx_get_setting(VP8D_PTR comp, VP8D_SETTING oxst);
 
     int vp8dx_receive_compressed_data(VP8D_PTR comp, unsigned long size, const unsigned char *dest, INT64 time_stamp);
-    int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level,  int noise_level, int flags);
+    int vp8dx_get_raw_frame(VP8D_PTR comp, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags);
 
     int vp8dx_get_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
     int vp8dx_set_reference(VP8D_PTR comp, VP8_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd);
diff --git a/vp8/common/postproc.c b/vp8/common/postproc.c
index e797e1036..15b1c2c89 100644
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -26,7 +26,7 @@
     ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
 
 /* global constants */
-
+#if CONFIG_POSTPROC_VISUALIZER
 static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
 {
     { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
@@ -59,13 +59,14 @@ static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
     { RGB_TO_YUV(0xccff33) },   /* Yellow */
 };
 
-static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
+static const unsigned char MV_REFERENCE_FRAME_colors[MAX_REF_FRAMES][3] =
 {
     { RGB_TO_YUV(0x00ff00) },   /* Blue */
     { RGB_TO_YUV(0x0000ff) },   /* Green */
     { RGB_TO_YUV(0xffff00) },   /* Yellow */
     { RGB_TO_YUV(0xff0000) },   /* Red */
 };
+#endif
 
 static const short kernel5[] =
 {
@@ -677,10 +678,13 @@ static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int hei
 #define RTCD_VTABLE(oci) NULL
 #endif
 
-int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
+int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags)
 {
     char message[512];
     int q = oci->filter_level * 10 / 6;
+    int flags = ppflags->post_proc_flag;
+    int deblock_level = ppflags->deblocking_level;
+    int noise_level = ppflags->noise_level;
 
     if (!oci->frame_to_show)
         return -1;
@@ -737,7 +741,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
          oci->post_proc_buffer.y_stride);
     }
 
-    if (flags & VP8D_DEBUG_LEVEL1)
+#if CONFIG_POSTPROC_VISUALIZER
+    if (flags & VP8D_DEBUG_TXT_FRAME_INFO)
     {
         sprintf(message, "F%1dG%1dQ%3dF%3dP%d_s%dx%d",
                 (oci->frame_type == KEY_FRAME),
@@ -749,7 +754,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
     }
 
-    if (flags & VP8D_DEBUG_LEVEL2)
+    if (flags & VP8D_DEBUG_TXT_MBLK_MODES)
     {
         int i, j;
         unsigned char *y_ptr;
@@ -781,7 +786,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    if (flags & VP8D_DEBUG_LEVEL3)
+    if (flags & VP8D_DEBUG_TXT_DC_DIFF)
     {
         int i, j;
         unsigned char *y_ptr;
@@ -816,45 +821,14 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
         }
     }
 
-    if (flags & VP8D_DEBUG_LEVEL4)
+    if (flags & VP8D_DEBUG_TXT_RATE_INFO)
     {
         sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
         vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
-#if 0
-        int i, j;
-        unsigned char *y_ptr;
-        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
-        int mb_rows = post->y_height >> 4;
-        int mb_cols = post->y_width  >> 4;
-        int mb_index = 0;
-        MODE_INFO *mi = oci->mi;
-
-        y_ptr = post->y_buffer + 4 * post->y_stride + 4;
-
-        /* vp8_filter each macro block */
-        for (i = 0; i < mb_rows; i++)
-        {
-            for (j = 0; j < mb_cols; j++)
-            {
-                char zz[4];
-
-                sprintf(zz, "%c", mi[mb_index].mbmi.dc_diff + '0');
-                vp8_blit_text(zz, y_ptr, post->y_stride);
-                mb_index ++;
-                y_ptr += 16;
-            }
-
-            mb_index ++; /* border */
-            y_ptr += post->y_stride  * 16 - post->y_width;
-
-        }
-
-#endif
-
     }
 
     /* Draw motion vectors */
-    if (flags & VP8D_DEBUG_DRAW_MV)
+    if ((flags & VP8D_DEBUG_DRAW_MV) && ppflags->display_mv_flag)
     {
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
         int width  = post->y_width;
@@ -871,6 +845,12 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
             {
                 int x1, y1;
 
+                if (!(ppflags->display_mv_flag & (1<<mi->mbmi.mode)))
+                {
+                    mi++;
+                    continue;
+                }
+
                 if (mi->mbmi.mode == SPLITMV)
                 {
                     switch (mi->mbmi.partitioning)
@@ -996,6 +976,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                     else
                         vp8_blit_line  (lx0,  x1, ly0,  y1, y_buffer, y_stride);
                 }
+
                 mi++;
             }
             mi++;
@@ -1003,7 +984,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     /* Color in block modes */
-    if (flags & VP8D_DEBUG_CLR_BLK_MODES)
+    if ((flags & VP8D_DEBUG_CLR_BLK_MODES)
+        && (ppflags->display_mb_modes_flag || ppflags->display_b_modes_flag))
     {
         int y, x;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
@@ -1021,7 +1003,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
             {
                 int Y = 0, U = 0, V = 0;
 
-                if (mi->mbmi.mode == B_PRED)
+                if (mi->mbmi.mode == B_PRED &&
+                    ((ppflags->display_mb_modes_flag & B_PRED) || ppflags->display_b_modes_flag))
                 {
                     int by, bx;
                     unsigned char *yl, *ul, *vl;
@@ -1035,13 +1018,16 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                     {
                         for (bx = 0; bx < 16; bx += 4)
                         {
-                            Y = B_PREDICTION_MODE_colors[bmi->mode][0];
-                            U = B_PREDICTION_MODE_colors[bmi->mode][1];
-                            V = B_PREDICTION_MODE_colors[bmi->mode][2];
-
-                            POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
-                                (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
+                            if ((ppflags->display_b_modes_flag & (1<<mi->mbmi.mode))
+                                || (ppflags->display_mb_modes_flag & B_PRED))
+                            {
+                                Y = B_PREDICTION_MODE_colors[bmi->mode][0];
+                                U = B_PREDICTION_MODE_colors[bmi->mode][1];
+                                V = B_PREDICTION_MODE_colors[bmi->mode][2];
 
+                                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
+                                    (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
+                            }
                             bmi++;
                         }
 
@@ -1050,7 +1036,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                         vl += y_stride*1;
                     }
                 }
-                else
+                else if (ppflags->display_mb_modes_flag & (1<<mi->mbmi.mode))
                 {
                     Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
                     U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
@@ -1059,6 +1045,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                     POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
                         (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
                 }
+
                 mi++;
             }
             y_ptr += y_stride*16;
@@ -1070,7 +1057,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
     }
 
     /* Color in frame reference blocks */
-    if (flags & VP8D_DEBUG_CLR_FRM_REF_BLKS)
+    if ((flags & VP8D_DEBUG_CLR_FRM_REF_BLKS) && ppflags->display_ref_frame_flag)
     {
         int y, x;
         YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
@@ -1088,12 +1075,15 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
             {
                 int Y = 0, U = 0, V = 0;
 
-                Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
-                U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
-                V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
+                if (ppflags->display_ref_frame_flag & (1<<mi->mbmi.ref_frame))
+                {
+                    Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
+                    U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
+                    V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
 
-                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
-                    (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+                    POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
+                        (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+                }
 
                 mi++;
             }
@@ -1104,6 +1094,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
             mi++;
         }
     }
+#endif
 
     *dest = oci->post_proc_buffer;
 
diff --git a/vp8/common/postproc.h b/vp8/common/postproc.h
index 7485135bf..c641b9ca5 100644
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -111,7 +111,7 @@ struct postproc_state
 #include "onyxc_int.h"
 #include "ppflags.h"
 int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest,
-                        int deblock_level, int noise_level, int flags);
+                        vp8_ppflags_t *flags);
 
 
 void vp8_de_noise(YV12_BUFFER_CONFIG         *source,
diff --git a/vp8/common/ppflags.h b/vp8/common/ppflags.h
index b8d713cf0..65b0cab6a 100644
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -17,13 +17,24 @@ enum
     VP8D_DEBLOCK                = 1<<0,
     VP8D_DEMACROBLOCK           = 1<<1,
     VP8D_ADDNOISE               = 1<<2,
-    VP8D_DEBUG_LEVEL1           = 1<<3,
-    VP8D_DEBUG_LEVEL2           = 1<<4,
-    VP8D_DEBUG_LEVEL3           = 1<<5,
-    VP8D_DEBUG_LEVEL4           = 1<<6,
+    VP8D_DEBUG_TXT_FRAME_INFO   = 1<<3,
+    VP8D_DEBUG_TXT_MBLK_MODES   = 1<<4,
+    VP8D_DEBUG_TXT_DC_DIFF      = 1<<5,
+    VP8D_DEBUG_TXT_RATE_INFO    = 1<<6,
     VP8D_DEBUG_DRAW_MV          = 1<<7,
     VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
     VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
 };
 
+typedef struct
+{
+    int post_proc_flag;
+    int deblocking_level;
+    int noise_level;
+    int display_ref_frame_flag;
+    int display_mb_modes_flag;
+    int display_b_modes_flag;
+    int display_mv_flag;
+} vp8_ppflags_t;
+
 #endif
diff --git a/vp8/decoder/onyxd_if.c b/vp8/decoder/onyxd_if.c
index 6eda45e4a..aa2709f5b 100644
--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -506,7 +506,7 @@ int vp8dx_receive_compressed_data(VP8D_PTR ptr, unsigned long size, const unsign
     pbi->common.error.setjmp = 0;
     return retcode;
 }
-int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, int deblock_level,  int noise_level, int flags)
+int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp, INT64 *time_end_stamp, vp8_ppflags_t *flags)
 {
     int ret = -1;
     VP8D_COMP *pbi = (VP8D_COMP *) ptr;
@@ -524,7 +524,7 @@ int vp8dx_get_raw_frame(VP8D_PTR ptr, YV12_BUFFER_CONFIG *sd, INT64 *time_stamp,
 
     sd->clrtype = pbi->common.clr_type;
 #if CONFIG_POSTPROC
-    ret = vp8_post_proc_frame(&pbi->common, sd, deblock_level, noise_level, flags);
+    ret = vp8_post_proc_frame(&pbi->common, sd, flags);
 #else
 
     if (pbi->common.frame_to_show)
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index 93e13d139..ba7bb104b 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -5224,7 +5224,7 @@ int vp8_get_compressed_data(VP8_PTR ptr, unsigned int *frame_flags, unsigned lon
     return 0;
 }
 
-int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int deblock_level, int noise_level, int flags)
+int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags)
 {
     VP8_COMP *cpi = (VP8_COMP *) comp;
 
@@ -5234,7 +5234,7 @@ int vp8_get_preview_raw_frame(VP8_PTR comp, YV12_BUFFER_CONFIG *dest, int debloc
     {
         int ret;
 #if CONFIG_POSTPROC
-        ret = vp8_post_proc_frame(&cpi->common, dest, deblock_level, noise_level, flags);
+        ret = vp8_post_proc_frame(&cpi->common, dest, flags);
 #else
 
         if (cpi->common.frame_to_show)
diff --git a/vp8/vp8_cx_iface.c b/vp8/vp8_cx_iface.c
index 8e50b7f1b..6a2872031 100644
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -860,8 +860,16 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
 {
 
     YV12_BUFFER_CONFIG sd;
+    vp8_ppflags_t flags = {0};
 
-    if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, ctx->preview_ppcfg.deblocking_level, ctx->preview_ppcfg.noise_level, ctx->preview_ppcfg.post_proc_flag))
+    if (ctx->preview_ppcfg.post_proc_flag)
+    {
+        flags.post_proc_flag        = ctx->preview_ppcfg.post_proc_flag;
+        flags.deblocking_level      = ctx->preview_ppcfg.deblocking_level;
+        flags.noise_level           = ctx->preview_ppcfg.noise_level;
+    }
+
+    if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags))
     {
 
         /*
diff --git a/vp8/vp8_dx_iface.c b/vp8/vp8_dx_iface.c
index 9964124d1..9dd492217 100644
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -65,12 +65,19 @@ struct vpx_codec_alg_priv
     vpx_codec_priv_t        base;
     vpx_codec_mmap_t        mmaps[NELEMENTS(vp8_mem_req_segs)-1];
     vpx_codec_dec_cfg_t     cfg;
-    vp8_stream_info_t   si;
+    vp8_stream_info_t       si;
     int                     defer_alloc;
     int                     decoder_init;
     VP8D_PTR                pbi;
     int                     postproc_cfg_set;
     vp8_postproc_cfg_t      postproc_cfg;
+#if CONFIG_POSTPROC_VISUALIZER
+    unsigned int            dbg_postproc_flag;
+    int                     dbg_color_ref_frame_flag;
+    int                     dbg_color_mb_modes_flag;
+    int                     dbg_color_b_modes_flag;
+    int                     dbg_display_mv_flag;
+#endif
     vpx_image_t             img;
     int                     img_setup;
     int                     img_avail;
@@ -416,15 +423,27 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
     {
         YV12_BUFFER_CONFIG sd;
         INT64 time_stamp = 0, time_end_stamp = 0;
-        int ppflag       = 0;
-        int ppdeblocking = 0;
-        int ppnoise      = 0;
+        vp8_ppflags_t flags = {0};
 
         if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)
         {
-            ppflag      = ctx->postproc_cfg.post_proc_flag;
-            ppdeblocking = ctx->postproc_cfg.deblocking_level;
-            ppnoise     = ctx->postproc_cfg.noise_level;
+            flags.post_proc_flag= ctx->postproc_cfg.post_proc_flag
+#if CONFIG_POSTPROC_VISUALIZER
+
+                                | ((ctx->dbg_color_ref_frame_flag != 0) ? VP8D_DEBUG_CLR_FRM_REF_BLKS : 0)
+                                | ((ctx->dbg_color_mb_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
+                                | ((ctx->dbg_color_b_modes_flag != 0) ? VP8D_DEBUG_CLR_BLK_MODES : 0)
+                                | ((ctx->dbg_display_mv_flag != 0) ? VP8D_DEBUG_DRAW_MV : 0)
+#endif
+                                ;
+            flags.deblocking_level      = ctx->postproc_cfg.deblocking_level;
+            flags.noise_level           = ctx->postproc_cfg.noise_level;
+#if CONFIG_POSTPROC_VISUALIZER
+            flags.display_ref_frame_flag= ctx->dbg_color_ref_frame_flag;
+            flags.display_mb_modes_flag = ctx->dbg_color_mb_modes_flag;
+            flags.display_b_modes_flag  = ctx->dbg_color_b_modes_flag;
+            flags.display_mv_flag       = ctx->dbg_display_mv_flag;
+#endif
         }
 
         if (vp8dx_receive_compressed_data(ctx->pbi, data_sz, data, deadline))
@@ -433,7 +452,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
             res = update_error_state(ctx, &pbi->common.error);
         }
 
-        if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, ppdeblocking, ppnoise, ppflag))
+        if (!res && 0 == vp8dx_get_raw_frame(ctx->pbi, &sd, &time_stamp, &time_end_stamp, &flags))
         {
             /* Align width/height */
             unsigned int a_w = (sd.y_width + 15) & ~15;
@@ -646,12 +665,38 @@ static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx,
 #endif
 }
 
+static vpx_codec_err_t vp8_set_dbg_options(vpx_codec_alg_priv_t *ctx,
+                                        int ctrl_id,
+                                        va_list args)
+{
+#if CONFIG_POSTPROC_VISUALIZER && CONFIG_POSTPROC
+    int data = va_arg(args, int);
+
+#define MAP(id, var) case id: var = data; break;
+
+    switch (ctrl_id)
+    {
+        MAP (VP8_SET_DBG_COLOR_REF_FRAME,   ctx->dbg_color_ref_frame_flag);
+        MAP (VP8_SET_DBG_COLOR_MB_MODES,    ctx->dbg_color_mb_modes_flag);
+        MAP (VP8_SET_DBG_COLOR_B_MODES,     ctx->dbg_color_b_modes_flag);
+        MAP (VP8_SET_DBG_DISPLAY_MV,        ctx->dbg_display_mv_flag);
+    }
+
+    return VPX_CODEC_OK;
+#else
+    return VPX_CODEC_INCAPABLE;
+#endif
+}
 
 vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] =
 {
-    {VP8_SET_REFERENCE,  vp8_set_reference},
-    {VP8_COPY_REFERENCE, vp8_get_reference},
-    {VP8_SET_POSTPROC,   vp8_set_postproc},
+    {VP8_SET_REFERENCE,             vp8_set_reference},
+    {VP8_COPY_REFERENCE,            vp8_get_reference},
+    {VP8_SET_POSTPROC,              vp8_set_postproc},
+    {VP8_SET_DBG_COLOR_REF_FRAME,   vp8_set_dbg_options},
+    {VP8_SET_DBG_COLOR_MB_MODES,    vp8_set_dbg_options},
+    {VP8_SET_DBG_COLOR_B_MODES,     vp8_set_dbg_options},
+    {VP8_SET_DBG_DISPLAY_MV,        vp8_set_dbg_options},
     { -1, NULL},
 };
 
diff --git a/vpx/vp8.h b/vpx/vp8.h
index d7ed8d8c1..32c01325f 100644
--- a/vpx/vp8.h
+++ b/vpx/vp8.h
@@ -38,9 +38,13 @@
  */
 enum vp8_dec_control_id
 {
-    VP8_SET_REFERENCE       = 1,    /**< pass in an external frame into decoder to be used as reference frame */
-    VP8_COPY_REFERENCE      = 2,    /**< get a copy of reference frame from the decoder */
-    VP8_SET_POSTPROC        = 3,    /**< set decoder's the post processing settings  */
+    VP8_SET_REFERENCE           = 1,    /**< pass in an external frame into decoder to be used as reference frame */
+    VP8_COPY_REFERENCE          = 2,    /**< get a copy of reference frame from the decoder */
+    VP8_SET_POSTPROC            = 3,    /**< set the decoder's post processing settings  */
+    VP8_SET_DBG_COLOR_REF_FRAME = 4,    /**< set the reference frames to color for each macroblock */
+    VP8_SET_DBG_COLOR_MB_MODES  = 5,    /**< set which macro block modes to color */
+    VP8_SET_DBG_COLOR_B_MODES   = 6,    /**< set which blocks modes to color */
+    VP8_SET_DBG_DISPLAY_MV      = 7,    /**< set which motion vector modes to draw */
     VP8_COMMON_CTRL_ID_MAX
 };
 
@@ -50,10 +54,14 @@ enum vp8_dec_control_id
  */
 enum vp8_postproc_level
 {
-    VP8_NOFILTERING    = 0,
-    VP8_DEBLOCK        = 1,
-    VP8_DEMACROBLOCK   = 2,
-    VP8_ADDNOISE       = 4
+    VP8_NOFILTERING             = 0,
+    VP8_DEBLOCK                 = 1<<0,
+    VP8_DEMACROBLOCK            = 1<<1,
+    VP8_ADDNOISE                = 1<<2,
+    VP8_DEBUG_TXT_FRAME_INFO    = 1<<3, /**< print frame information */
+    VP8_DEBUG_TXT_MBLK_MODES    = 1<<4, /**< print macro block modes over each macro block */
+    VP8_DEBUG_TXT_DC_DIFF       = 1<<5, /**< print dc diff for each macro block */
+    VP8_DEBUG_TXT_RATE_INFO     = 1<<6, /**< print video rate info (encoder only) */
 };
 
 /*!\brief post process flags
@@ -65,9 +73,9 @@ enum vp8_postproc_level
 
 typedef struct vp8_postproc_cfg
 {
-    int post_proc_flag;           /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
-    int deblocking_level;        /**< the strength of deblocking, valid range [0, 16] */
-    int noise_level;             /**< the strength of additive noise, valid range [0, 16] */
+    int post_proc_flag;         /**< the types of post processing to be done, should be combination of "vp8_postproc_level" */
+    int deblocking_level;       /**< the strength of deblocking, valid range [0, 16] */
+    int noise_level;            /**< the strength of additive noise, valid range [0, 16] */
 } vp8_postproc_cfg_t;
 
 /*!\brief reference frame type
@@ -95,12 +103,16 @@ typedef struct vpx_ref_frame
 
 /*!\brief vp8 decoder control funciton parameter type
  *
- * defines the data type for each of VP8 decoder control funciton requires
+ * defines the data type for each of VP8 decoder control function requires
  */
 
 VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE,           vpx_ref_frame_t *)
 VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE,          vpx_ref_frame_t *)
 VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC,            vp8_postproc_cfg_t *)
+VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_REF_FRAME, int)
+VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_MB_MODES,  int)
+VPX_CTRL_USE_TYPE(VP8_SET_DBG_COLOR_B_MODES,   int)
+VPX_CTRL_USE_TYPE(VP8_SET_DBG_DISPLAY_MV,      int)
 
 
 /*! @} - end defgroup vp8 */
diff --git a/vpxdec.c b/vpxdec.c
index 9b565b022..5eb0bebde 100644
--- a/vpxdec.c
+++ b/vpxdec.c
@@ -108,11 +108,19 @@ static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level",
         "Enable VP8 demacroblocking, w/ level");
 static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
                                        "Enable VP8 visible debug info");
-
+static const arg_def_t pp_disp_ref_frame = ARG_DEF(NULL, "pp-dbg-ref-frame", 1,
+                                       "Display only selected reference frame per macro block");
+static const arg_def_t pp_disp_mb_modes = ARG_DEF(NULL, "pp-dbg-mb-modes", 1,
+                                       "Display only selected macro block modes");
+static const arg_def_t pp_disp_b_modes = ARG_DEF(NULL, "pp-dbg-b-modes", 1,
+                                       "Display only selected block modes");
+static const arg_def_t pp_disp_mvs = ARG_DEF(NULL, "pp-dbg-mvs", 1,
+                                       "Draw only selected motion vectors");
 
 static const arg_def_t *vp8_pp_args[] =
 {
     &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
+    &pp_disp_ref_frame, &pp_disp_mb_modes, &pp_disp_b_modes, &pp_disp_mvs,
     NULL
 };
 #endif
@@ -705,6 +713,10 @@ int main(int argc, const char **argv_)
     vpx_codec_dec_cfg_t     cfg = {0};
 #if CONFIG_VP8_DECODER
     vp8_postproc_cfg_t      vp8_pp_cfg = {0};
+    int                     vp8_dbg_color_ref_frame = 0;
+    int                     vp8_dbg_color_mb_modes = 0;
+    int                     vp8_dbg_color_b_modes = 0;
+    int                     vp8_dbg_display_mv = 0;
 #endif
     struct input_ctx        input = {0};
 
@@ -790,6 +802,42 @@ int main(int argc, const char **argv_)
             if (level)
                 vp8_pp_cfg.post_proc_flag |= level;
         }
+        else if (arg_match(&arg, &pp_disp_ref_frame, argi))
+        {
+            unsigned int flags = arg_parse_int(&arg);
+            if (flags)
+            {
+                postproc = 1;
+                vp8_dbg_color_ref_frame = flags;
+            }
+        }
+        else if (arg_match(&arg, &pp_disp_mb_modes, argi))
+        {
+            unsigned int flags = arg_parse_int(&arg);
+            if (flags)
+            {
+                postproc = 1;
+                vp8_dbg_color_mb_modes = flags;
+            }
+        }
+        else if (arg_match(&arg, &pp_disp_b_modes, argi))
+        {
+            unsigned int flags = arg_parse_int(&arg);
+            if (flags)
+            {
+                postproc = 1;
+                vp8_dbg_color_b_modes = flags;
+            }
+        }
+        else if (arg_match(&arg, &pp_disp_mvs, argi))
+        {
+            unsigned int flags = arg_parse_int(&arg);
+            if (flags)
+            {
+                postproc = 1;
+                vp8_dbg_display_mv = flags;
+            }
+        }
 
 #endif
         else
@@ -929,6 +977,33 @@ int main(int argc, const char **argv_)
         return EXIT_FAILURE;
     }
 
+    if (vp8_dbg_color_ref_frame
+        && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_REF_FRAME, vp8_dbg_color_ref_frame))
+    {
+        fprintf(stderr, "Failed to configure reference block visualizer: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+    if (vp8_dbg_color_mb_modes
+        && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_MB_MODES, vp8_dbg_color_mb_modes))
+    {
+        fprintf(stderr, "Failed to configure macro block visualizer: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+    if (vp8_dbg_color_b_modes
+        && vpx_codec_control(&decoder, VP8_SET_DBG_COLOR_B_MODES, vp8_dbg_color_b_modes))
+    {
+        fprintf(stderr, "Failed to configure block visualizer: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
+
+    if (vp8_dbg_display_mv
+        && vpx_codec_control(&decoder, VP8_SET_DBG_DISPLAY_MV, vp8_dbg_display_mv))
+    {
+        fprintf(stderr, "Failed to configure motion vector visualizer: %s\n", vpx_codec_error(&decoder));
+        return EXIT_FAILURE;
+    }
 #endif
 
     /* Decode file */

From 5f0e0617bad80d06b7231ec11814e52d6f3edba8 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 21 Oct 2010 10:53:15 -0700
Subject: [PATCH 300/307] FDCT optimizations.

Fixed up the fdct for mmx and 8x4 sse2 to match them
most recent changes.

Change-Id: Ibee2d6c536fe14dcf75cd6eb1c73f4848a56d719
---
 vp8/encoder/x86/dct_mmx.asm            | 592 +++++++------------------
 vp8/encoder/x86/dct_sse2.asm           | 289 +++++++++++-
 vp8/encoder/x86/dct_x86.h              |  12 +-
 vp8/encoder/x86/x86_csystemdependent.c |  20 +-
 4 files changed, 429 insertions(+), 484 deletions(-)

diff --git a/vp8/encoder/x86/dct_mmx.asm b/vp8/encoder/x86/dct_mmx.asm
index 5acaca875..f07b030bd 100644
--- a/vp8/encoder/x86/dct_mmx.asm
+++ b/vp8/encoder/x86/dct_mmx.asm
@@ -11,511 +11,231 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-section .text
-    global sym(vp8_short_fdct4x4_mmx)
-    global sym(vp8_short_fdct8x4_wmt)
-
-
-%define         DCTCONSTANTSBITS         (16)
-%define         DCTROUNDINGVALUE         (1<< (DCTCONSTANTSBITS-1))
-%define         x_c1                      (60547)          ; cos(pi  /8) * (1<<15)
-%define         x_c2                      (46341)          ; cos(pi*2/8) * (1<<15)
-%define         x_c3                      (25080)          ; cos(pi*3/8) * (1<<15)
-
-
 ;void vp8_short_fdct4x4_mmx(short *input, short *output, int pitch)
+global sym(vp8_short_fdct4x4_mmx)
 sym(vp8_short_fdct4x4_mmx):
     push        rbp
-    mov         rbp, rsp
+    mov         rbp,        rsp
     SHADOW_ARGS_TO_STACK 3
     GET_GOT     rbx
-    push rsi
-    push rdi
+    push        rsi
+    push        rdi
     ; end prolog
-        mov     rsi,    arg(0) ;input
-        mov     rdi,    arg(1) ;output
 
-        lea     rdx,    [GLOBAL(dct_const_mmx)]
-        movsxd  rax,    dword ptr arg(2) ;pitch
+        mov         rsi,        arg(0)      ; input
+        mov         rdi,        arg(1)      ; output
 
-        lea     rcx,    [rsi + rax*2]
+        movsxd      rax,        dword ptr arg(2) ;pitch
+
+        lea         rcx,        [rsi + rax*2]
         ; read the input data
-        movq    mm0,    [rsi]
-        movq    mm1,    [rsi + rax    ]
+        movq        mm0,        [rsi]
+        movq        mm1,        [rsi + rax]
 
-        movq    mm2,    [rcx]
-        movq    mm3,    [rcx + rax]
-        ; get the constants
-        ;shift to left by 1 for prescision
-        psllw   mm0,    3
-        psllw   mm1,    3
+        movq        mm2,        [rcx]
+        movq        mm4,        [rcx + rax]
 
-        psllw   mm2,    3
-        psllw   mm3,    3
+        ; transpose for the first stage
+        movq        mm3,        mm0         ; 00 01 02 03
+        movq        mm5,        mm2         ; 20 21 22 23
 
-        ; transpose for the second stage
-        movq    mm4,    mm0         ; 00 01 02 03
-        movq    mm5,    mm2         ; 10 11 12 03
+        punpcklwd   mm0,        mm1         ; 00 10 01 11
+        punpckhwd   mm3,        mm1         ; 02 12 03 13
 
-        punpcklwd   mm0,    mm1     ; 00 10 01 11
-        punpckhwd   mm4,    mm1     ; 02 12 03 13
+        punpcklwd   mm2,        mm4         ; 20 30 21 31
+        punpckhwd   mm5,        mm4         ; 22 32 23 33
 
-        punpcklwd   mm2,    mm3     ; 20 30 21 31
-        punpckhwd   mm5,    mm3     ; 22 32 23 33
+        movq        mm1,        mm0         ; 00 10 01 11
+        punpckldq   mm0,        mm2         ; 00 10 20 30
 
+        punpckhdq   mm1,        mm2         ; 01 11 21 31
 
-        movq        mm1,    mm0     ; 00 10 01 11
-        punpckldq   mm0,    mm2     ; 00 10 20 30
+        movq        mm2,        mm3         ; 02 12 03 13
+        punpckldq   mm2,        mm5         ; 02 12 22 32
 
-        punpckhdq   mm1,    mm2     ; 01 11 21 31
-
-        movq        mm2,    mm4     ; 02 12 03 13
-        punpckldq   mm2,    mm5     ; 02 12 22 32
-
-        punpckhdq   mm4,    mm5     ; 03 13 23 33
-        movq        mm3,    mm4
+        punpckhdq   mm3,        mm5         ; 03 13 23 33
 
+        ; mm0 0
+        ; mm1 1
+        ; mm2 2
+        ; mm3 3
 
         ; first stage
-        movq    mm5,    mm0
-        movq    mm4,    mm1
+        movq        mm5,        mm0
+        movq        mm4,        mm1
 
-        paddw   mm0,    mm3         ; a = 0 + 3
-        paddw   mm1,    mm2         ; b = 1 + 2
+        paddw       mm0,        mm3         ; a1 = 0 + 3
+        paddw       mm1,        mm2         ; b1 = 1 + 2
 
-        psubw   mm4,    mm2         ; c = 1 - 2
-        psubw   mm5,    mm3         ; d = 0 - 3
+        psubw       mm4,        mm2         ; c1 = 1 - 2
+        psubw       mm5,        mm3         ; d1 = 0 - 3
 
+        psllw       mm5,        3
+        psllw       mm4,        3
+
+        psllw       mm0,        3
+        psllw       mm1,        3
 
         ; output 0 and 2
-        movq    mm6,    [rdx +  16] ; c2
-        movq    mm2,    mm0         ; a
+        movq        mm2,        mm0         ; a1
 
-        paddw   mm0,    mm1         ; a + b
-        psubw   mm2,    mm1         ; a - b
-
-        movq    mm1,    mm0         ; a + b
-        pmulhw  mm0,    mm6         ; 00 01 02 03
-
-        paddw   mm0,    mm1         ; output 00 01 02 03
-        pmulhw  mm6,    mm2         ; 20 21 22 23
-
-        paddw   mm2,    mm6         ; output 20 21 22 23
+        paddw       mm0,        mm1         ; op[0] = a1 + b1
+        psubw       mm2,        mm1         ; op[2] = a1 - b1
 
         ; output 1 and 3
-        movq    mm6,    [rdx +  8]  ; c1
-        movq    mm7,    [rdx + 24]  ; c3
+        ; interleave c1, d1
+        movq        mm1,        mm5         ; d1
+        punpcklwd   mm1,        mm4         ; c1 d1
+        punpckhwd   mm5,        mm4         ; c1 d1
 
-        movq    mm1,    mm4         ; c
-        movq    mm3,    mm5         ; d
+        movq        mm3,        mm1
+        movq        mm4,        mm5
 
-        pmulhw  mm1,    mm7         ; c * c3
-        pmulhw  mm3,    mm6         ; d * c1
+        pmaddwd     mm1,        MMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+        pmaddwd     mm4,        MMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
 
-        paddw   mm3,    mm5         ; d * c1 rounded
-        paddw   mm1,    mm3         ; output 10 11 12 13
+        pmaddwd     mm3,        MMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+        pmaddwd     mm5,        MMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
 
-        movq    mm3,    mm4         ; c
-        pmulhw  mm5,    mm7         ; d * c3
+        paddd       mm1,        MMWORD PTR[GLOBAL(_14500)]
+        paddd       mm4,        MMWORD PTR[GLOBAL(_14500)]
+        paddd       mm3,        MMWORD PTR[GLOBAL(_7500)]
+        paddd       mm5,        MMWORD PTR[GLOBAL(_7500)]
 
-        pmulhw  mm4,    mm6         ; c * c1
-        paddw   mm3,    mm4         ; round c* c1
-
-        psubw   mm5,    mm3         ; output 30 31 32 33
-        movq    mm3,    mm5
+        psrad       mm1,        12          ; (c1 * 2217 + d1 * 5352 +  14500)>>12
+        psrad       mm4,        12          ; (c1 * 2217 + d1 * 5352 +  14500)>>12
+        psrad       mm3,        12          ; (d1 * 2217 - c1 * 5352 +   7500)>>12
+        psrad       mm5,        12          ; (d1 * 2217 - c1 * 5352 +   7500)>>12
 
+        packssdw    mm1,        mm4         ; op[1]
+        packssdw    mm3,        mm5         ; op[3]
 
         ; done with vertical
         ; transpose for the second stage
-        movq    mm4,    mm0         ; 00 01 02 03
-        movq    mm5,    mm2         ; 10 11 12 03
+        movq        mm4,        mm0         ; 00 10 20 30
+        movq        mm5,        mm2         ; 02 12 22 32
 
-        punpcklwd   mm0,    mm1     ; 00 10 01 11
-        punpckhwd   mm4,    mm1     ; 02 12 03 13
+        punpcklwd   mm0,        mm1         ; 00 01 10 11
+        punpckhwd   mm4,        mm1         ; 20 21 30 31
 
-        punpcklwd   mm2,    mm3     ; 20 30 21 31
-        punpckhwd   mm5,    mm3     ; 22 32 23 33
+        punpcklwd   mm2,        mm3         ; 02 03 12 13
+        punpckhwd   mm5,        mm3         ; 22 23 32 33
 
+        movq        mm1,        mm0         ; 00 01 10 11
+        punpckldq   mm0,        mm2         ; 00 01 02 03
 
-        movq        mm1,    mm0     ; 00 10 01 11
-        punpckldq   mm0,    mm2     ; 00 10 20 30
+        punpckhdq   mm1,        mm2         ; 01 22 12 13
 
-        punpckhdq   mm1,    mm2     ; 01 11 21 31
+        movq        mm2,        mm4         ; 20 31 30 31
+        punpckldq   mm2,        mm5         ; 20 21 22 23
 
-        movq        mm2,    mm4     ; 02 12 03 13
-        punpckldq   mm2,    mm5     ; 02 12 22 32
+        punpckhdq   mm4,        mm5         ; 30 31 32 33
 
-        punpckhdq   mm4,    mm5     ; 03 13 23 33
-        movq        mm3,    mm4
+        ; mm0 0
+        ; mm1 1
+        ; mm2 2
+        ; mm3 4
 
+        movq        mm5,        mm0
+        movq        mm3,        mm1
 
-        ; first stage
-        movq    mm5,    mm0
-        movq    mm4,    mm1
+        paddw       mm0,        mm4         ; a1 = 0 + 3
+        paddw       mm1,        mm2         ; b1 = 1 + 2
 
-        paddw   mm0,    mm3         ; a = 0 + 3
-        paddw   mm1,    mm2         ; b = 1 + 2
+        psubw       mm3,        mm2         ; c1 = 1 - 2
+        psubw       mm5,        mm4         ; d1 = 0 - 3
 
-        psubw   mm4,    mm2         ; c = 1 - 2
-        psubw   mm5,    mm3         ; d = 0 - 3
+        pxor        mm6,        mm6         ; zero out for compare
 
+        pcmpeqw     mm6,        mm5         ; d1 != 0
+
+        pandn       mm6,        MMWORD PTR[GLOBAL(_cmp_mask)]   ; clear upper,
+                                                                ; and keep bit 0 of lower
 
         ; output 0 and 2
-        movq    mm6,    [rdx +  16] ; c2
-        movq    mm2,    mm0         ; a
-        paddw   mm0,    mm1         ; a + b
+        movq        mm2,        mm0         ; a1
 
-        psubw   mm2,    mm1         ; a - b
+        paddw       mm0,        mm1         ; a1 + b1
+        psubw       mm2,        mm1         ; a1 - b1
 
-        movq    mm1,    mm0         ; a + b
-        pmulhw  mm0,    mm6         ; 00 01 02 03
+        paddw       mm0,        MMWORD PTR[GLOBAL(_7w)]
+        paddw       mm2,        MMWORD PTR[GLOBAL(_7w)]
 
-        paddw   mm0,    mm1         ; output 00 01 02 03
-        pmulhw  mm6,    mm2         ; 20 21 22 23
-
-        paddw   mm2,    mm6         ; output 20 21 22 23
+        psraw       mm0,        4           ; op[0] = (a1 + b1 + 7)>>4
+        psraw       mm2,        4           ; op[8] = (a1 - b1 + 7)>>4
 
+        movq        MMWORD PTR[rdi + 0 ],  mm0
+        movq        MMWORD PTR[rdi + 16],  mm2
 
         ; output 1 and 3
-        movq    mm6,    [rdx +  8]  ; c1
-        movq    mm7,    [rdx + 24]  ; c3
+        ; interleave c1, d1
+        movq        mm1,        mm5         ; d1
+        punpcklwd   mm1,        mm3         ; c1 d1
+        punpckhwd   mm5,        mm3         ; c1 d1
 
-        movq    mm1,    mm4         ; c
-        movq    mm3,    mm5         ; d
+        movq        mm3,        mm1
+        movq        mm4,        mm5
 
-        pmulhw  mm1,    mm7         ; c * c3
-        pmulhw  mm3,    mm6         ; d * c1
+        pmaddwd     mm1,        MMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+        pmaddwd     mm4,        MMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
 
-        paddw   mm3,    mm5         ; d * c1 rounded
-        paddw   mm1,    mm3         ; output 10 11 12 13
+        pmaddwd     mm3,        MMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+        pmaddwd     mm5,        MMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
 
-        movq    mm3,    mm4         ; c
-        pmulhw  mm5,    mm7         ; d * c3
+        paddd       mm1,        MMWORD PTR[GLOBAL(_12000)]
+        paddd       mm4,        MMWORD PTR[GLOBAL(_12000)]
+        paddd       mm3,        MMWORD PTR[GLOBAL(_51000)]
+        paddd       mm5,        MMWORD PTR[GLOBAL(_51000)]
 
-        pmulhw  mm4,    mm6         ; c * c1
-        paddw   mm3,    mm4         ; round c* c1
+        psrad       mm1,        16          ; (c1 * 2217 + d1 * 5352 +  14500)>>16
+        psrad       mm4,        16          ; (c1 * 2217 + d1 * 5352 +  14500)>>16
+        psrad       mm3,        16          ; (d1 * 2217 - c1 * 5352 +   7500)>>16
+        psrad       mm5,        16          ; (d1 * 2217 - c1 * 5352 +   7500)>>16
 
-        psubw   mm5,    mm3         ; output 30 31 32 33
-        movq    mm3,    mm5
-        ; done with vertical
+        packssdw    mm1,        mm4         ; op[4]
+        packssdw    mm3,        mm5         ; op[12]
 
-        pcmpeqw mm4,    mm4
-        pcmpeqw mm5,    mm5
-        psrlw   mm4,    15
-        psrlw   mm5,    15
+        paddw       mm1,        mm6         ; op[4] += (d1!=0)
 
-        psllw   mm4,    2
-        psllw   mm5,    2
+        movq        MMWORD PTR[rdi + 8 ],  mm1
+        movq        MMWORD PTR[rdi + 24],  mm3
 
-        paddw   mm0,    mm4
-        paddw   mm1,    mm5
-        paddw   mm2,    mm4
-        paddw   mm3,    mm5
-
-        psraw   mm0, 3
-        psraw   mm1, 3
-        psraw   mm2, 3
-        psraw   mm3, 3
-
-        movq        [rdi   ],   mm0
-        movq        [rdi+ 8],   mm1
-        movq        [rdi+16],   mm2
-        movq        [rdi+24],   mm3
-
-    ; begin epilog
-    pop rdi
-    pop rsi
+     ; begin epilog
+    pop         rdi
+    pop         rsi
     RESTORE_GOT
     UNSHADOW_ARGS
     pop         rbp
     ret
 
-
-;void vp8_short_fdct8x4_wmt(short *input, short *output, int pitch)
-sym(vp8_short_fdct8x4_wmt):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
-    GET_GOT     rbx
-    push rsi
-    push rdi
-    ; end prolog
-        mov         rsi,    arg(0) ;input
-        mov         rdi,    arg(1) ;output
-
-        lea         rdx,    [GLOBAL(dct_const_xmm)]
-        movsxd      rax,    dword ptr arg(2) ;pitch
-
-        lea         rcx,    [rsi + rax*2]
-        ; read the input data
-        movdqa      xmm0,       [rsi]
-        movdqa      xmm2,       [rsi + rax]
-
-        movdqa      xmm4,       [rcx]
-        movdqa      xmm3,       [rcx + rax]
-        ; get the constants
-        ;shift to left by 1 for prescision
-        psllw       xmm0,        3
-        psllw       xmm2,        3
-
-        psllw       xmm4,        3
-        psllw       xmm3,        3
-
-        ; transpose for the second stage
-        movdqa      xmm1,       xmm0         ; 00 01 02 03 04 05 06 07
-        movdqa      xmm5,       xmm4         ; 20 21 22 23 24 25 26 27
-
-        punpcklwd   xmm0,       xmm2         ; 00 10 01 11 02 12 03 13
-        punpckhwd   xmm1,       xmm2         ; 04 14 05 15 06 16 07 17
-
-        punpcklwd   xmm4,       xmm3         ; 20 30 21 31 22 32 23 33
-        punpckhwd   xmm5,       xmm3         ; 24 34 25 35 26 36 27 37
-
-        movdqa      xmm2,       xmm0         ; 00 10 01 11 02 12 03 13
-        punpckldq   xmm0,       xmm4         ; 00 10 20 30 01 11 21 31
-
-        punpckhdq   xmm2,       xmm4         ; 02 12 22 32 03 13 23 33
-
-
-        movdqa      xmm4,       xmm1         ; 04 14 05 15 06 16 07 17
-        punpckldq   xmm4,       xmm5         ; 04 14 24 34 05 15 25 35
-
-        punpckhdq   xmm1,       xmm5         ; 06 16 26 36 07 17 27 37
-        movdqa      xmm3,       xmm2         ; 02 12 22 32 03 13 23 33
-
-        punpckhqdq  xmm3,       xmm1         ; 03 13 23 33 07 17 27 37
-        punpcklqdq  xmm2,       xmm1         ; 02 12 22 32 06 16 26 36
-
-        movdqa      xmm1,       xmm0         ; 00 10 20 30 01 11 21 31
-        punpcklqdq  xmm0,       xmm4         ; 00 10 20 30 04 14 24 34
-
-        punpckhqdq  xmm1,       xmm4         ; 01 11 21 32 05 15 25 35
-
-        ; xmm0 0
-        ; xmm1 1
-        ; xmm2 2
-        ; xmm3 3
-
-        ; first stage
-        movdqa      xmm5,       xmm0
-        movdqa      xmm4,       xmm1
-
-        paddw       xmm0,       xmm3         ; a = 0 + 3
-        paddw       xmm1,       xmm2         ; b = 1 + 2
-
-        psubw       xmm4,       xmm2         ; c = 1 - 2
-        psubw       xmm5,       xmm3         ; d = 0 - 3
-
-
-        ; output 0 and 2
-        movdqa      xmm6,       [rdx +  32] ; c2
-        movdqa      xmm2,       xmm0         ; a
-
-        paddw       xmm0,       xmm1         ; a + b
-        psubw       xmm2,       xmm1         ; a - b
-
-        movdqa      xmm1,       xmm0         ; a + b
-        pmulhw      xmm0,       xmm6         ; 00 01 02 03
-
-        paddw       xmm0,       xmm1         ; output 00 01 02 03
-        pmulhw      xmm6,       xmm2         ; 20 21 22 23
-
-        paddw       xmm2,       xmm6         ; output 20 21 22 23
-
-        ; output 1 and 3
-        movdqa      xmm6,       [rdx + 16]  ; c1
-        movdqa      xmm7,       [rdx + 48]  ; c3
-
-        movdqa      xmm1,       xmm4         ; c
-        movdqa      xmm3,       xmm5         ; d
-
-        pmulhw      xmm1,       xmm7         ; c * c3
-        pmulhw      xmm3,       xmm6         ; d * c1
-
-        paddw       xmm3,       xmm5         ; d * c1 rounded
-        paddw       xmm1,       xmm3         ; output 10 11 12 13
-
-        movdqa      xmm3,       xmm4         ; c
-        pmulhw      xmm5,       xmm7         ; d * c3
-
-        pmulhw      xmm4,       xmm6         ; c * c1
-        paddw       xmm3,       xmm4         ; round c* c1
-
-        psubw       xmm5,       xmm3         ; output 30 31 32 33
-        movdqa      xmm3,       xmm5
-
-
-        ; done with vertical
-        ; transpose for the second stage
-        movdqa      xmm4,       xmm2         ; 02 12 22 32 06 16 26 36
-        movdqa      xmm2,       xmm1         ; 01 11 21 31 05 15 25 35
-
-        movdqa      xmm1,       xmm0         ; 00 10 20 30 04 14 24 34
-        movdqa      xmm5,       xmm4         ; 02 12 22 32 06 16 26 36
-
-        punpcklwd   xmm0,       xmm2         ; 00 01 10 11 20 21 30 31
-        punpckhwd   xmm1,       xmm2         ; 04 05 14 15 24 25 34 35
-
-        punpcklwd   xmm4,       xmm3         ; 02 03 12 13 22 23 32 33
-        punpckhwd   xmm5,       xmm3         ; 06 07 16 17 26 27 36 37
-
-        movdqa      xmm2,       xmm0         ; 00 01 10 11 20 21 30 31
-        punpckldq   xmm0,       xmm4         ; 00 01 02 03 10 11 12 13
-
-        punpckhdq   xmm2,       xmm4         ; 20 21 22 23 30 31 32 33
-
-
-        movdqa      xmm4,       xmm1         ; 04 05 14 15 24 25 34 35
-        punpckldq   xmm4,       xmm5         ; 04 05 06 07 14 15 16 17
-
-        punpckhdq   xmm1,       xmm5         ; 24 25 26 27 34 35 36 37
-        movdqa      xmm3,       xmm2         ; 20 21 22 23 30 31 32 33
-
-        punpckhqdq  xmm3,       xmm1         ; 30 31 32 33 34 35 36 37
-        punpcklqdq  xmm2,       xmm1         ; 20 21 22 23 24 25 26 27
-
-        movdqa      xmm1,       xmm0         ; 00 01 02 03 10 11 12 13
-        punpcklqdq  xmm0,       xmm4         ; 00 01 02 03 04 05 06 07
-
-        punpckhqdq  xmm1,       xmm4         ; 10 11 12 13 14 15 16 17
-
-        ; first stage
-        movdqa      xmm5,       xmm0
-        movdqa      xmm4,       xmm1
-
-        paddw       xmm0,       xmm3         ; a = 0 + 3
-        paddw       xmm1,       xmm2         ; b = 1 + 2
-
-        psubw       xmm4,       xmm2         ; c = 1 - 2
-        psubw       xmm5,       xmm3         ; d = 0 - 3
-
-
-        ; output 0 and 2
-        movdqa      xmm6,       [rdx +  32] ; c2
-        movdqa      xmm2,       xmm0         ; a
-
-        paddw       xmm0,       xmm1         ; a + b
-        psubw       xmm2,       xmm1         ; a - b
-
-        movdqa      xmm1,       xmm0         ; a + b
-        pmulhw      xmm0,       xmm6         ; 00 01 02 03
-
-        paddw       xmm0,       xmm1         ; output 00 01 02 03
-        pmulhw      xmm6,       xmm2         ; 20 21 22 23
-
-        paddw       xmm2,       xmm6         ; output 20 21 22 23
-
-        ; output 1 and 3
-        movdqa      xmm6,       [rdx + 16]  ; c1
-        movdqa      xmm7,       [rdx + 48]  ; c3
-
-        movdqa      xmm1,       xmm4         ; c
-        movdqa      xmm3,       xmm5         ; d
-
-        pmulhw      xmm1,       xmm7         ; c * c3
-        pmulhw      xmm3,       xmm6         ; d * c1
-
-        paddw       xmm3,       xmm5         ; d * c1 rounded
-        paddw       xmm1,       xmm3         ; output 10 11 12 13
-
-        movdqa      xmm3,       xmm4         ; c
-        pmulhw      xmm5,       xmm7         ; d * c3
-
-        pmulhw      xmm4,       xmm6         ; c * c1
-        paddw       xmm3,       xmm4         ; round c* c1
-
-        psubw       xmm5,       xmm3         ; output 30 31 32 33
-        movdqa      xmm3,       xmm5
-        ; done with vertical
-
-
-        pcmpeqw     xmm4,       xmm4
-        pcmpeqw     xmm5,       xmm5;
-        psrlw       xmm4,       15
-        psrlw       xmm5,       15
-
-        psllw       xmm4,       2
-        psllw       xmm5,       2
-
-        paddw       xmm0,       xmm4
-        paddw       xmm1,       xmm5
-        paddw       xmm2,       xmm4
-        paddw       xmm3,       xmm5
-
-        psraw       xmm0,       3
-        psraw       xmm1,       3
-        psraw       xmm2,       3
-        psraw       xmm3,       3
-
-        movq        QWORD PTR[rdi   ],   xmm0
-        movq        QWORD PTR[rdi+ 8],   xmm1
-        movq        QWORD PTR[rdi+16],   xmm2
-        movq        QWORD PTR[rdi+24],   xmm3
-
-        psrldq      xmm0,       8
-        psrldq      xmm1,       8
-        psrldq      xmm2,       8
-        psrldq      xmm3,       8
-
-        movq        QWORD PTR[rdi+32],   xmm0
-        movq        QWORD PTR[rdi+40],   xmm1
-        movq        QWORD PTR[rdi+48],   xmm2
-        movq        QWORD PTR[rdi+56],   xmm3
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
-
-
 SECTION_RODATA
-;static const unsigned int dct1st_stage_rounding_mmx[2] =
-align 16
-dct1st_stage_rounding_mmx:
-    times 2 dd 8192
-
-
-;static const unsigned int dct2nd_stage_rounding_mmx[2] =
-align 16
-dct2nd_stage_rounding_mmx:
-    times 2 dd 32768
-
-
-;static const short dct_matrix[4][4]=
-align 16
-dct_matrix:
-    times 4 dw 23170
-
-    dw  30274
-    dw  12540
-    dw -12540
-    dw -30274
-
-    dw 23170
-    times 2 dw -23170
-    dw 23170
-
-    dw  12540
-    dw -30274
-    dw  30274
-    dw -12540
-
-
-;static const unsigned short dct_const_mmx[4 * 4]=
-align 16
-dct_const_mmx:
-    times 4 dw 0
-    times 4 dw 60547
-    times 4 dw 46341
-    times 4 dw 25080
-
-
-;static const unsigned short dct_const_xmm[8 * 4]=
-align 16
-dct_const_xmm:
-    times 8 dw 0
-    times 8 dw 60547
-    times 8 dw 46341
-    times 8 dw 25080
+align 8
+_5352_2217:
+    dw 5352
+    dw 2217
+    dw 5352
+    dw 2217
+align 8
+_2217_neg5352:
+    dw 2217
+    dw -5352
+    dw 2217
+    dw -5352
+align 8
+_cmp_mask:
+    times 4 dw 1
+align 8
+_7w:
+    times 4 dw 7
+align 8
+_14500:
+    times 2 dd 14500
+align 8
+_7500:
+    times 2 dd 7500
+align 8
+_12000:
+    times 2 dd 12000
+align 8
+_51000:
+    times 2 dd 51000
diff --git a/vp8/encoder/x86/dct_sse2.asm b/vp8/encoder/x86/dct_sse2.asm
index 723a78d76..652dd9804 100644
--- a/vp8/encoder/x86/dct_sse2.asm
+++ b/vp8/encoder/x86/dct_sse2.asm
@@ -11,32 +11,68 @@
 
 %include "vpx_ports/x86_abi_support.asm"
 
-;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
-global sym(vp8_short_fdct4x4_sse2)
-sym(vp8_short_fdct4x4_sse2):
+%macro STACK_FRAME_CREATE 0
+%if ABI_IS_32BIT
+  %define       input       rsi
+  %define       output      rdi
+  %define       pitch       rax
     push        rbp
     mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 3
-;;    SAVE_XMM
     GET_GOT     rbx
     push        rsi
     push        rdi
     ; end prolog
 
     mov         rsi, arg(0)
-    movsxd      rax, DWORD PTR arg(2)
-    lea         rdi, [rsi + rax*2]
+    mov         rdi, arg(1)
 
-    movq        xmm0, MMWORD PTR[rsi   ]        ;03 02 01 00
-    movq        xmm2, MMWORD PTR[rsi + rax]     ;13 12 11 10
-    movq        xmm1, MMWORD PTR[rsi + rax*2]   ;23 22 21 20
-    movq        xmm3, MMWORD PTR[rdi + rax]     ;33 32 31 30
+    movsxd      rax, dword ptr arg(2)
+    lea         rcx, [rsi + rax*2]
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    %define     input       rcx
+    %define     output      rdx
+    %define     pitch       r8
+  %else
+    %define     input       rdi
+    %define     output      rsi
+    %define     pitch       rdx
+  %endif
+%endif
+%endmacro
+
+%macro STACK_FRAME_DESTROY 0
+  %define     input
+  %define     output
+  %define     pitch
+
+%if ABI_IS_32BIT
+    pop         rdi
+    pop         rsi
+    RESTORE_GOT
+    pop         rbp
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+  %endif
+%endif
+    ret
+%endmacro
+
+;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch)
+global sym(vp8_short_fdct4x4_sse2)
+sym(vp8_short_fdct4x4_sse2):
+
+    STACK_FRAME_CREATE
+
+    movq        xmm0, MMWORD PTR[input        ] ;03 02 01 00
+    movq        xmm2, MMWORD PTR[input+  pitch] ;13 12 11 10
+    lea         input,          [input+2*pitch]
+    movq        xmm1, MMWORD PTR[input        ] ;23 22 21 20
+    movq        xmm3, MMWORD PTR[input+  pitch] ;33 32 31 30
 
     punpcklqdq  xmm0, xmm2                      ;13 12 11 10 03 02 01 00
     punpcklqdq  xmm1, xmm3                      ;33 32 31 30 23 22 21 20
 
-    mov         rdi, arg(1)
-
     movdqa      xmm2, xmm0
     punpckldq   xmm0, xmm1                      ;23 22 03 02 21 20 01 00
     punpckhdq   xmm2, xmm1                      ;33 32 13 12 31 30 11 10
@@ -51,6 +87,7 @@ sym(vp8_short_fdct4x4_sse2):
     psubw       xmm3, xmm1                      ;c1 d1 c1 d1 c1 d1 c1 d1
     psllw       xmm0, 3                         ;b1 <<= 3 a1 <<= 3
     psllw       xmm3, 3                         ;c1 <<= 3 d1 <<= 3
+
     movdqa      xmm1, xmm0
     pmaddwd     xmm0, XMMWORD PTR[GLOBAL(_mult_add)]    ;a1 + b1
     pmaddwd     xmm1, XMMWORD PTR[GLOBAL(_mult_sub)]    ;a1 - b1
@@ -121,17 +158,216 @@ sym(vp8_short_fdct4x4_sse2):
     punpcklqdq  xmm0, xmm3                      ;op[4] op[0]
     punpckhqdq  xmm1, xmm3                      ;op[12] op[8]
 
-    movdqa      XMMWORD PTR[rdi + 0], xmm0
-    movdqa      XMMWORD PTR[rdi + 16], xmm1
+    movdqa      XMMWORD PTR[output +  0], xmm0
+    movdqa      XMMWORD PTR[output + 16], xmm1
 
-    ; begin epilog
-    pop rdi
-    pop rsi
-    RESTORE_GOT
-;;    RESTORE_XMM
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY
+
+;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
+global sym(vp8_short_fdct8x4_sse2)
+sym(vp8_short_fdct8x4_sse2):
+
+    STACK_FRAME_CREATE
+
+        ; read the input data
+        movdqa      xmm0,       [input        ]
+        movdqa      xmm2,       [input+  pitch]
+        lea         input,      [input+2*pitch]
+        movdqa      xmm4,       [input        ]
+        movdqa      xmm3,       [input+  pitch]
+
+        ; transpose for the first stage
+        movdqa      xmm1,       xmm0        ; 00 01 02 03 04 05 06 07
+        movdqa      xmm5,       xmm4        ; 20 21 22 23 24 25 26 27
+
+        punpcklwd   xmm0,       xmm2        ; 00 10 01 11 02 12 03 13
+        punpckhwd   xmm1,       xmm2        ; 04 14 05 15 06 16 07 17
+
+        punpcklwd   xmm4,       xmm3        ; 20 30 21 31 22 32 23 33
+        punpckhwd   xmm5,       xmm3        ; 24 34 25 35 26 36 27 37
+
+        movdqa      xmm2,       xmm0        ; 00 10 01 11 02 12 03 13
+        punpckldq   xmm0,       xmm4        ; 00 10 20 30 01 11 21 31
+
+        punpckhdq   xmm2,       xmm4        ; 02 12 22 32 03 13 23 33
+
+        movdqa      xmm4,       xmm1        ; 04 14 05 15 06 16 07 17
+        punpckldq   xmm4,       xmm5        ; 04 14 24 34 05 15 25 35
+
+        punpckhdq   xmm1,       xmm5        ; 06 16 26 36 07 17 27 37
+        movdqa      xmm3,       xmm2        ; 02 12 22 32 03 13 23 33
+
+        punpckhqdq  xmm3,       xmm1        ; 03 13 23 33 07 17 27 37
+        punpcklqdq  xmm2,       xmm1        ; 02 12 22 32 06 16 26 36
+
+        movdqa      xmm1,       xmm0        ; 00 10 20 30 01 11 21 31
+        punpcklqdq  xmm0,       xmm4        ; 00 10 20 30 04 14 24 34
+
+        punpckhqdq  xmm1,       xmm4        ; 01 11 21 32 05 15 25 35
+
+        ; xmm0 0
+        ; xmm1 1
+        ; xmm2 2
+        ; xmm3 3
+
+        ; first stage
+        movdqa      xmm5,       xmm0
+        movdqa      xmm4,       xmm1
+
+        paddw       xmm0,       xmm3        ; a1 = 0 + 3
+        paddw       xmm1,       xmm2        ; b1 = 1 + 2
+
+        psubw       xmm4,       xmm2        ; c1 = 1 - 2
+        psubw       xmm5,       xmm3        ; d1 = 0 - 3
+
+        psllw       xmm5,        3
+        psllw       xmm4,        3
+
+        psllw       xmm0,        3
+        psllw       xmm1,        3
+
+        ; output 0 and 2
+        movdqa      xmm2,       xmm0        ; a1
+
+        paddw       xmm0,       xmm1        ; op[0] = a1 + b1
+        psubw       xmm2,       xmm1        ; op[2] = a1 - b1
+
+        ; output 1 and 3
+        ; interleave c1, d1
+        movdqa      xmm1,       xmm5        ; d1
+        punpcklwd   xmm1,       xmm4        ; c1 d1
+        punpckhwd   xmm5,       xmm4        ; c1 d1
+
+        movdqa      xmm3,       xmm1
+        movdqa      xmm4,       xmm5
+
+        pmaddwd     xmm1,       XMMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+        pmaddwd     xmm4,       XMMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+
+        pmaddwd     xmm3,       XMMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+        pmaddwd     xmm5,       XMMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+
+        paddd       xmm1,       XMMWORD PTR[GLOBAL(_14500)]
+        paddd       xmm4,       XMMWORD PTR[GLOBAL(_14500)]
+        paddd       xmm3,       XMMWORD PTR[GLOBAL(_7500)]
+        paddd       xmm5,       XMMWORD PTR[GLOBAL(_7500)]
+
+        psrad       xmm1,       12          ; (c1 * 2217 + d1 * 5352 +  14500)>>12
+        psrad       xmm4,       12          ; (c1 * 2217 + d1 * 5352 +  14500)>>12
+        psrad       xmm3,       12          ; (d1 * 2217 - c1 * 5352 +   7500)>>12
+        psrad       xmm5,       12          ; (d1 * 2217 - c1 * 5352 +   7500)>>12
+
+        packssdw    xmm1,       xmm4        ; op[1]
+        packssdw    xmm3,       xmm5        ; op[3]
+
+        ; done with vertical
+        ; transpose for the second stage
+        movdqa      xmm4,       xmm0         ; 00 10 20 30 04 14 24 34
+        movdqa      xmm5,       xmm2         ; 02 12 22 32 06 16 26 36
+
+        punpcklwd   xmm0,       xmm1         ; 00 01 10 11 20 21 30 31
+        punpckhwd   xmm4,       xmm1         ; 04 05 14 15 24 25 34 35
+
+        punpcklwd   xmm2,       xmm3         ; 02 03 12 13 22 23 32 33
+        punpckhwd   xmm5,       xmm3         ; 06 07 16 17 26 27 36 37
+
+        movdqa      xmm1,       xmm0         ; 00 01 10 11 20 21 30 31
+        punpckldq   xmm0,       xmm2         ; 00 01 02 03 10 11 12 13
+
+        punpckhdq   xmm1,       xmm2         ; 20 21 22 23 30 31 32 33
+
+        movdqa      xmm2,       xmm4         ; 04 05 14 15 24 25 34 35
+        punpckldq   xmm2,       xmm5         ; 04 05 06 07 14 15 16 17
+
+        punpckhdq   xmm4,       xmm5         ; 24 25 26 27 34 35 36 37
+        movdqa      xmm3,       xmm1         ; 20 21 22 23 30 31 32 33
+
+        punpckhqdq  xmm3,       xmm4         ; 30 31 32 33 34 35 36 37
+        punpcklqdq  xmm1,       xmm4         ; 20 21 22 23 24 25 26 27
+
+        movdqa      xmm4,       xmm0         ; 00 01 02 03 10 11 12 13
+        punpcklqdq  xmm0,       xmm2         ; 00 01 02 03 04 05 06 07
+
+        punpckhqdq  xmm4,       xmm2         ; 10 11 12 13 14 15 16 17
+
+        ; xmm0 0
+        ; xmm1 4
+        ; xmm2 1
+        ; xmm3 3
+
+        movdqa      xmm5,       xmm0
+        movdqa      xmm2,       xmm1
+
+        paddw       xmm0,       xmm3        ; a1 = 0 + 3
+        paddw       xmm1,       xmm4        ; b1 = 1 + 2
+
+        psubw       xmm4,       xmm2        ; c1 = 1 - 2
+        psubw       xmm5,       xmm3        ; d1 = 0 - 3
+
+        pxor        xmm6,       xmm6        ; zero out for compare
+
+        pcmpeqw     xmm6,       xmm5        ; d1 != 0
+
+        pandn       xmm6,       XMMWORD PTR[GLOBAL(_cmp_mask8x4)]   ; clear upper,
+                                                                    ; and keep bit 0 of lower
+
+        ; output 0 and 2
+        movdqa      xmm2,       xmm0        ; a1
+
+        paddw       xmm0,       xmm1        ; a1 + b1
+        psubw       xmm2,       xmm1        ; a1 - b1
+
+        paddw       xmm0,       XMMWORD PTR[GLOBAL(_7w)]
+        paddw       xmm2,       XMMWORD PTR[GLOBAL(_7w)]
+
+        psraw       xmm0,       4           ; op[0] = (a1 + b1 + 7)>>4
+        psraw       xmm2,       4           ; op[8] = (a1 - b1 + 7)>>4
+
+        ; output 1 and 3
+        ; interleave c1, d1
+        movdqa      xmm1,       xmm5        ; d1
+        punpcklwd   xmm1,       xmm4        ; c1 d1
+        punpckhwd   xmm5,       xmm4        ; c1 d1
+
+        movdqa      xmm3,       xmm1
+        movdqa      xmm4,       xmm5
+
+        pmaddwd     xmm1,       XMMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+        pmaddwd     xmm4,       XMMWORD PTR[GLOBAL (_5352_2217)]    ; c1*2217 + d1*5352
+
+        pmaddwd     xmm3,       XMMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+        pmaddwd     xmm5,       XMMWORD PTR[GLOBAL(_2217_neg5352)]  ; d1*2217 - c1*5352
+
+        paddd       xmm1,       XMMWORD PTR[GLOBAL(_12000)]
+        paddd       xmm4,       XMMWORD PTR[GLOBAL(_12000)]
+        paddd       xmm3,       XMMWORD PTR[GLOBAL(_51000)]
+        paddd       xmm5,       XMMWORD PTR[GLOBAL(_51000)]
+
+        psrad       xmm1,       16          ; (c1 * 2217 + d1 * 5352 +  14500)>>16
+        psrad       xmm4,       16          ; (c1 * 2217 + d1 * 5352 +  14500)>>16
+        psrad       xmm3,       16          ; (d1 * 2217 - c1 * 5352 +   7500)>>16
+        psrad       xmm5,       16          ; (d1 * 2217 - c1 * 5352 +   7500)>>16
+
+        packssdw    xmm1,       xmm4        ; op[4]
+        packssdw    xmm3,       xmm5        ; op[12]
+
+        paddw       xmm1,       xmm6        ; op[4] += (d1!=0)
+
+        movdqa      xmm4,       xmm0
+        movdqa      xmm5,       xmm2
+
+        punpcklqdq  xmm0,       xmm1
+        punpckhqdq  xmm4,       xmm1
+
+        punpcklqdq  xmm2,       xmm3
+        punpckhqdq  xmm5,       xmm3
+
+        movdqa      XMMWORD PTR[output + 0 ],  xmm0
+        movdqa      XMMWORD PTR[output + 16],  xmm2
+        movdqa      XMMWORD PTR[output + 32],  xmm4
+        movdqa      XMMWORD PTR[output + 48],  xmm5
+
+    STACK_FRAME_DESTROY
 
 SECTION_RODATA
 align 16
@@ -161,7 +397,9 @@ align 16
 _cmp_mask:
     times 4 dw 1
     times 4 dw 0
-
+align 16
+_cmp_mask8x4:
+    times 8 dw 1
 align 16
 _mult_sub:
     dw 1
@@ -176,6 +414,9 @@ align 16
 _7:
     times 4 dd 7
 align 16
+_7w:
+    times 8 dw 7
+align 16
 _14500:
     times 4 dd 14500
 align 16
diff --git a/vp8/encoder/x86/dct_x86.h b/vp8/encoder/x86/dct_x86.h
index 05824c684..59a5cb1d7 100644
--- a/vp8/encoder/x86/dct_x86.h
+++ b/vp8/encoder/x86/dct_x86.h
@@ -24,33 +24,31 @@ extern prototype_fdct(vp8_short_fdct4x4_mmx);
 extern prototype_fdct(vp8_short_fdct8x4_mmx);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
-#if 0
+
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_mmx
 
 #undef  vp8_fdct_short8x4
 #define vp8_fdct_short8x4 vp8_short_fdct8x4_mmx
-#endif
 
 #endif
+
 #endif
 
 
 #if HAVE_SSE2
-extern prototype_fdct(vp8_short_fdct8x4_wmt);
+extern prototype_fdct(vp8_short_fdct8x4_sse2);
 extern prototype_fdct(vp8_short_walsh4x4_sse2);
 
 extern prototype_fdct(vp8_short_fdct4x4_sse2);
 
 #if !CONFIG_RUNTIME_CPU_DETECT
-#if 1
-/* short SSE2 DCT currently disabled, does not match the MMX version */
+
 #undef  vp8_fdct_short4x4
 #define vp8_fdct_short4x4 vp8_short_fdct4x4_sse2
 
 #undef  vp8_fdct_short8x4
 #define vp8_fdct_short8x4 vp8_short_fdct8x4_sse2
-#endif
 
 #undef  vp8_fdct_fast4x4
 #define vp8_fdct_fast4x4 vp8_short_fdct4x4_sse2
@@ -58,7 +56,7 @@ extern prototype_fdct(vp8_short_fdct4x4_sse2);
 #undef  vp8_fdct_fast8x4
 #define vp8_fdct_fast8x4 vp8_short_fdct8x4_sse2
 
-#undef vp8_fdct_walsh_short4x4
+#undef  vp8_fdct_walsh_short4x4
 #define vp8_fdct_walsh_short4x4  vp8_short_walsh4x4_sse2
 
 #endif
diff --git a/vp8/encoder/x86/x86_csystemdependent.c b/vp8/encoder/x86/x86_csystemdependent.c
index fb1b37ccb..781079849 100644
--- a/vp8/encoder/x86/x86_csystemdependent.c
+++ b/vp8/encoder/x86/x86_csystemdependent.c
@@ -18,11 +18,10 @@
 #if HAVE_MMX
 void vp8_short_fdct8x4_mmx(short *input, short *output, int pitch)
 {
-    vp8_short_fdct4x4_c(input,   output,    pitch);
-    vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
+    vp8_short_fdct4x4_mmx(input,   output,    pitch);
+    vp8_short_fdct4x4_mmx(input + 4, output + 16, pitch);
 }
 
-
 int vp8_fast_quantize_b_impl_mmx(short *coeff_ptr, short *zbin_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
                                  short *scan_mask, short *round_ptr,
@@ -82,12 +81,6 @@ void vp8_subtract_b_mmx(BLOCK *be, BLOCKD *bd, int pitch)
 #endif
 
 #if HAVE_SSE2
-void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch)
-{
-    vp8_short_fdct4x4_sse2(input,   output,    pitch);
-    vp8_short_fdct4x4_sse2(input + 4, output + 16, pitch);
-}
-
 int vp8_fast_quantize_b_impl_sse2(short *coeff_ptr,
                                  short *qcoeff_ptr, short *dequant_ptr,
                                  short *scan_mask, short *round_ptr,
@@ -249,18 +242,11 @@ void vp8_arch_x86_encoder_init(VP8_COMP *cpi)
         cpi->rtcd.variance.get8x8var             = vp8_get8x8var_mmx;
         cpi->rtcd.variance.get16x16var           = vp8_get16x16var_mmx;
         cpi->rtcd.variance.get4x4sse_cs          = vp8_get4x4sse_cs_mmx;
-#if 0 // new fdct
+
         cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_mmx;
         cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_mmx;
         cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_mmx;
         cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_mmx;
-#else
-        cpi->rtcd.fdct.short4x4                  = vp8_short_fdct4x4_c;
-        cpi->rtcd.fdct.short8x4                  = vp8_short_fdct8x4_c;
-        cpi->rtcd.fdct.fast4x4                   = vp8_short_fdct4x4_c;
-        cpi->rtcd.fdct.fast8x4                   = vp8_short_fdct8x4_c;
-
-#endif
 
         cpi->rtcd.fdct.walsh_short4x4            = vp8_short_walsh4x4_c;
 

From 692b10858d8be54706f3e02ba3075c9718d7d683 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Wed, 10 Nov 2010 14:51:49 -0800
Subject: [PATCH 301/307] configure : Incorrect syntax in configure

Check to see if postproc was enabled when enabling the
postproc visualizer was wrong.

Fix for bug introduced in Change Ia74f357d

Change-Id: I4bee9ad2caee3cfe3bac6972047f6af7c54cad4e
---
 configure | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/configure b/configure
index 60308b028..f32fffe38 100755
--- a/configure
+++ b/configure
@@ -538,8 +538,9 @@ process_toolchain() {
     # Other toolchain specific defaults
     case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
 
-    enabled postproc_visualizer && {enabled postproc \
-        || die "postproc_visualizer requires postproc to be enabled"}
+    if enabled postproc_visualizer; then
+        enabled postproc || die "postproc_visualizer requires postproc to be enabled"
+    fi
 }
 
 

From 58083cb34db84349d8f138e1ee59f4aee77e6624 Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 11 Nov 2010 08:19:37 -0800
Subject: [PATCH 302/307] Revert "Remove stack shadowing for x86-64"

This reverts commit 15acc84f10cefd98b2f8dbd2eac2cc92c5a3f851.

Change-Id: Ia640be8cbc134432914849c1750f62575ea084e6
---
 vp8/encoder/x86/sad_sse3.asm | 881 ++++++++++++++++++-----------------
 1 file changed, 460 insertions(+), 421 deletions(-)

diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index e662497f1..1b7293c20 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -8,171 +8,24 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
+
 %include "vpx_ports/x86_abi_support.asm"
 
-%macro STACK_FRAME_CREATE_X3 0
-%if ABI_IS_32BIT
-  %define     src_ptr       rsi
-  %define     src_stride    rax
-  %define     ref_ptr       rdi
-  %define     ref_stride    rdx
-  %define     end_ptr       rcx
-  %define     ret_var       rbx
-  %define     result_ptr    arg(4)
-  %define     max_err       arg(4)
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-
-    mov         rsi,        arg(0)              ; src_ptr
-    mov         rdi,        arg(2)              ; ref_ptr
-
-    movsxd      rax,        dword ptr arg(1)    ; src_stride
-    movsxd      rdx,        dword ptr arg(3)    ; ref_stride
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
-    %define     src_ptr     rcx
-    %define     src_stride  rdx
-    %define     ref_ptr     r8
-    %define     ref_stride  r9
-    %define     end_ptr     r10
-    %define     ret_var     r11
-    %define     result_ptr  [rsp+8+4*8]
-    %define     max_err     [rsp+8+4*8]
-  %else
-    %define     src_ptr     rdi
-    %define     src_stride  rsi
-    %define     ref_ptr     rdx
-    %define     ref_stride  rcx
-    %define     end_ptr     r9
-    %define     ret_var     r10
-    %define     result_ptr  r8
-    %define     max_err     r8
-  %endif
-%endif
-
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X3 0
-  %define     src_ptr
-  %define     src_stride
-  %define     ref_ptr
-  %define     ref_stride
-  %define     end_ptr
-  %define     ret_var
-  %define     result_ptr
-  %define     max_err
-
-%if ABI_IS_32BIT
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
-  %endif
-%endif
-    ret
-%endmacro
-
-%macro STACK_FRAME_CREATE_X4 0
-%if ABI_IS_32BIT
-  %define     src_ptr       rsi
-  %define     src_stride    rax
-  %define     r0_ptr        rcx
-  %define     r1_ptr        rdx
-  %define     r2_ptr        rbx
-  %define     r3_ptr        rdi
-  %define     ref_stride    rbp
-  %define     result_ptr    arg(4)
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-
-    push        rbp
-    mov         rdi,        arg(2)              ; ref_ptr_base
-
-    LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-    mov         rsi,        arg(0)              ; src_ptr
-
-    movsxd      rbx,        dword ptr arg(1)    ; src_stride
-    movsxd      rbp,        dword ptr arg(3)    ; ref_stride
-
-    xchg        rbx,        rax
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
-    %define     src_ptr     rcx
-    %define     src_stride  rdx
-    %define     r0_ptr      rsi
-    %define     r1_ptr      r10
-    %define     r2_ptr      r11
-    %define     r3_ptr      r8
-    %define     ref_stride  r9
-    %define     result_ptr  [rsp+16+4*8]
-    push        rsi
-
-    LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
-  %else
-    %define     src_ptr     rdi
-    %define     src_stride  rsi
-    %define     r0_ptr      r9
-    %define     r1_ptr      r10
-    %define     r2_ptr      r11
-    %define     r3_ptr      rdx
-    %define     ref_stride  rcx
-    %define     result_ptr  r8
-
-    LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
-
-  %endif
-%endif
-%endmacro
-
-%macro STACK_FRAME_DESTROY_X4 0
-  %define     src_ptr
-  %define     src_stride
-  %define     r0_ptr
-  %define     r1_ptr
-  %define     r2_ptr
-  %define     r3_ptr
-  %define     ref_stride
-  %define     result_ptr
-
-%if ABI_IS_32BIT
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-%else
-  %ifidn __OUTPUT_FORMAT__,x64
-    pop         rsi
-  %endif
-%endif
-    ret
-%endmacro
-
-%macro PROCESS_16X2X3 5
-%if %1==0
-        movdqa          xmm0,       XMMWORD PTR [%2]
-        lddqu           xmm5,       XMMWORD PTR [%3]
-        lddqu           xmm6,       XMMWORD PTR [%3+1]
-        lddqu           xmm7,       XMMWORD PTR [%3+2]
+%macro PROCESS_16X2X3 1
+%if %1
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm5,       XMMWORD PTR [rdi]
+        lddqu           xmm6,       XMMWORD PTR [rdi+1]
+        lddqu           xmm7,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [%2]
-        lddqu           xmm1,       XMMWORD PTR [%3]
-        lddqu           xmm2,       XMMWORD PTR [%3+1]
-        lddqu           xmm3,       XMMWORD PTR [%3+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm2,       XMMWORD PTR [rdi+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -182,15 +35,13 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       XMMWORD PTR [%2+%4]
-        lddqu           xmm1,       XMMWORD PTR [%3+%5]
-        lddqu           xmm2,       XMMWORD PTR [%3+%5+1]
-        lddqu           xmm3,       XMMWORD PTR [%3+%5+2]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
+        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
+        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
 
-%if %1==0 || %1==1
-        lea             %2,         [%2+%4*2]
-        lea             %3,         [%3+%5*2]
-%endif
+        lea             rsi,        [rsi+rax*2]
+        lea             rdi,        [rdi+rdx*2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -201,21 +52,21 @@
         paddw           xmm7,       xmm3
 %endmacro
 
-%macro PROCESS_8X2X3 5
-%if %1==0
-        movq            mm0,       QWORD PTR [%2]
-        movq            mm5,       QWORD PTR [%3]
-        movq            mm6,       QWORD PTR [%3+1]
-        movq            mm7,       QWORD PTR [%3+2]
+%macro PROCESS_8X2X3 1
+%if %1
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm5,       QWORD PTR [rdi]
+        movq            mm6,       QWORD PTR [rdi+1]
+        movq            mm7,       QWORD PTR [rdi+2]
 
         psadbw          mm5,       mm0
         psadbw          mm6,       mm0
         psadbw          mm7,       mm0
 %else
-        movq            mm0,       QWORD PTR [%2]
-        movq            mm1,       QWORD PTR [%3]
-        movq            mm2,       QWORD PTR [%3+1]
-        movq            mm3,       QWORD PTR [%3+2]
+        movq            mm0,       QWORD PTR [rsi]
+        movq            mm1,       QWORD PTR [rdi]
+        movq            mm2,       QWORD PTR [rdi+1]
+        movq            mm3,       QWORD PTR [rdi+2]
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -225,15 +76,13 @@
         paddw           mm6,       mm2
         paddw           mm7,       mm3
 %endif
-        movq            mm0,       QWORD PTR [%2+%4]
-        movq            mm1,       QWORD PTR [%3+%5]
-        movq            mm2,       QWORD PTR [%3+%5+1]
-        movq            mm3,       QWORD PTR [%3+%5+2]
+        movq            mm0,       QWORD PTR [rsi+rax]
+        movq            mm1,       QWORD PTR [rdi+rdx]
+        movq            mm2,       QWORD PTR [rdi+rdx+1]
+        movq            mm3,       QWORD PTR [rdi+rdx+2]
 
-%if %1==0 || %1==1
-        lea             %2,        [%2+%4*2]
-        lea             %3,        [%3+%5*2]
-%endif
+        lea             rsi,       [rsi+rax*2]
+        lea             rdi,       [rdi+rdx*2]
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -252,117 +101,115 @@
         mov             %5,         [%1+REG_SZ_BYTES*3]
 %endmacro
 
-%macro PROCESS_16X2X4 8
-%if %1==0
-        movdqa          xmm0,       XMMWORD PTR [%2]
-        lddqu           xmm4,       XMMWORD PTR [%3]
-        lddqu           xmm5,       XMMWORD PTR [%4]
-        lddqu           xmm6,       XMMWORD PTR [%5]
-        lddqu           xmm7,       XMMWORD PTR [%6]
+%macro PROCESS_16X2X4 1
+%if %1
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm4,       XMMWORD PTR [rcx]
+        lddqu           xmm5,       XMMWORD PTR [rdx]
+        lddqu           xmm6,       XMMWORD PTR [rbx]
+        lddqu           xmm7,       XMMWORD PTR [rdi]
 
         psadbw          xmm4,       xmm0
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [%2]
-        lddqu           xmm1,       XMMWORD PTR [%3]
-        lddqu           xmm2,       XMMWORD PTR [%4]
-        lddqu           xmm3,       XMMWORD PTR [%5]
+        movdqa          xmm0,       XMMWORD PTR [rsi]
+        lddqu           xmm1,       XMMWORD PTR [rcx]
+        lddqu           xmm2,       XMMWORD PTR [rdx]
+        lddqu           xmm3,       XMMWORD PTR [rbx]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [%6]
+        lddqu           xmm1,       XMMWORD PTR [rdi]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 %endif
-        movdqa          xmm0,       XMMWORD PTR [%2+%7]
-        lddqu           xmm1,       XMMWORD PTR [%3+%8]
-        lddqu           xmm2,       XMMWORD PTR [%4+%8]
-        lddqu           xmm3,       XMMWORD PTR [%5+%8]
+        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
+        lddqu           xmm1,       XMMWORD PTR [rcx+rbp]
+        lddqu           xmm2,       XMMWORD PTR [rdx+rbp]
+        lddqu           xmm3,       XMMWORD PTR [rbx+rbp]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [%6+%8]
+        lddqu           xmm1,       XMMWORD PTR [rdi+rbp]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
-%if %1==0 || %1==1
-        lea             %2,         [%2+%7*2]
-        lea             %3,         [%3+%8*2]
+        lea             rsi,        [rsi+rax*2]
+        lea             rcx,        [rcx+rbp*2]
 
-        lea             %4,         [%4+%8*2]
-        lea             %5,         [%5+%8*2]
+        lea             rdx,        [rdx+rbp*2]
+        lea             rbx,        [rbx+rbp*2]
+
+        lea             rdi,        [rdi+rbp*2]
 
-        lea             %6,         [%6+%8*2]
-%endif
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 
 %endmacro
 
-%macro PROCESS_8X2X4 8
-%if %1==0
-        movq            mm0,        QWORD PTR [%2]
-        movq            mm4,        QWORD PTR [%3]
-        movq            mm5,        QWORD PTR [%4]
-        movq            mm6,        QWORD PTR [%5]
-        movq            mm7,        QWORD PTR [%6]
+%macro PROCESS_8X2X4 1
+%if %1
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm4,        QWORD PTR [rcx]
+        movq            mm5,        QWORD PTR [rdx]
+        movq            mm6,        QWORD PTR [rbx]
+        movq            mm7,        QWORD PTR [rdi]
 
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
         psadbw          mm6,        mm0
         psadbw          mm7,        mm0
 %else
-        movq            mm0,        QWORD PTR [%2]
-        movq            mm1,        QWORD PTR [%3]
-        movq            mm2,        QWORD PTR [%4]
-        movq            mm3,        QWORD PTR [%5]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm1,        QWORD PTR [rcx]
+        movq            mm2,        QWORD PTR [rdx]
+        movq            mm3,        QWORD PTR [rbx]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [%6]
+        movq            mm1,        QWORD PTR [rdi]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 %endif
-        movq            mm0,        QWORD PTR [%2+%7]
-        movq            mm1,        QWORD PTR [%3+%8]
-        movq            mm2,        QWORD PTR [%4+%8]
-        movq            mm3,        QWORD PTR [%5+%8]
+        movq            mm0,        QWORD PTR [rsi+rax]
+        movq            mm1,        QWORD PTR [rcx+rbp]
+        movq            mm2,        QWORD PTR [rdx+rbp]
+        movq            mm3,        QWORD PTR [rbx+rbp]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [%6+%8]
+        movq            mm1,        QWORD PTR [rdi+rbp]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
-%if %1==0 || %1==1
-        lea             %2,         [%2+%7*2]
-        lea             %3,         [%3+%8*2]
+        lea             rsi,        [rsi+rax*2]
+        lea             rcx,        [rcx+rbp*2]
 
-        lea             %4,         [%4+%8*2]
-        lea             %5,         [%5+%8*2]
+        lea             rdx,        [rdx+rbp*2]
+        lea             rbx,        [rbx+rbp*2]
+
+        lea             rdi,        [rdi+rbp*2]
 
-        lea             %6,         [%6+%8*2]
-%endif
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 
@@ -376,39 +223,54 @@
 ;    int  *results)
 global sym(vp8_sad16x16x3_sse3)
 sym(vp8_sad16x16x3_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        mov             rcx,        result_ptr
+        PROCESS_16X2X3 1
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+
+        mov             rdi,        arg(4) ;Results
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rcx],      xmm0
+        movd            [rdi],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rcx+4],    xmm0
+        movd            [rdi+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rcx+8],    xmm0
+        movd            [rdi+8],    xmm0
 
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad16x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -418,35 +280,50 @@ sym(vp8_sad16x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x3_sse3)
 sym(vp8_sad16x8x3_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        mov             rcx,        result_ptr
+        PROCESS_16X2X3 1
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+        PROCESS_16X2X3 0
+
+        mov             rdi,        arg(4) ;Results
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rcx],      xmm0
+        movd            [rdi],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rcx+4],    xmm0
+        movd            [rdi+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rcx+8],    xmm0
+        movd            [rdi+8],    xmm0
 
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad8x16x3_sse3(
 ;    unsigned char *src_ptr,
@@ -456,26 +333,40 @@ sym(vp8_sad16x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x3_sse3)
 sym(vp8_sad8x16x3_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        mov             rcx,        result_ptr
+        PROCESS_8X2X3 1
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
 
-        punpckldq       mm5,        mm6
+        mov             rdi,        arg(4) ;Results
 
-        movq            [rcx],      mm5
-        movd            [rcx+8],    mm7
+        movd            [rdi],      mm5
+        movd            [rdi+4],    mm6
+        movd            [rdi+8],    mm7
 
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad8x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -485,22 +376,36 @@ sym(vp8_sad8x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x3_sse3)
 sym(vp8_sad8x8x3_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
-        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        mov             rcx,        result_ptr
+        PROCESS_8X2X3 1
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
+        PROCESS_8X2X3 0
 
-        punpckldq       mm5,        mm6
+        mov             rdi,        arg(4) ;Results
 
-        movq            [rcx],      mm5
-        movd            [rcx+8],    mm7
+        movd            [rdi],      mm5
+        movd            [rdi+4],    mm6
+        movd            [rdi+8],    mm7
 
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad4x4x3_sse3(
 ;    unsigned char *src_ptr,
@@ -510,23 +415,33 @@ sym(vp8_sad8x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x3_sse3)
 sym(vp8_sad4x4x3_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        movd            mm0,        DWORD PTR [src_ptr]
-        movd            mm1,        DWORD PTR [ref_ptr]
+        movsxd          rax,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        movd            mm2,        DWORD PTR [src_ptr+src_stride]
-        movd            mm3,        DWORD PTR [ref_ptr+ref_stride]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rdi]
+
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [ref_ptr+1]
-        movd            mm5,        DWORD PTR [ref_ptr+2]
+        movd            mm4,        DWORD PTR [rdi+1]
+        movd            mm5,        DWORD PTR [rdi+2]
 
-        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
-        movd            mm3,        DWORD PTR [ref_ptr+ref_stride+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm3,        DWORD PTR [rdi+rdx+2]
 
         psadbw          mm1,        mm0
 
@@ -536,27 +451,29 @@ sym(vp8_sad4x4x3_sse3):
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
 
-        lea             src_ptr,    [src_ptr+src_stride*2]
-        lea             ref_ptr,    [ref_ptr+ref_stride*2]
 
-        movd            mm0,        DWORD PTR [src_ptr]
-        movd            mm2,        DWORD PTR [ref_ptr]
 
-        movd            mm3,        DWORD PTR [src_ptr+src_stride]
-        movd            mm6,        DWORD PTR [ref_ptr+ref_stride]
+        lea             rsi,        [rsi+rax*2]
+        lea             rdi,        [rdi+rdx*2]
+
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rdi]
+
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm6,        DWORD PTR [rdi+rdx]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm6
 
-        movd            mm3,        DWORD PTR [ref_ptr+1]
-        movd            mm7,        DWORD PTR [ref_ptr+2]
+        movd            mm3,        DWORD PTR [rdi+1]
+        movd            mm7,        DWORD PTR [rdi+2]
 
         psadbw          mm2,        mm0
 
         paddw           mm1,        mm2
 
-        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
-        movd            mm6,        DWORD PTR [ref_ptr+ref_stride+2]
+        movd            mm2,        DWORD PTR [rdi+rdx+1]
+        movd            mm6,        DWORD PTR [rdi+rdx+2]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm6
@@ -567,14 +484,19 @@ sym(vp8_sad4x4x3_sse3):
         paddw           mm3,        mm4
         paddw           mm7,        mm5
 
-        mov             rcx,        result_ptr
+        mov             rdi,        arg(4) ;Results
+        movd            [rdi],      mm1
 
-        punpckldq       mm1,        mm3
+        movd            [rdi+4],    mm3
+        movd            [rdi+8],    mm7
 
-        movq            [rcx],      mm1
-        movd            [rcx+8],    mm7
 
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop rdi
+    pop rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;unsigned int vp8_sad16x16_sse3(
 ;    unsigned char *src_ptr,
@@ -585,40 +507,51 @@ sym(vp8_sad4x4x3_sse3):
 ;%define lddqu movdqu
 global sym(vp8_sad16x16_sse3)
 sym(vp8_sad16x16_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rbx
+    push        rsi
+    push        rdi
+    ; end prolog
 
-    STACK_FRAME_CREATE_X3
+        mov             rsi,        arg(0) ;src_ptr
+        mov             rdi,        arg(2) ;ref_ptr
 
-        lea             end_ptr,    [src_ptr+src_stride*8]
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rdx,        dword ptr arg(3) ;ref_stride
 
-        lea             end_ptr,    [end_ptr+src_stride*8]
+        lea             rcx,        [rsi+rbx*8]
+
+        lea             rcx,        [rcx+rbx*8]
         pxor            mm7,        mm7
 
-.vp8_sad16x16_sse3_loop:
+vp8_sad16x16_sse3_loop:
 
-        movq            ret_var,    mm7
-        cmp             ret_var,    max_err
-        jg              .vp8_sad16x16_early_exit
+        movq            rax,        mm7
+        cmp             rax,        arg(4)
+        jg              vp8_sad16x16_early_exit
 
-        movq            mm0,        QWORD PTR [src_ptr]
-        movq            mm2,        QWORD PTR [src_ptr+8]
+        movq            mm0,        QWORD PTR [rsi]
+        movq            mm2,        QWORD PTR [rsi+8]
 
-        movq            mm1,        QWORD PTR [ref_ptr]
-        movq            mm3,        QWORD PTR [ref_ptr+8]
+        movq            mm1,        QWORD PTR [rdi]
+        movq            mm3,        QWORD PTR [rdi+8]
 
-        movq            mm4,        QWORD PTR [src_ptr+src_stride]
-        movq            mm5,        QWORD PTR [ref_ptr+ref_stride]
+        movq            mm4,        QWORD PTR [rsi+rbx]
+        movq            mm5,        QWORD PTR [rdi+rdx]
 
         psadbw          mm0,        mm1
         psadbw          mm2,        mm3
 
-        movq            mm1,        QWORD PTR [src_ptr+src_stride+8]
-        movq            mm3,        QWORD PTR [ref_ptr+ref_stride+8]
+        movq            mm1,        QWORD PTR [rsi+rbx+8]
+        movq            mm3,        QWORD PTR [rdi+rdx+8]
 
         psadbw          mm4,        mm5
         psadbw          mm1,        mm3
 
-        lea             src_ptr,    [src_ptr+src_stride*2]
-        lea             ref_ptr,    [ref_ptr+ref_stride*2]
+        lea             rsi,        [rsi+rbx*2]
+        lea             rdi,        [rdi+rdx*2]
 
         paddw           mm0,        mm2
         paddw           mm4,        mm1
@@ -626,16 +559,20 @@ sym(vp8_sad16x16_sse3):
         paddw           mm7,        mm0
         paddw           mm7,        mm4
 
-        cmp             src_ptr,    end_ptr
-        jne             .vp8_sad16x16_sse3_loop
+        cmp             rsi,        rcx
+        jne             vp8_sad16x16_sse3_loop
 
-        movq            ret_var,    mm7
+        movq            rax,        mm7
 
-.vp8_sad16x16_early_exit:
+vp8_sad16x16_early_exit:
 
-        mov             rax,        ret_var
-
-    STACK_FRAME_DESTROY_X3
+    ; begin epilog
+    pop         rdi
+    pop         rsi
+    pop         rbx
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void vp8_sad16x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -645,48 +582,69 @@ sym(vp8_sad16x16_sse3):
 ;    int  *results)
 global sym(vp8_sad16x16x4d_sse3)
 sym(vp8_sad16x16x4d_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
 
-    STACK_FRAME_CREATE_X4
+        push            rbp
+        mov             rdi,        arg(2) ; ref_ptr_base
 
-        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+        mov             rsi,        arg(0) ;src_ptr
+
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rbp,        dword ptr arg(3) ;ref_stride
+
+        xchg            rbx,        rax
+
+        PROCESS_16X2X4 1
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
 
-%if ABI_IS_32BIT
         pop             rbp
-%endif
-        mov             rcx,        result_ptr
+        mov             rdi,        arg(4) ;Results
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rcx],      xmm0
+        movd            [rdi],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rcx+4],    xmm0
+        movd            [rdi+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rcx+8],    xmm0
+        movd            [rdi+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rcx+12],   xmm0
+        movd            [rdi+12],   xmm0
 
-    STACK_FRAME_DESTROY_X4
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void vp8_sad16x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -696,44 +654,65 @@ sym(vp8_sad16x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x4d_sse3)
 sym(vp8_sad16x8x4d_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
 
-    STACK_FRAME_CREATE_X4
+        push            rbp
+        mov             rdi,        arg(2) ; ref_ptr_base
 
-        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+        mov             rsi,        arg(0) ;src_ptr
+
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rbp,        dword ptr arg(3) ;ref_stride
+
+        xchg            rbx,        rax
+
+        PROCESS_16X2X4 1
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0
 
-%if ABI_IS_32BIT
         pop             rbp
-%endif
-        mov             rcx,        result_ptr
+        mov             rdi,        arg(4) ;Results
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rcx],      xmm0
+        movd            [rdi],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rcx+4],    xmm0
+        movd            [rdi+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rcx+8],    xmm0
+        movd            [rdi+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rcx+12],   xmm0
+        movd            [rdi+12],   xmm0
 
-    STACK_FRAME_DESTROY_X4
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad8x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -743,30 +722,50 @@ sym(vp8_sad16x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x4d_sse3)
 sym(vp8_sad8x16x4d_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
 
-    STACK_FRAME_CREATE_X4
+        push            rbp
+        mov             rdi,        arg(2) ; ref_ptr_base
 
-        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+        mov             rsi,        arg(0) ;src_ptr
+
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rbp,        dword ptr arg(3) ;ref_stride
+
+        xchg            rbx,        rax
+
+        PROCESS_8X2X4 1
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
 
-%if ABI_IS_32BIT
         pop             rbp
-%endif
-        mov             rcx,        result_ptr
+        mov             rdi,        arg(4) ;Results
 
-        punpckldq       mm4,        mm5
-        punpckldq       mm6,        mm7
+        movd            [rdi],      mm4
+        movd            [rdi+4],    mm5
+        movd            [rdi+8],    mm6
+        movd            [rdi+12],   mm7
 
-        movq            [rcx],      mm4
-        movq            [rcx+8],    mm6
-
-    STACK_FRAME_DESTROY_X4
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad8x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -776,26 +775,46 @@ sym(vp8_sad8x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x4d_sse3)
 sym(vp8_sad8x8x4d_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
 
-    STACK_FRAME_CREATE_X4
+        push            rbp
+        mov             rdi,        arg(2) ; ref_ptr_base
 
-        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
-        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+        mov             rsi,        arg(0) ;src_ptr
+
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rbp,        dword ptr arg(3) ;ref_stride
+
+        xchg            rbx,        rax
+
+        PROCESS_8X2X4 1
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0
 
-%if ABI_IS_32BIT
         pop             rbp
-%endif
-        mov             rcx,        result_ptr
+        mov             rdi,        arg(4) ;Results
 
-        punpckldq       mm4,        mm5
-        punpckldq       mm6,        mm7
+        movd            [rdi],      mm4
+        movd            [rdi+4],    mm5
+        movd            [rdi+8],    mm6
+        movd            [rdi+12],   mm7
 
-        movq            [rcx],      mm4
-        movq            [rcx+8],    mm6
-
-    STACK_FRAME_DESTROY_X4
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret
 
 ;void int vp8_sad4x4x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -805,26 +824,43 @@ sym(vp8_sad8x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x4d_sse3)
 sym(vp8_sad4x4x4d_sse3):
+    push        rbp
+    mov         rbp, rsp
+    SHADOW_ARGS_TO_STACK 5
+    push        rsi
+    push        rdi
+    push        rbx
+    ; end prolog
 
-    STACK_FRAME_CREATE_X4
+        push            rbp
+        mov             rdi,        arg(2) ; ref_ptr_base
 
-        movd            mm0,        DWORD PTR [src_ptr]
-        movd            mm1,        DWORD PTR [r0_ptr]
+        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
 
-        movd            mm2,        DWORD PTR [src_ptr+src_stride]
-        movd            mm3,        DWORD PTR [r0_ptr+ref_stride]
+        mov             rsi,        arg(0) ;src_ptr
+
+        movsxd          rbx,        dword ptr arg(1) ;src_stride
+        movsxd          rbp,        dword ptr arg(3) ;ref_stride
+
+        xchg            rbx,        rax
+
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm1,        DWORD PTR [rcx]
+
+        movd            mm2,        DWORD PTR [rsi+rax]
+        movd            mm3,        DWORD PTR [rcx+rbp]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [r1_ptr]
-        movd            mm5,        DWORD PTR [r2_ptr]
+        movd            mm4,        DWORD PTR [rdx]
+        movd            mm5,        DWORD PTR [rbx]
 
-        movd            mm6,        DWORD PTR [r3_ptr]
-        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
+        movd            mm6,        DWORD PTR [rdi]
+        movd            mm2,        DWORD PTR [rdx+rbp]
 
-        movd            mm3,        DWORD PTR [r2_ptr+ref_stride]
-        movd            mm7,        DWORD PTR [r3_ptr+ref_stride]
+        movd            mm3,        DWORD PTR [rbx+rbp]
+        movd            mm7,        DWORD PTR [rdi+rbp]
 
         psadbw          mm1,        mm0
 
@@ -839,40 +875,37 @@ sym(vp8_sad4x4x4d_sse3):
 
 
 
-        lea             src_ptr,    [src_ptr+src_stride*2]
-        lea             r0_ptr,     [r0_ptr+ref_stride*2]
+        lea             rsi,        [rsi+rax*2]
+        lea             rcx,        [rcx+rbp*2]
 
-        lea             r1_ptr,     [r1_ptr+ref_stride*2]
-        lea             r2_ptr,     [r2_ptr+ref_stride*2]
+        lea             rdx,        [rdx+rbp*2]
+        lea             rbx,        [rbx+rbp*2]
 
-        lea             r3_ptr,     [r3_ptr+ref_stride*2]
+        lea             rdi,        [rdi+rbp*2]
 
-        movd            mm0,        DWORD PTR [src_ptr]
-        movd            mm2,        DWORD PTR [r0_ptr]
+        movd            mm0,        DWORD PTR [rsi]
+        movd            mm2,        DWORD PTR [rcx]
 
-        movd            mm3,        DWORD PTR [src_ptr+src_stride]
-        movd            mm7,        DWORD PTR [r0_ptr+ref_stride]
+        movd            mm3,        DWORD PTR [rsi+rax]
+        movd            mm7,        DWORD PTR [rcx+rbp]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm7
 
-        movd            mm3,        DWORD PTR [r1_ptr]
-        movd            mm7,        DWORD PTR [r2_ptr]
+        movd            mm3,        DWORD PTR [rdx]
+        movd            mm7,        DWORD PTR [rbx]
 
         psadbw          mm2,        mm0
-%if ABI_IS_32BIT
         mov             rax,        rbp
 
         pop             rbp
-%define     ref_stride    rax
-%endif
-        mov             rsi,        result_ptr
+        mov             rsi,        arg(4) ;Results
 
         paddw           mm1,        mm2
         movd            [rsi],      mm1
 
-        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
-        movd            mm1,        DWORD PTR [r2_ptr+ref_stride]
+        movd            mm2,        DWORD PTR [rdx+rax]
+        movd            mm1,        DWORD PTR [rbx+rax]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm1
@@ -880,8 +913,8 @@ sym(vp8_sad4x4x4d_sse3):
         psadbw          mm3,        mm0
         psadbw          mm7,        mm0
 
-        movd            mm2,        DWORD PTR [r3_ptr]
-        movd            mm1,        DWORD PTR [r3_ptr+ref_stride]
+        movd            mm2,        DWORD PTR [rdi]
+        movd            mm1,        DWORD PTR [rdi+rax]
 
         paddw           mm3,        mm4
         paddw           mm7,        mm5
@@ -896,4 +929,10 @@ sym(vp8_sad4x4x4d_sse3):
         movd            [rsi+12],   mm2
 
 
-    STACK_FRAME_DESTROY_X4
+    ; begin epilog
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    UNSHADOW_ARGS
+    pop         rbp
+    ret

From 0a49747b01401900272ccf0dfbb6481429707ad5 Mon Sep 17 00:00:00 2001
From: John Koleszar <jkoleszar@google.com>
Date: Thu, 11 Nov 2010 12:41:07 -0500
Subject: [PATCH 303/307] quantizer: fix assertion in fast quantizer path

The fast quantizer assembly code has not been updated to match the new
exact quantizer, which was made the default in commit 6adbe09.
Specifically, they are not aware of the potential for the coefficient
to be scaled, which results in the quantized result exceeding the range
of the DCT. This patch restores the previous behavior of using the
non-shifted coefficients when in the fast quantizer code path, but
unfortunately requires rebuilding the tables when switching between the
two.

Change-Id: I0a33f5b3850335011a06906f49fafed54dda9546
---
 vp8/encoder/encodeframe.c | 47 ++++++++++++++----------
 vp8/encoder/onyx_if.c     |  3 ++
 vp8/encoder/quantize.c    | 75 +++++++++++++++++++++------------------
 3 files changed, 72 insertions(+), 53 deletions(-)

diff --git a/vp8/encoder/encodeframe.c b/vp8/encoder/encodeframe.c
index b67edd39f..2aac20b31 100644
--- a/vp8/encoder/encodeframe.c
+++ b/vp8/encoder/encodeframe.c
@@ -146,16 +146,25 @@ static const int qzbin_factors_y2[129] =
 
 #define EXACT_QUANT
 #ifdef EXACT_QUANT
-static void vp8cx_invert_quant(short *quant, short *shift, short d)
+static void vp8cx_invert_quant(int improved_quant, short *quant,
+                               short *shift, short d)
 {
-    unsigned t;
-    int l;
-    t = d;
-    for(l = 0; t > 1; l++)
-        t>>=1;
-    t = 1 + (1<<(16+l))/d;
-    *quant = (short)(t - (1<<16));
-    *shift = l;
+    if(improved_quant)
+    {
+        unsigned t;
+        int l;
+        t = d;
+        for(l = 0; t > 1; l++)
+            t>>=1;
+        t = 1 + (1<<(16+l))/d;
+        *quant = (short)(t - (1<<16));
+        *shift = l;
+    }
+    else
+    {
+        *quant = (1 << 16) / d;
+        *shift = 0;
+    }
 }
 
 void vp8cx_init_quantizer(VP8_COMP *cpi)
@@ -170,7 +179,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
     {
         // dc values
         quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q);
-        vp8cx_invert_quant(cpi->Y1quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0,
                            cpi->Y1quant_shift[Q] + 0, quant_val);
         cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
         cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -178,7 +187,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q);
-        vp8cx_invert_quant(cpi->Y2quant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0,
                            cpi->Y2quant_shift[Q] + 0, quant_val);
         cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
         cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -186,7 +195,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
         cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7;
 
         quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q);
-        vp8cx_invert_quant(cpi->UVquant[Q] + 0,
+        vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0,
                            cpi->UVquant_shift[Q] + 0, quant_val);
         cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;;
         cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -199,7 +208,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             int rc = vp8_default_zig_zag1d[i];
 
             quant_val = vp8_ac_yquant(Q);
-            vp8cx_invert_quant(cpi->Y1quant[Q] + rc,
+            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + rc,
                                cpi->Y1quant_shift[Q] + rc, quant_val);
             cpi->Y1zbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->Y1round[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -207,7 +216,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             cpi->zrun_zbin_boost_y1[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q);
-            vp8cx_invert_quant(cpi->Y2quant[Q] + rc,
+            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + rc,
                                cpi->Y2quant_shift[Q] + rc, quant_val);
             cpi->Y2zbin[Q][rc] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7;
             cpi->Y2round[Q][rc] = (qrounding_factors_y2[Q] * quant_val) >> 7;
@@ -215,7 +224,7 @@ void vp8cx_init_quantizer(VP8_COMP *cpi)
             cpi->zrun_zbin_boost_y2[Q][i] = (quant_val * zbin_boost[i]) >> 7;
 
             quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q);
-            vp8cx_invert_quant(cpi->UVquant[Q] + rc,
+            vp8cx_invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + rc,
                                cpi->UVquant_shift[Q] + rc, quant_val);
             cpi->UVzbin[Q][rc] = ((qzbin_factors[Q] * quant_val) + 64) >> 7;
             cpi->UVround[Q][rc] = (qrounding_factors[Q] * quant_val) >> 7;
@@ -405,14 +414,14 @@ void encode_mb_row(VP8_COMP *cpi,
     // Set up limit values for vertical motion vector components
     // to prevent them extending beyond the UMV borders
     x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
-    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) 
+    x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16)
                         + (VP8BORDERINPIXELS - 16);
 
     // for each macroblock col in image
     for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
     {
-        // Distance of Mb to the left & right edges, specified in 
-        // 1/8th pel units as they are always compared to values 
+        // Distance of Mb to the left & right edges, specified in
+        // 1/8th pel units as they are always compared to values
         // that are in 1/8th pel units
         xd->mb_to_left_edge = -((mb_col * 16) << 3);
         xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
@@ -420,7 +429,7 @@ void encode_mb_row(VP8_COMP *cpi,
         // Set up limit values for horizontal motion vector components
         // to prevent them extending beyond the UMV borders
         x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
-        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) 
+        x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16)
                             + (VP8BORDERINPIXELS - 16);
 
         xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
diff --git a/vp8/encoder/onyx_if.c b/vp8/encoder/onyx_if.c
index ba7bb104b..00ecf97a6 100644
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -563,6 +563,7 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     int Speed = cpi->Speed;
     int i;
     VP8_COMMON *cm = &cpi->common;
+    int last_improved_quant = sf->improved_quant;
 
     // Initialise default mode frequency sampling variables
     for (i = 0; i < MAX_MODES; i ++)
@@ -1262,6 +1263,8 @@ void vp8_set_speed_features(VP8_COMP *cpi)
     {
         cpi->mb.quantize_b      = QUANTIZE_INVOKE(&cpi->rtcd.quantize, fastquantb);
     }
+    if (cpi->sf.improved_quant != last_improved_quant)
+        vp8cx_init_quantizer(cpi);
 
 #if CONFIG_RUNTIME_CPU_DETECT
     cpi->mb.e_mbd.rtcd = &cpi->common.rtcd;
diff --git a/vp8/encoder/quantize.c b/vp8/encoder/quantize.c
index c2c0351c0..a1be6614b 100644
--- a/vp8/encoder/quantize.c
+++ b/vp8/encoder/quantize.c
@@ -17,7 +17,8 @@
 #include "predictdc.h"
 
 #define EXACT_QUANT
-#ifdef EXACT_QUANT
+
+#ifdef EXACT_FASTQUANT
 void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -64,6 +65,45 @@ void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
     d->eob = eob + 1;
 }
 
+#else
+
+void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
+{
+    int i, rc, eob;
+    int zbin;
+    int x, y, z, sz;
+    short *coeff_ptr   = b->coeff;
+    short *round_ptr   = b->round;
+    short *quant_ptr   = b->quant;
+    short *qcoeff_ptr  = d->qcoeff;
+    short *dqcoeff_ptr = d->dqcoeff;
+    short *dequant_ptr = d->dequant;
+
+    eob = -1;
+    for (i = 0; i < 16; i++)
+    {
+        rc   = vp8_default_zig_zag1d[i];
+        z    = coeff_ptr[rc];
+
+        sz = (z >> 31);                                 // sign of z
+        x  = (z ^ sz) - sz;                             // x = abs(z)
+
+        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
+        x  = (y ^ sz) - sz;                         // get the sign back
+        qcoeff_ptr[rc] = x;                          // write to destination
+        dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
+
+        if (y)
+        {
+            eob = i;                                // last nonzero coeffs
+        }
+    }
+    d->eob = eob + 1;
+}
+
+#endif
+
+#ifdef EXACT_QUANT
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {
     int i, rc, eob;
@@ -178,39 +218,6 @@ void vp8_strict_quantize_b(BLOCK *b, BLOCKD *d)
 }
 
 #else
-void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d)
-{
-    int i, rc, eob;
-    int zbin;
-    int x, y, z, sz;
-    short *coeff_ptr   = b->coeff;
-    short *round_ptr   = b->round;
-    short *quant_ptr   = b->quant;
-    short *qcoeff_ptr  = d->qcoeff;
-    short *dqcoeff_ptr = d->dqcoeff;
-    short *dequant_ptr = d->dequant;
-
-    eob = -1;
-    for (i = 0; i < 16; i++)
-    {
-        rc   = vp8_default_zig_zag1d[i];
-        z    = coeff_ptr[rc];
-
-        sz = (z >> 31);                                 // sign of z
-        x  = (z ^ sz) - sz;                             // x = abs(z)
-
-        y  = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; // quantize (x)
-        x  = (y ^ sz) - sz;                         // get the sign back
-        qcoeff_ptr[rc] = x;                          // write to destination
-        dqcoeff_ptr[rc] = x * dequant_ptr[rc];        // dequantized value
-
-        if (y)
-        {
-            eob = i;                                // last nonzero coeffs
-        }
-    }
-    d->eob = eob + 1;
-}
 
 void vp8_regular_quantize_b(BLOCK *b, BLOCKD *d)
 {

From ef2f27f10e39bc21254effe1083773c9b761d487 Mon Sep 17 00:00:00 2001
From: Yaowu Xu <yaowu@google.com>
Date: Wed, 10 Nov 2010 21:16:17 -0800
Subject: [PATCH 304/307] make rdmult adaptive for intra in quantizer RDO

This intends to correct the tendency that VP8 aggressively favors rate
on intra coded frames. Experiments tested different numbers in [0, 1]
and found 9/16 overall provided about 2-4% gains for all-intra coded
clips based on vpx-ssim metric. The impact on regular encoded clips
is much smaller but positive overall. Overall impact on psnr is also
positive even though very small.

Change-Id: If808553aaaa87fdd44691f9787820ac9856d9f8a
---
 vp8/encoder/encodemb.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vp8/encoder/encodemb.c b/vp8/encoder/encodemb.c
index 043eac219..e9753ac48 100644
--- a/vp8/encoder/encodemb.c
+++ b/vp8/encoder/encodemb.c
@@ -309,8 +309,10 @@ void vp8_optimize_b(MACROBLOCK *mb, int ib, int type,
     eob = d->eob;
 
     /* Now set up a Viterbi trellis to evaluate alternative roundings. */
-    /* TODO: These should vary with the block type, since the quantizer does. */
     rdmult = (mb->rdmult << 2)*err_mult;
+    if(mb->e_mbd.mode_info_context->mbmi.ref_frame==INTRA_FRAME)
+        rdmult = (rdmult * 9)>>4;
+
     rddiv = mb->rddiv;
     best_mask[0] = best_mask[1] = 0;
     /* Initialize the sentinel node of the trellis. */

From 8c2dfde3ed0c6e99ec20b9a4f85e2e4772a956ba Mon Sep 17 00:00:00 2001
From: Frank Galligan <fgalligan@google.com>
Date: Wed, 3 Nov 2010 23:33:00 -0400
Subject: [PATCH 305/307] Fixed bug first cluster timecode of webm file is
 wrong.

When the first pts equaled 0 ivfenc was incorrectly increasing the
pts by 1. I changed the pts and last pts to be signed. I also set
the default value of last pts to -1.

Change-Id: I30bcec5af9b16d93fa9e3abbea7764b133e9cd73
---
 vpxenc.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vpxenc.c b/vpxenc.c
index b139c6829..af9839ce5 100644
--- a/vpxenc.c
+++ b/vpxenc.c
@@ -435,7 +435,7 @@ struct EbmlGlobal
     int debug;
 
     FILE    *stream;
-    uint64_t last_pts_ms;
+    int64_t last_pts_ms;
     vpx_rational_t  framerate;
 
     /* These pointers are to the start of an element */
@@ -648,7 +648,7 @@ write_webm_block(EbmlGlobal                *glob,
     unsigned char  track_number;
     unsigned short block_timecode = 0;
     unsigned char  flags;
-    uint64_t       pts_ms;
+    int64_t        pts_ms;
     int            start_cluster = 0, is_keyframe;
 
     /* Calculate the PTS of this frame in milliseconds */
@@ -1074,6 +1074,7 @@ int main(int argc, const char **argv_)
     int                      psnr_count = 0;
 
     exec_name = argv_[0];
+    ebml.last_pts_ms = -1;
 
     if (argc < 3)
         usage_exit();

From 373f5c31443caafab5b21947ad8aa5ec50b0423d Mon Sep 17 00:00:00 2001
From: Paul Wilkins <paulwilkins@google.com>
Date: Mon, 15 Nov 2010 17:47:12 +0000
Subject: [PATCH 306/307] Bad cost tables used in ARNR filtering.

The use of incorrect mv costing tables in the ARNR sub-pel
filtering code led to corruption of the altref buffer in some cases,
particularly at low data rates.

The average gain from this fix is about 0.3% but there are a few
extreme cases where nasty and visible artifacts manifested and
for these few data points the improvement is > 10%.

PGW and AWG

Change-Id: I95cc02b196a433e71d0d2bd2b933fe68ed31e796
---
 vp8/encoder/temporal_filter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vp8/encoder/temporal_filter.c b/vp8/encoder/temporal_filter.c
index fd5dd7ede..abc50270c 100644
--- a/vp8/encoder/temporal_filter.c
+++ b/vp8/encoder/temporal_filter.c
@@ -292,7 +292,7 @@ static int find_matching_mb
         bestsme = cpi->find_fractional_mv_step(x, b, d,
                     &d->bmi.mv.as_mv, &best_ref_mv1,
                     x->errorperbit, &cpi->fn_ptr[BLOCK_16X16],
-                    cpi->mb.mvcost);
+                    mvcost);
     }
 #endif
 

From e180255375e339deba943ed9afaee1719f3c16bc Mon Sep 17 00:00:00 2001
From: Fritz Koenig <frkoenig@google.com>
Date: Thu, 11 Nov 2010 10:15:58 -0800
Subject: [PATCH 307/307] Remove stack shadowing for x86-x64 for SAD functions.

x86-64 passes arguments in registers.  There is no need to push
them to the stack before using them.

This fixes 15acc84f10cefd98b2f8dbd2eac2cc92c5a3f851 where ebx
was not getting preserved on x86.

Change-Id: I1214b5f818a0201f75ab6ad7d5c6f448e09b16c2
---
 vp8/encoder/x86/sad_sse3.asm | 879 +++++++++++++++++------------------
 1 file changed, 419 insertions(+), 460 deletions(-)

diff --git a/vp8/encoder/x86/sad_sse3.asm b/vp8/encoder/x86/sad_sse3.asm
index 1b7293c20..575417516 100644
--- a/vp8/encoder/x86/sad_sse3.asm
+++ b/vp8/encoder/x86/sad_sse3.asm
@@ -8,24 +8,169 @@
 ;  be found in the AUTHORS file in the root of the source tree.
 ;
 
-
 %include "vpx_ports/x86_abi_support.asm"
 
-%macro PROCESS_16X2X3 1
-%if %1
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm5,       XMMWORD PTR [rdi]
-        lddqu           xmm6,       XMMWORD PTR [rdi+1]
-        lddqu           xmm7,       XMMWORD PTR [rdi+2]
+%macro STACK_FRAME_CREATE_X3 0
+%if ABI_IS_32BIT
+  %define     src_ptr       rsi
+  %define     src_stride    rax
+  %define     ref_ptr       rdi
+  %define     ref_stride    rdx
+  %define     end_ptr       rcx
+  %define     ret_var       rbx
+  %define     result_ptr    arg(4)
+  %define     max_err       arg(4)
+    push        rbp
+    mov         rbp,        rsp
+    push        rsi
+    push        rdi
+    push        rbx
+
+    mov         rsi,        arg(0)              ; src_ptr
+    mov         rdi,        arg(2)              ; ref_ptr
+
+    movsxd      rax,        dword ptr arg(1)    ; src_stride
+    movsxd      rdx,        dword ptr arg(3)    ; ref_stride
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    %define     src_ptr     rcx
+    %define     src_stride  rdx
+    %define     ref_ptr     r8
+    %define     ref_stride  r9
+    %define     end_ptr     r10
+    %define     ret_var     r11
+    %define     result_ptr  [rsp+8+4*8]
+    %define     max_err     [rsp+8+4*8]
+  %else
+    %define     src_ptr     rdi
+    %define     src_stride  rsi
+    %define     ref_ptr     rdx
+    %define     ref_stride  rcx
+    %define     end_ptr     r9
+    %define     ret_var     r10
+    %define     result_ptr  r8
+    %define     max_err     r8
+  %endif
+%endif
+
+%endmacro
+
+%macro STACK_FRAME_DESTROY_X3 0
+  %define     src_ptr
+  %define     src_stride
+  %define     ref_ptr
+  %define     ref_stride
+  %define     end_ptr
+  %define     ret_var
+  %define     result_ptr
+  %define     max_err
+
+%if ABI_IS_32BIT
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    pop         rbp
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+  %endif
+%endif
+    ret
+%endmacro
+
+%macro STACK_FRAME_CREATE_X4 0
+%if ABI_IS_32BIT
+  %define     src_ptr       rsi
+  %define     src_stride    rax
+  %define     r0_ptr        rcx
+  %define     r1_ptr        rdx
+  %define     r2_ptr        rbx
+  %define     r3_ptr        rdi
+  %define     ref_stride    rbp
+  %define     result_ptr    arg(4)
+    push        rbp
+    mov         rbp,        rsp
+    push        rsi
+    push        rdi
+    push        rbx
+
+    push        rbp
+    mov         rdi,        arg(2)              ; ref_ptr_base
+
+    LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+
+    mov         rsi,        arg(0)              ; src_ptr
+
+    movsxd      rbx,        dword ptr arg(1)    ; src_stride
+    movsxd      rbp,        dword ptr arg(3)    ; ref_stride
+
+    xchg        rbx,        rax
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    %define     src_ptr     rcx
+    %define     src_stride  rdx
+    %define     r0_ptr      rsi
+    %define     r1_ptr      r10
+    %define     r2_ptr      r11
+    %define     r3_ptr      r8
+    %define     ref_stride  r9
+    %define     result_ptr  [rsp+16+4*8]
+    push        rsi
+
+    LOAD_X4_ADDRESSES r8, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+  %else
+    %define     src_ptr     rdi
+    %define     src_stride  rsi
+    %define     r0_ptr      r9
+    %define     r1_ptr      r10
+    %define     r2_ptr      r11
+    %define     r3_ptr      rdx
+    %define     ref_stride  rcx
+    %define     result_ptr  r8
+
+    LOAD_X4_ADDRESSES rdx, r0_ptr, r1_ptr, r2_ptr, r3_ptr
+
+  %endif
+%endif
+%endmacro
+
+%macro STACK_FRAME_DESTROY_X4 0
+  %define     src_ptr
+  %define     src_stride
+  %define     r0_ptr
+  %define     r1_ptr
+  %define     r2_ptr
+  %define     r3_ptr
+  %define     ref_stride
+  %define     result_ptr
+
+%if ABI_IS_32BIT
+    pop         rbx
+    pop         rdi
+    pop         rsi
+    pop         rbp
+%else
+  %ifidn __OUTPUT_FORMAT__,x64
+    pop         rsi
+  %endif
+%endif
+    ret
+%endmacro
+
+%macro PROCESS_16X2X3 5
+%if %1==0
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm5,       XMMWORD PTR [%3]
+        lddqu           xmm6,       XMMWORD PTR [%3+1]
+        lddqu           xmm7,       XMMWORD PTR [%3+2]
 
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm1,       XMMWORD PTR [rdi]
-        lddqu           xmm2,       XMMWORD PTR [rdi+1]
-        lddqu           xmm3,       XMMWORD PTR [rdi+2]
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm1,       XMMWORD PTR [%3]
+        lddqu           xmm2,       XMMWORD PTR [%3+1]
+        lddqu           xmm3,       XMMWORD PTR [%3+2]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -35,13 +180,15 @@
         paddw           xmm6,       xmm2
         paddw           xmm7,       xmm3
 %endif
-        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
-        lddqu           xmm1,       XMMWORD PTR [rdi+rdx]
-        lddqu           xmm2,       XMMWORD PTR [rdi+rdx+1]
-        lddqu           xmm3,       XMMWORD PTR [rdi+rdx+2]
+        movdqa          xmm0,       XMMWORD PTR [%2+%4]
+        lddqu           xmm1,       XMMWORD PTR [%3+%5]
+        lddqu           xmm2,       XMMWORD PTR [%3+%5+1]
+        lddqu           xmm3,       XMMWORD PTR [%3+%5+2]
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rdi,        [rdi+rdx*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%4*2]
+        lea             %3,         [%3+%5*2]
+%endif
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
@@ -52,21 +199,21 @@
         paddw           xmm7,       xmm3
 %endmacro
 
-%macro PROCESS_8X2X3 1
-%if %1
-        movq            mm0,       QWORD PTR [rsi]
-        movq            mm5,       QWORD PTR [rdi]
-        movq            mm6,       QWORD PTR [rdi+1]
-        movq            mm7,       QWORD PTR [rdi+2]
+%macro PROCESS_8X2X3 5
+%if %1==0
+        movq            mm0,       QWORD PTR [%2]
+        movq            mm5,       QWORD PTR [%3]
+        movq            mm6,       QWORD PTR [%3+1]
+        movq            mm7,       QWORD PTR [%3+2]
 
         psadbw          mm5,       mm0
         psadbw          mm6,       mm0
         psadbw          mm7,       mm0
 %else
-        movq            mm0,       QWORD PTR [rsi]
-        movq            mm1,       QWORD PTR [rdi]
-        movq            mm2,       QWORD PTR [rdi+1]
-        movq            mm3,       QWORD PTR [rdi+2]
+        movq            mm0,       QWORD PTR [%2]
+        movq            mm1,       QWORD PTR [%3]
+        movq            mm2,       QWORD PTR [%3+1]
+        movq            mm3,       QWORD PTR [%3+2]
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -76,13 +223,15 @@
         paddw           mm6,       mm2
         paddw           mm7,       mm3
 %endif
-        movq            mm0,       QWORD PTR [rsi+rax]
-        movq            mm1,       QWORD PTR [rdi+rdx]
-        movq            mm2,       QWORD PTR [rdi+rdx+1]
-        movq            mm3,       QWORD PTR [rdi+rdx+2]
+        movq            mm0,       QWORD PTR [%2+%4]
+        movq            mm1,       QWORD PTR [%3+%5]
+        movq            mm2,       QWORD PTR [%3+%5+1]
+        movq            mm3,       QWORD PTR [%3+%5+2]
 
-        lea             rsi,       [rsi+rax*2]
-        lea             rdi,       [rdi+rdx*2]
+%if %1==0 || %1==1
+        lea             %2,        [%2+%4*2]
+        lea             %3,        [%3+%5*2]
+%endif
 
         psadbw          mm1,       mm0
         psadbw          mm2,       mm0
@@ -101,115 +250,117 @@
         mov             %5,         [%1+REG_SZ_BYTES*3]
 %endmacro
 
-%macro PROCESS_16X2X4 1
-%if %1
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm4,       XMMWORD PTR [rcx]
-        lddqu           xmm5,       XMMWORD PTR [rdx]
-        lddqu           xmm6,       XMMWORD PTR [rbx]
-        lddqu           xmm7,       XMMWORD PTR [rdi]
+%macro PROCESS_16X2X4 8
+%if %1==0
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm4,       XMMWORD PTR [%3]
+        lddqu           xmm5,       XMMWORD PTR [%4]
+        lddqu           xmm6,       XMMWORD PTR [%5]
+        lddqu           xmm7,       XMMWORD PTR [%6]
 
         psadbw          xmm4,       xmm0
         psadbw          xmm5,       xmm0
         psadbw          xmm6,       xmm0
         psadbw          xmm7,       xmm0
 %else
-        movdqa          xmm0,       XMMWORD PTR [rsi]
-        lddqu           xmm1,       XMMWORD PTR [rcx]
-        lddqu           xmm2,       XMMWORD PTR [rdx]
-        lddqu           xmm3,       XMMWORD PTR [rbx]
+        movdqa          xmm0,       XMMWORD PTR [%2]
+        lddqu           xmm1,       XMMWORD PTR [%3]
+        lddqu           xmm2,       XMMWORD PTR [%4]
+        lddqu           xmm3,       XMMWORD PTR [%5]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [rdi]
+        lddqu           xmm1,       XMMWORD PTR [%6]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 %endif
-        movdqa          xmm0,       XMMWORD PTR [rsi+rax]
-        lddqu           xmm1,       XMMWORD PTR [rcx+rbp]
-        lddqu           xmm2,       XMMWORD PTR [rdx+rbp]
-        lddqu           xmm3,       XMMWORD PTR [rbx+rbp]
+        movdqa          xmm0,       XMMWORD PTR [%2+%7]
+        lddqu           xmm1,       XMMWORD PTR [%3+%8]
+        lddqu           xmm2,       XMMWORD PTR [%4+%8]
+        lddqu           xmm3,       XMMWORD PTR [%5+%8]
 
         psadbw          xmm1,       xmm0
         psadbw          xmm2,       xmm0
         psadbw          xmm3,       xmm0
 
         paddw           xmm4,       xmm1
-        lddqu           xmm1,       XMMWORD PTR [rdi+rbp]
+        lddqu           xmm1,       XMMWORD PTR [%6+%8]
         paddw           xmm5,       xmm2
         paddw           xmm6,       xmm3
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%7*2]
+        lea             %3,         [%3+%8*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
-
-        lea             rdi,        [rdi+rbp*2]
+        lea             %4,         [%4+%8*2]
+        lea             %5,         [%5+%8*2]
 
+        lea             %6,         [%6+%8*2]
+%endif
         psadbw          xmm1,       xmm0
         paddw           xmm7,       xmm1
 
 %endmacro
 
-%macro PROCESS_8X2X4 1
-%if %1
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm4,        QWORD PTR [rcx]
-        movq            mm5,        QWORD PTR [rdx]
-        movq            mm6,        QWORD PTR [rbx]
-        movq            mm7,        QWORD PTR [rdi]
+%macro PROCESS_8X2X4 8
+%if %1==0
+        movq            mm0,        QWORD PTR [%2]
+        movq            mm4,        QWORD PTR [%3]
+        movq            mm5,        QWORD PTR [%4]
+        movq            mm6,        QWORD PTR [%5]
+        movq            mm7,        QWORD PTR [%6]
 
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
         psadbw          mm6,        mm0
         psadbw          mm7,        mm0
 %else
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm1,        QWORD PTR [rcx]
-        movq            mm2,        QWORD PTR [rdx]
-        movq            mm3,        QWORD PTR [rbx]
+        movq            mm0,        QWORD PTR [%2]
+        movq            mm1,        QWORD PTR [%3]
+        movq            mm2,        QWORD PTR [%4]
+        movq            mm3,        QWORD PTR [%5]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [rdi]
+        movq            mm1,        QWORD PTR [%6]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 %endif
-        movq            mm0,        QWORD PTR [rsi+rax]
-        movq            mm1,        QWORD PTR [rcx+rbp]
-        movq            mm2,        QWORD PTR [rdx+rbp]
-        movq            mm3,        QWORD PTR [rbx+rbp]
+        movq            mm0,        QWORD PTR [%2+%7]
+        movq            mm1,        QWORD PTR [%3+%8]
+        movq            mm2,        QWORD PTR [%4+%8]
+        movq            mm3,        QWORD PTR [%5+%8]
 
         psadbw          mm1,        mm0
         psadbw          mm2,        mm0
         psadbw          mm3,        mm0
 
         paddw           mm4,        mm1
-        movq            mm1,        QWORD PTR [rdi+rbp]
+        movq            mm1,        QWORD PTR [%6+%8]
         paddw           mm5,        mm2
         paddw           mm6,        mm3
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+%if %1==0 || %1==1
+        lea             %2,         [%2+%7*2]
+        lea             %3,         [%3+%8*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
-
-        lea             rdi,        [rdi+rbp*2]
+        lea             %4,         [%4+%8*2]
+        lea             %5,         [%5+%8*2]
 
+        lea             %6,         [%6+%8*2]
+%endif
         psadbw          mm1,        mm0
         paddw           mm7,        mm1
 
@@ -223,54 +374,39 @@
 ;    int  *results)
 global sym(vp8_sad16x16x3_sse3)
 sym(vp8_sad16x16x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_16X2X3 1
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-
-        mov             rdi,        arg(4) ;Results
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad16x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -280,50 +416,35 @@ sym(vp8_sad16x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x3_sse3)
 sym(vp8_sad16x8x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_16X2X3 1
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-        PROCESS_16X2X3 0
-
-        mov             rdi,        arg(4) ;Results
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad8x16x3_sse3(
 ;    unsigned char *src_ptr,
@@ -333,40 +454,26 @@ sym(vp8_sad16x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x3_sse3)
 sym(vp8_sad8x16x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_8X2X3 1
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
+        mov             rcx,        result_ptr
 
-        mov             rdi,        arg(4) ;Results
+        punpckldq       mm5,        mm6
 
-        movd            [rdi],      mm5
-        movd            [rdi+4],    mm6
-        movd            [rdi+8],    mm7
+        movq            [rcx],      mm5
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad8x8x3_sse3(
 ;    unsigned char *src_ptr,
@@ -376,36 +483,22 @@ sym(vp8_sad8x16x3_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x3_sse3)
 sym(vp8_sad8x8x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride
+        PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride
 
-        PROCESS_8X2X3 1
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
-        PROCESS_8X2X3 0
+        mov             rcx,        result_ptr
 
-        mov             rdi,        arg(4) ;Results
+        punpckldq       mm5,        mm6
 
-        movd            [rdi],      mm5
-        movd            [rdi+4],    mm6
-        movd            [rdi+8],    mm7
+        movq            [rcx],      mm5
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;void int vp8_sad4x4x3_sse3(
 ;    unsigned char *src_ptr,
@@ -415,33 +508,23 @@ sym(vp8_sad8x8x3_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x3_sse3)
 sym(vp8_sad4x4x3_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rax,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm1,        DWORD PTR [ref_ptr]
 
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm1,        DWORD PTR [rdi]
-
-        movd            mm2,        DWORD PTR [rsi+rax]
-        movd            mm3,        DWORD PTR [rdi+rdx]
+        movd            mm2,        DWORD PTR [src_ptr+src_stride]
+        movd            mm3,        DWORD PTR [ref_ptr+ref_stride]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [rdi+1]
-        movd            mm5,        DWORD PTR [rdi+2]
+        movd            mm4,        DWORD PTR [ref_ptr+1]
+        movd            mm5,        DWORD PTR [ref_ptr+2]
 
-        movd            mm2,        DWORD PTR [rdi+rdx+1]
-        movd            mm3,        DWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
+        movd            mm3,        DWORD PTR [ref_ptr+ref_stride+2]
 
         psadbw          mm1,        mm0
 
@@ -451,29 +534,27 @@ sym(vp8_sad4x4x3_sse3):
         psadbw          mm4,        mm0
         psadbw          mm5,        mm0
 
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             ref_ptr,    [ref_ptr+ref_stride*2]
 
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm2,        DWORD PTR [ref_ptr]
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rdi,        [rdi+rdx*2]
-
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm2,        DWORD PTR [rdi]
-
-        movd            mm3,        DWORD PTR [rsi+rax]
-        movd            mm6,        DWORD PTR [rdi+rdx]
+        movd            mm3,        DWORD PTR [src_ptr+src_stride]
+        movd            mm6,        DWORD PTR [ref_ptr+ref_stride]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm6
 
-        movd            mm3,        DWORD PTR [rdi+1]
-        movd            mm7,        DWORD PTR [rdi+2]
+        movd            mm3,        DWORD PTR [ref_ptr+1]
+        movd            mm7,        DWORD PTR [ref_ptr+2]
 
         psadbw          mm2,        mm0
 
         paddw           mm1,        mm2
 
-        movd            mm2,        DWORD PTR [rdi+rdx+1]
-        movd            mm6,        DWORD PTR [rdi+rdx+2]
+        movd            mm2,        DWORD PTR [ref_ptr+ref_stride+1]
+        movd            mm6,        DWORD PTR [ref_ptr+ref_stride+2]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm6
@@ -484,19 +565,14 @@ sym(vp8_sad4x4x3_sse3):
         paddw           mm3,        mm4
         paddw           mm7,        mm5
 
-        mov             rdi,        arg(4) ;Results
-        movd            [rdi],      mm1
+        mov             rcx,        result_ptr
 
-        movd            [rdi+4],    mm3
-        movd            [rdi+8],    mm7
+        punpckldq       mm1,        mm3
 
+        movq            [rcx],      mm1
+        movd            [rcx+8],    mm7
 
-    ; begin epilog
-    pop rdi
-    pop rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X3
 
 ;unsigned int vp8_sad16x16_sse3(
 ;    unsigned char *src_ptr,
@@ -507,51 +583,40 @@ sym(vp8_sad4x4x3_sse3):
 ;%define lddqu movdqu
 global sym(vp8_sad16x16_sse3)
 sym(vp8_sad16x16_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rbx
-    push        rsi
-    push        rdi
-    ; end prolog
 
-        mov             rsi,        arg(0) ;src_ptr
-        mov             rdi,        arg(2) ;ref_ptr
+    STACK_FRAME_CREATE_X3
 
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rdx,        dword ptr arg(3) ;ref_stride
+        lea             end_ptr,    [src_ptr+src_stride*8]
 
-        lea             rcx,        [rsi+rbx*8]
-
-        lea             rcx,        [rcx+rbx*8]
+        lea             end_ptr,    [end_ptr+src_stride*8]
         pxor            mm7,        mm7
 
-vp8_sad16x16_sse3_loop:
+.vp8_sad16x16_sse3_loop:
 
-        movq            rax,        mm7
-        cmp             rax,        arg(4)
-        jg              vp8_sad16x16_early_exit
+        movq            ret_var,    mm7
+        cmp             ret_var,    max_err
+        jg              .vp8_sad16x16_early_exit
 
-        movq            mm0,        QWORD PTR [rsi]
-        movq            mm2,        QWORD PTR [rsi+8]
+        movq            mm0,        QWORD PTR [src_ptr]
+        movq            mm2,        QWORD PTR [src_ptr+8]
 
-        movq            mm1,        QWORD PTR [rdi]
-        movq            mm3,        QWORD PTR [rdi+8]
+        movq            mm1,        QWORD PTR [ref_ptr]
+        movq            mm3,        QWORD PTR [ref_ptr+8]
 
-        movq            mm4,        QWORD PTR [rsi+rbx]
-        movq            mm5,        QWORD PTR [rdi+rdx]
+        movq            mm4,        QWORD PTR [src_ptr+src_stride]
+        movq            mm5,        QWORD PTR [ref_ptr+ref_stride]
 
         psadbw          mm0,        mm1
         psadbw          mm2,        mm3
 
-        movq            mm1,        QWORD PTR [rsi+rbx+8]
-        movq            mm3,        QWORD PTR [rdi+rdx+8]
+        movq            mm1,        QWORD PTR [src_ptr+src_stride+8]
+        movq            mm3,        QWORD PTR [ref_ptr+ref_stride+8]
 
         psadbw          mm4,        mm5
         psadbw          mm1,        mm3
 
-        lea             rsi,        [rsi+rbx*2]
-        lea             rdi,        [rdi+rdx*2]
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             ref_ptr,    [ref_ptr+ref_stride*2]
 
         paddw           mm0,        mm2
         paddw           mm4,        mm1
@@ -559,20 +624,16 @@ vp8_sad16x16_sse3_loop:
         paddw           mm7,        mm0
         paddw           mm7,        mm4
 
-        cmp             rsi,        rcx
-        jne             vp8_sad16x16_sse3_loop
+        cmp             src_ptr,    end_ptr
+        jne             .vp8_sad16x16_sse3_loop
 
-        movq            rax,        mm7
+        movq            ret_var,    mm7
 
-vp8_sad16x16_early_exit:
+.vp8_sad16x16_early_exit:
 
-    ; begin epilog
-    pop         rdi
-    pop         rsi
-    pop         rbx
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        mov             rax,        ret_var
+
+    STACK_FRAME_DESTROY_X3
 
 ;void vp8_sad16x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -582,69 +643,48 @@ vp8_sad16x16_early_exit:
 ;    int  *results)
 global sym(vp8_sad16x16x4d_sse3)
 sym(vp8_sad16x16x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_16X2X4 1
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+12],   xmm0
+        movd            [rcx+12],   xmm0
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4
 
 ;void vp8_sad16x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -654,65 +694,44 @@ sym(vp8_sad16x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad16x8x4d_sse3)
 sym(vp8_sad16x8x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_16X2X4 1
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
-        PROCESS_16X2X4 0
+        PROCESS_16X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_16X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
         movq            xmm0,       xmm4
         psrldq          xmm4,       8
 
         paddw           xmm0,       xmm4
-        movd            [rdi],      xmm0
+        movd            [rcx],      xmm0
 ;-
         movq            xmm0,       xmm5
         psrldq          xmm5,       8
 
         paddw           xmm0,       xmm5
-        movd            [rdi+4],    xmm0
+        movd            [rcx+4],    xmm0
 ;-
         movq            xmm0,       xmm6
         psrldq          xmm6,       8
 
         paddw           xmm0,       xmm6
-        movd            [rdi+8],    xmm0
+        movd            [rcx+8],    xmm0
 ;-
         movq            xmm0,       xmm7
         psrldq          xmm7,       8
 
         paddw           xmm0,       xmm7
-        movd            [rdi+12],   xmm0
+        movd            [rcx+12],   xmm0
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad8x16x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -722,50 +741,30 @@ sym(vp8_sad16x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x16x4d_sse3)
 sym(vp8_sad8x16x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_8X2X4 1
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
-        movd            [rdi],      mm4
-        movd            [rdi+4],    mm5
-        movd            [rdi+8],    mm6
-        movd            [rdi+12],   mm7
+        punpckldq       mm4,        mm5
+        punpckldq       mm6,        mm7
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        movq            [rcx],      mm4
+        movq            [rcx+8],    mm6
+
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad8x8x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -775,46 +774,26 @@ sym(vp8_sad8x16x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad8x8x4d_sse3)
 sym(vp8_sad8x8x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
-
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        PROCESS_8X2X4 1
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
-        PROCESS_8X2X4 0
+        PROCESS_8X2X4 0, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 1, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
+        PROCESS_8X2X4 2, src_ptr, r0_ptr, r1_ptr, r2_ptr, r3_ptr, src_stride, ref_stride
 
+%if ABI_IS_32BIT
         pop             rbp
-        mov             rdi,        arg(4) ;Results
+%endif
+        mov             rcx,        result_ptr
 
-        movd            [rdi],      mm4
-        movd            [rdi+4],    mm5
-        movd            [rdi+8],    mm6
-        movd            [rdi+12],   mm7
+        punpckldq       mm4,        mm5
+        punpckldq       mm6,        mm7
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+        movq            [rcx],      mm4
+        movq            [rcx+8],    mm6
+
+    STACK_FRAME_DESTROY_X4
 
 ;void int vp8_sad4x4x4d_sse3(
 ;    unsigned char *src_ptr,
@@ -824,43 +803,26 @@ sym(vp8_sad8x8x4d_sse3):
 ;    int  *results)
 global sym(vp8_sad4x4x4d_sse3)
 sym(vp8_sad4x4x4d_sse3):
-    push        rbp
-    mov         rbp, rsp
-    SHADOW_ARGS_TO_STACK 5
-    push        rsi
-    push        rdi
-    push        rbx
-    ; end prolog
 
-        push            rbp
-        mov             rdi,        arg(2) ; ref_ptr_base
+    STACK_FRAME_CREATE_X4
 
-        LOAD_X4_ADDRESSES rdi, rcx, rdx, rax, rdi
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm1,        DWORD PTR [r0_ptr]
 
-        mov             rsi,        arg(0) ;src_ptr
-
-        movsxd          rbx,        dword ptr arg(1) ;src_stride
-        movsxd          rbp,        dword ptr arg(3) ;ref_stride
-
-        xchg            rbx,        rax
-
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm1,        DWORD PTR [rcx]
-
-        movd            mm2,        DWORD PTR [rsi+rax]
-        movd            mm3,        DWORD PTR [rcx+rbp]
+        movd            mm2,        DWORD PTR [src_ptr+src_stride]
+        movd            mm3,        DWORD PTR [r0_ptr+ref_stride]
 
         punpcklbw       mm0,        mm2
         punpcklbw       mm1,        mm3
 
-        movd            mm4,        DWORD PTR [rdx]
-        movd            mm5,        DWORD PTR [rbx]
+        movd            mm4,        DWORD PTR [r1_ptr]
+        movd            mm5,        DWORD PTR [r2_ptr]
 
-        movd            mm6,        DWORD PTR [rdi]
-        movd            mm2,        DWORD PTR [rdx+rbp]
+        movd            mm6,        DWORD PTR [r3_ptr]
+        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
 
-        movd            mm3,        DWORD PTR [rbx+rbp]
-        movd            mm7,        DWORD PTR [rdi+rbp]
+        movd            mm3,        DWORD PTR [r2_ptr+ref_stride]
+        movd            mm7,        DWORD PTR [r3_ptr+ref_stride]
 
         psadbw          mm1,        mm0
 
@@ -875,37 +837,40 @@ sym(vp8_sad4x4x4d_sse3):
 
 
 
-        lea             rsi,        [rsi+rax*2]
-        lea             rcx,        [rcx+rbp*2]
+        lea             src_ptr,    [src_ptr+src_stride*2]
+        lea             r0_ptr,     [r0_ptr+ref_stride*2]
 
-        lea             rdx,        [rdx+rbp*2]
-        lea             rbx,        [rbx+rbp*2]
+        lea             r1_ptr,     [r1_ptr+ref_stride*2]
+        lea             r2_ptr,     [r2_ptr+ref_stride*2]
 
-        lea             rdi,        [rdi+rbp*2]
+        lea             r3_ptr,     [r3_ptr+ref_stride*2]
 
-        movd            mm0,        DWORD PTR [rsi]
-        movd            mm2,        DWORD PTR [rcx]
+        movd            mm0,        DWORD PTR [src_ptr]
+        movd            mm2,        DWORD PTR [r0_ptr]
 
-        movd            mm3,        DWORD PTR [rsi+rax]
-        movd            mm7,        DWORD PTR [rcx+rbp]
+        movd            mm3,        DWORD PTR [src_ptr+src_stride]
+        movd            mm7,        DWORD PTR [r0_ptr+ref_stride]
 
         punpcklbw       mm0,        mm3
         punpcklbw       mm2,        mm7
 
-        movd            mm3,        DWORD PTR [rdx]
-        movd            mm7,        DWORD PTR [rbx]
+        movd            mm3,        DWORD PTR [r1_ptr]
+        movd            mm7,        DWORD PTR [r2_ptr]
 
         psadbw          mm2,        mm0
+%if ABI_IS_32BIT
         mov             rax,        rbp
 
         pop             rbp
-        mov             rsi,        arg(4) ;Results
+%define     ref_stride    rax
+%endif
+        mov             rsi,        result_ptr
 
         paddw           mm1,        mm2
         movd            [rsi],      mm1
 
-        movd            mm2,        DWORD PTR [rdx+rax]
-        movd            mm1,        DWORD PTR [rbx+rax]
+        movd            mm2,        DWORD PTR [r1_ptr+ref_stride]
+        movd            mm1,        DWORD PTR [r2_ptr+ref_stride]
 
         punpcklbw       mm3,        mm2
         punpcklbw       mm7,        mm1
@@ -913,8 +878,8 @@ sym(vp8_sad4x4x4d_sse3):
         psadbw          mm3,        mm0
         psadbw          mm7,        mm0
 
-        movd            mm2,        DWORD PTR [rdi]
-        movd            mm1,        DWORD PTR [rdi+rax]
+        movd            mm2,        DWORD PTR [r3_ptr]
+        movd            mm1,        DWORD PTR [r3_ptr+ref_stride]
 
         paddw           mm3,        mm4
         paddw           mm7,        mm5
@@ -929,10 +894,4 @@ sym(vp8_sad4x4x4d_sse3):
         movd            [rsi+12],   mm2
 
 
-    ; begin epilog
-    pop         rbx
-    pop         rdi
-    pop         rsi
-    UNSHADOW_ARGS
-    pop         rbp
-    ret
+    STACK_FRAME_DESTROY_X4