Merge remote branch 'internal/upstream' into HEAD

Conflicts: vp8/common/alloccommon.c vp8/common/onyxc_int.h vp8/vp8_cx_iface.c vpxenc.c
2010-11-04 21:50:37 -04:00
parent 6804199073 507eb4b577
commit 362f763cfe
226 changed files with 16995 additions and 9616 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,19 +1,16 @@
-*~
 *.a
+*.asm.s
 *.d
 *.o
-*-old
-*-new
-*.mk
-*.asm
-TAGS
-.bins
-.libs
-.deps
-/.cflags-new
-/.cflags-old
+*~
+/*-*.mk
+/*.asm
+/*.doxy
+/.bins
+/.deps
 /.docs
 /.install-*
+/.libs
 /Makefile
 /config.err
 /config.mk
@@ -23,14 +20,11 @@ TAGS
 /decode_with_drops
 /decode_with_drops.c
 /decode_with_drops.dox
-/docs-generic-gnu.mk
 /docs/
 /doxyfile
 /error_resilient
 /error_resilient.c
 /error_resilient.dox
-/examples-generic-gnu.mk
-/examples.doxy
 /force_keyframe
 /force_keyframe.c
 /force_keyframe.dox
@@ -38,8 +32,7 @@ TAGS
 /ivfdec.dox
 /ivfenc
 /ivfenc.dox
-/libs-generic-gnu.mk
-/libs.doxy
+/obj_int_extract
 /postproc
 /postproc.c
 /postproc.dox
@@ -66,3 +59,4 @@ TAGS
 /vpx_config.c
 /vpx_config.h
 /vpx_version.h
+TAGS
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,6 +1,7 @@
 # This file is automatically generated from the git commit history
 # by tools/gen_authors.sh.

+Aaron Watry <awatry@gmail.com>
 Adrian Grange <agrange@google.com>
 Alex Converse <alex.converse@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
@@ -20,6 +21,7 @@ Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
+Martin Ettl <ettl.martin78@googlemail.com>
 Michael Kohler <michaelkohler@live.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,3 +1,111 @@
+2010-10-28 v0.9.5 "Aylesbury"
+  Our first named release, focused on a faster decoder and a better encoder.
+
+  - Upgrading:
+    This release incorporates backwards-incompatible changes to the
+    ivfenc and ivfdec tools. These tools are now called vpxenc and vpxdec.
+
+    vpxdec
+      * The -q (quiet) option has been removed and replaced with
+        -v (verbose). The output is quiet by default. Use -v to see
+        the version number of the binary.
+
+      * The default behavior is now to write output to a single file
+        instead of individual frames. The -y option has been removed.
+        Y4M output is the default.
+
+      * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12
+        options must be specified.
+
+          $ ivfdec -o OUTPUT INPUT
+          $ vpxdec --i420 -o OUTPUT INPUT
+
+      * If an output file is not specified, the default is to write
+        Y4M to stdout. This makes piping more natural.
+
+          $ ivfdec -y -o - INPUT | ...
+          $ vpxdec INPUT | ...
+
+      * The output file has additional flexibility for formatting the
+        filename. It supports escape characters for constructing a
+        filename from the width, height, and sequence number. This
+        replaces the -p option. To get the equivalent:
+
+          $ ivfdec -p frame INPUT
+          $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT
+
+    vpxenc
+      * The output file must be specified with -o, rather than as the
+        last argument.
+
+          $ ivfenc <options> INPUT OUTPUT
+          $ vpxenc <options> -o OUTPUT INPUT
+
+      * The output defaults to webm. To get IVF output, use the --ivf
+        option.
+
+          $ ivfenc <options> INPUT OUTPUT.ivf
+          $ vpxenc <options> -o OUTPUT.ivf --ivf INPUT
+
+
+  - Enhancements:
+      ivfenc and ivfdec have been renamed to vpxenc and vpxdec.
+      vpxdec supports .webm input
+      vpxdec writes .y4m by default
+      vpxenc writes .webm output by default
+      vpxenc --psnr now shows the average/overall PSNR at the end
+      ARM platforms now support runtime cpu detection
+      vpxdec visualizations added for motion vectors, block modes, references
+      vpxdec now silent by default
+      vpxdec --progress shows frame-by-frame timing information
+      vpxenc supports the distinction between --fps and --timebase
+      NASM is now a supported assembler
+      configure: enable PIC for shared libs by default
+      configure: add --enable-small
+      configure: support for ppc32-linux-gcc
+      configure: support for sparc-solaris-gcc
+
+  - Bugs:
+      Improve handling of invalid frames
+      Fix valgrind errors in the NEON loop filters.
+      Fix loopfilter delta zero transitions
+      Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
+      Build fixes for darwin-icc
+
+  - Speed:
+      20-40% (average 28%) improvement in libvpx decoder speed,
+      including:
+        Rewrite vp8_short_walsh4x4_sse2()
+        Optimizations on the loopfilters.
+        Miscellaneous improvements for Atom
+        Add 4-tap version of 2nd-pass ARMv6 MC filter.
+        Improved multithread utilization
+        Better instruction choices on x86
+        Reorder data to use wider instructions
+        Update NEON wide idcts
+        Make block access to frame buffer sequential
+        Improved subset block search
+        Bilinear subpixel optimizations for ssse3.
+        Decrease memory footprint
+
+      Encoder speed improvements (percentage gain not measured):
+        Skip unnecessary search of identical frames
+        Add SSE2 subtract functions
+        Improve bounds checking in vp8_diamond_search_sadx4()
+        Added vp8_fast_quantize_b_sse2
+
+  - Quality:
+      Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality
+      encoding mode, and up to 60% improvement on very noisy, still
+      or slow moving source video
+
+        Motion compensated temporal filter for Alt-Ref Noise Reduction
+        Improved use of trellis quantization on 2nd order Y blocks
+        Tune effect of motion on KF/GF boost in two pass
+        Allow coefficient optimization for good quality speed 0.
+        Improved control of active min quantizer for two pass.
+        Enable ARFs for non-lagged compress
+
 2010-09-02 v0.9.2
  - Enhancements:
      Disable frame dropping by default
--- a/README
+++ b/README
@@ -89,7 +89,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  toolchain, the following command could be used (note, POSIX SH syntax, adapt
  to your shell as necessary):

-    $ CROSS=mipsel-linux-uclibc- ../libvpx/src/configure
+    $ CROSS=mipsel-linux-uclibc- ../libvpx/configure

  In addition, the executables to be invoked can be overridden by specifying the
  environment variables: CC, AR, LD, AS, STRIP, NM. Additional flags can be
--- a/args.c
+++ b/args.c
@@ -120,9 +120,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs)
        char *long_val = def->has_val ? "=<arg>" : "";

        if (def->short_name && def->long_name)
-            snprintf(option_text, 37, "-%s%s, --%s%s",
-                     def->short_name, short_val,
+        {
+            char *comma = def->has_val ? "," : ",      ";
+
+            snprintf(option_text, 37, "-%s%s%s --%s%6s",
+                     def->short_name, short_val, comma,
                     def->long_name, long_val);
+        }
        else if (def->short_name)
            snprintf(option_text, 37, "-%s%s",
                     def->short_name, short_val);
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -65,7 +65,7 @@ endif
 BUILD_ROOT?=.
 VPATH=$(SRC_PATH_BARE)
 CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
-ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH)
+ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/
 DIST_DIR?=dist
 HOSTCC?=gcc
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -255,9 +255,10 @@ TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
 TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
 TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
 TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
+TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"

 clean_temp_files() {
-    rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X}
+    rm -f ${TMP_C} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
 }

 #
@@ -322,6 +323,21 @@ check_add_ldflags() {
    add_ldflags "$@"
 }

+check_asm_align() {
+    log check_asm_align "$@"
+    cat >${TMP_ASM} <<EOF
+section .rodata
+align 16
+EOF
+    log_file ${TMP_ASM}
+    check_cmd ${AS} ${ASFLAGS} -o ${TMP_O} ${TMP_ASM}
+    readelf -WS ${TMP_O} >${TMP_X}
+    log_file ${TMP_X}
+    if ! grep -q '\.rodata .* 16$' ${TMP_X}; then
+        die "${AS} ${ASFLAGS} does not support section alignment (nasm <=2.08?)"
+    fi
+}
+
 write_common_config_banner() {
    echo '# This file automatically generated by configure. Do not edit!' > config.mk
    echo "TOOLCHAIN := ${toolchain}" >> config.mk
@@ -440,13 +456,18 @@ process_common_cmdline() {
        disable builtin_libc
        alt_libc="${optval}"
        ;;
+        --as=*)
+        [ "${optval}" = yasm -o "${optval}" = nasm -o "${optval}" = auto ] \
+            || die "Must be yasm, nasm or auto: ${optval}"
+        alt_as="${optval}"
+        ;;
        --prefix=*)
        prefix="${optval}"
        ;;
        --libdir=*)
        libdir="${optval}"
        ;;
-        --libc|--prefix|--libdir)
+        --libc|--as|--prefix|--libdir)
        die "Option ${opt} requires argument"
        ;;
        --help|-h) show_help
@@ -511,6 +532,9 @@ process_common_toolchain() {
            *powerpc*)
                tgt_isa=ppc32
                ;;
+            *sparc*)
+                tgt_isa=sparc
+                ;;
        esac

        # detect tgt_os
@@ -530,6 +554,9 @@ process_common_toolchain() {
            *linux*|*bsd*)
                tgt_os=linux
                ;;
+            *solaris2.10)
+                tgt_os=solaris
+                ;;
        esac

        if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then
@@ -581,6 +608,13 @@ process_common_toolchain() {
            ;;
    esac

+    # Handle Solaris variants. Solaris 10 needs -lposix4
+    case ${toolchain} in
+        *-solaris-*)
+            add_extralibs -lposix4
+            ;;
+    esac
+
    # Process ARM architecture variants
    case ${toolchain} in
    arm*|iwmmxt*)
@@ -790,6 +824,7 @@ process_common_toolchain() {
        soft_enable sse2
        soft_enable sse3
        soft_enable ssse3
+        soft_enable sse4_1

        case  ${tgt_os} in
            win*)
@@ -802,6 +837,7 @@ process_common_toolchain() {
                ;;
        esac

+        AS="${alt_as:-${AS:-auto}}"
        case  ${tgt_cc} in
            icc*)
                CC=${CC:-icc}
@@ -830,7 +866,16 @@ process_common_toolchain() {
                ;;
        esac

-        AS=yasm
+        case "${AS}" in
+            auto|"")
+                which nasm >/dev/null 2>&1 && AS=nasm
+                which yasm >/dev/null 2>&1 && AS=yasm
+                [ "${AS}" = auto -o -z "${AS}" ] \
+                    && die "Neither yasm nor nasm have been found"
+                ;;
+        esac
+        log_echo "  using $AS"
+        [ "${AS##*/}" = nasm ] && add_asflags -Ox
        AS_SFX=.asm
        case  ${tgt_os} in
            win*)
@@ -839,7 +884,9 @@ process_common_toolchain() {
            ;;
            linux*|solaris*)
                add_asflags -f elf${bits}
-                enabled debug && add_asflags -g dwarf2
+                enabled debug && [ "${AS}" = yasm ] && add_asflags -g dwarf2
+                enabled debug && [ "${AS}" = nasm ] && add_asflags -g
+                [ "${AS##*/}" = nasm ] && check_asm_align
            ;;
            darwin*)
                add_asflags -f macho${bits}
@@ -852,7 +899,7 @@ process_common_toolchain() {
                # enabled icc && ! enabled pic && add_cflags -fno-pic -mdynamic-no-pic
                enabled icc && ! enabled pic && add_cflags -fno-pic
            ;;
-            *) log "Warning: Unknown os $tgt_os while setting up yasm flags"
+            *) log "Warning: Unknown os $tgt_os while setting up $AS flags"
            ;;
        esac
    ;;
--- a/configure
+++ b/configure
@@ -23,6 +23,7 @@ Advanced options:
  ${toggle_libs}                  don't build libraries
  ${toggle_examples}              don't build examples
  --libc=PATH                     path to alternate libc
+  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
  ${toggle_fast_unaligned}        don't use unaligned accesses, even when
                                  supported by hardware [auto]
  ${toggle_codec_srcs}            in/exclude codec library source code
@@ -100,6 +101,7 @@ all_platforms="${all_platforms} ppc32-linux-gcc"
 all_platforms="${all_platforms} ppc64-darwin8-gcc"
 all_platforms="${all_platforms} ppc64-darwin9-gcc"
 all_platforms="${all_platforms} ppc64-linux-gcc"
+all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
 all_platforms="${all_platforms} x86-darwin8-icc"
 all_platforms="${all_platforms} x86-darwin9-gcc"
@@ -197,6 +199,7 @@ ARCH_EXT_LIST="
    sse2
    sse3
    ssse3
+    sse4_1

    altivec
 "
@@ -271,6 +274,7 @@ CMDLINE_SELECT="
    libs
    examples
    libc
+    as
    fast_unaligned
    codec_srcs
    debug_libs
--- a/examples.mk
+++ b/examples.mk
@@ -12,19 +12,40 @@
 # List of examples to build. UTILS are files that are taken from the source
 # tree directly, and GEN_EXAMPLES are files that are created from the
 # examples folder.
-UTILS-$(CONFIG_DECODERS)    += ivfdec.c
-ivfdec.SRCS                 += md5_utils.c md5_utils.h
-ivfdec.SRCS                 += vpx_ports/vpx_timer.h
-ivfdec.SRCS                 += vpx/vpx_integer.h
-ivfdec.SRCS                 += args.c args.h vpx_ports/config.h
-ivfdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
-ivfdec.DESCRIPTION           = Full featured decoder
-UTILS-$(CONFIG_ENCODERS)    += ivfenc.c
-ivfenc.SRCS                 += args.c args.h y4minput.c y4minput.h
-ivfenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
-ivfenc.SRCS                 += vpx_ports/mem_ops_aligned.h
-ivfenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
-ivfenc.DESCRIPTION           = Full featured encoder
+UTILS-$(CONFIG_DECODERS)    += vpxdec.c
+vpxdec.SRCS                 += md5_utils.c md5_utils.h
+vpxdec.SRCS                 += vpx_ports/vpx_timer.h
+vpxdec.SRCS                 += vpx/vpx_integer.h
+vpxdec.SRCS                 += args.c args.h vpx_ports/config.h
+vpxdec.SRCS                 += tools_common.c tools_common.h
+vpxdec.SRCS                 += nestegg/halloc/halloc.h
+vpxdec.SRCS                 += nestegg/halloc/src/align.h
+vpxdec.SRCS                 += nestegg/halloc/src/halloc.c
+vpxdec.SRCS                 += nestegg/halloc/src/hlist.h
+vpxdec.SRCS                 += nestegg/halloc/src/macros.h
+vpxdec.SRCS                 += nestegg/include/nestegg/nestegg.h
+vpxdec.SRCS                 += nestegg/src/nestegg.c
+vpxdec.GUID                  = BA5FE66F-38DD-E034-F542-B1578C5FB950
+vpxdec.DESCRIPTION           = Full featured decoder
+UTILS-$(CONFIG_ENCODERS)    += vpxenc.c
+vpxenc.SRCS                 += args.c args.h y4minput.c y4minput.h
+vpxenc.SRCS                 += tools_common.c tools_common.h
+vpxenc.SRCS                 += vpx_ports/config.h vpx_ports/mem_ops.h
+vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS                 += libmkv/EbmlIDs.h
+vpxenc.SRCS                 += libmkv/EbmlWriter.c
+vpxenc.SRCS                 += libmkv/EbmlWriter.h
+vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
+vpxenc.DESCRIPTION           = Full featured encoder
+
+# Clean up old ivfenc, ivfdec binaries.
+ifeq ($(CONFIG_MSVS),yes)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfenc.exe)
+CLEAN-OBJS += $(foreach p,$(VS_PLATFORMS),$(p)/Release/ivfdec.exe)
+else
+CLEAN-OBJS += ivfenc{.c.o,.c.d,.dox,.exe,}
+CLEAN-OBJS += ivfdec{.c.o,.c.d,.dox,.exe,}
+endif

 # XMA example disabled for now, not used in VP8
 #UTILS-$(CONFIG_DECODERS)    += example_xma.c
--- a/examples/decoder_tmpl.c
+++ b/examples/decoder_tmpl.c
@@ -61,8 +61,8 @@ int main(int argc, char **argv) {
        die("Failed to open %s for writing", argv[2]);

    /* Read file header */
-    fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile);
-    if(!(file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
+    if(!(fread(file_hdr, 1, IVF_FILE_HDR_SZ, infile) == IVF_FILE_HDR_SZ
+         && file_hdr[0]=='D' && file_hdr[1]=='K' && file_hdr[2]=='I'
         && file_hdr[3]=='F'))
        die("%s is not an IVF file.", argv[1]);

--- a/examples/decoder_tmpl.txt
+++ b/examples/decoder_tmpl.txt
@@ -48,7 +48,7 @@ for(plane=0; plane < 3; plane++) {
    unsigned char *buf =img->planes[plane];

    for(y=0; y<img->d_h >> (plane?1:0); y++) {
-        fwrite(buf, 1, img->d_w >> (plane?1:0), outfile);
+        if(fwrite(buf, 1, img->d_w >> (plane?1:0), outfile));
        buf += img->stride[plane];
    }
 }
--- a/examples/encoder_tmpl.c
+++ b/examples/encoder_tmpl.c
@@ -85,7 +85,7 @@ static void write_ivf_file_header(FILE *outfile,
    mem_put_le32(header+24, frame_cnt);           /* length */
    mem_put_le32(header+28, 0);                   /* unused */

-    fwrite(header, 1, 32, outfile);
+    if(fwrite(header, 1, 32, outfile));
 }


@@ -103,7 +103,7 @@ static void write_ivf_frame_header(FILE *outfile,
    mem_put_le32(header+4, pts&0xFFFFFFFF);
    mem_put_le32(header+8, pts >> 32);

-    fwrite(header, 1, 12, outfile);
+    if(fwrite(header, 1, 12, outfile));
 }

 int main(int argc, char **argv) {
--- a/examples/encoder_tmpl.txt
+++ b/examples/encoder_tmpl.txt
@@ -61,8 +61,8 @@ if(vpx_codec_encode(&codec, frame_avail? &raw : NULL, frame_cnt,
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME
 case VPX_CODEC_CX_FRAME_PKT:
    write_ivf_frame_header(outfile, pkt);
-    fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
-           outfile);
+    if(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz,
+              outfile));
    break;
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PROCESS_FRAME

--- a/ivfdec.c
+++ b/ivfdec.c
@@ -1,640 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-/* This is a simple program that reads ivf files and decodes them
- * using the new interface. Decoded frames are output as YV12 raw.
- */
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdarg.h>
-#include <string.h>
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx_config.h"
-#include "vpx/vpx_decoder.h"
-#include "vpx_ports/vpx_timer.h"
-#if CONFIG_VP8_DECODER
-#include "vpx/vp8dx.h"
-#endif
-#if CONFIG_MD5
-#include "md5_utils.h"
-#endif
-
-static const char *exec_name;
-
-static const struct
-{
-    char const *name;
-    const vpx_codec_iface_t *iface;
-    unsigned int             fourcc;
-    unsigned int             fourcc_mask;
-} ifaces[] =
-{
-#if CONFIG_VP8_DECODER
-    {"vp8",  &vpx_codec_vp8_dx_algo,   0x00385056, 0x00FFFFFF},
-#endif
-};
-
-#include "args.h"
-static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1,
-                                  "Codec to use");
-static const arg_def_t prefixarg = ARG_DEF("p", "prefix", 1,
-                                   "Prefix to use when saving frames");
-static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0,
-                                  "Output file is YV12 ");
-static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0,
-                                  "Output file is I420 (default)");
-static const arg_def_t flipuvarg = ARG_DEF(NULL, "flipuv", 0,
-                                   "Synonym for --yv12");
-static const arg_def_t noblitarg = ARG_DEF(NULL, "noblit", 0,
-                                   "Don't process the decoded frames");
-static const arg_def_t progressarg = ARG_DEF(NULL, "progress", 0,
-                                     "Show progress after each frame decodes");
-static const arg_def_t limitarg = ARG_DEF(NULL, "limit", 1,
-                                  "Stop decoding after n frames");
-static const arg_def_t postprocarg = ARG_DEF(NULL, "postproc", 0,
-                                     "Postprocess decoded frames");
-static const arg_def_t summaryarg = ARG_DEF(NULL, "summary", 0,
-                                    "Show timing summary");
-static const arg_def_t outputfile = ARG_DEF("o", "output", 1,
-                                    "Output raw yv12 file instead of images");
-static const arg_def_t usey4marg = ARG_DEF("y", "y4m", 0,
-                                    "Output file is YUV4MPEG2");
-static const arg_def_t threadsarg = ARG_DEF("t", "threads", 1,
-                                    "Max threads to use");
-static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0,
-                                  "Suppress version string");
-
-#if CONFIG_MD5
-static const arg_def_t md5arg = ARG_DEF(NULL, "md5", 0,
-                                        "Compute the MD5 sum of the decoded frame");
-#endif
-static const arg_def_t *all_args[] =
-{
-    &codecarg, &prefixarg, &use_yv12, &use_i420, &flipuvarg, &noblitarg,
-    &progressarg, &limitarg, &postprocarg, &summaryarg, &outputfile,
-    &usey4marg, &threadsarg, &quietarg,
-#if CONFIG_MD5
-    &md5arg,
-#endif
-    NULL
-};
-
-#if CONFIG_VP8_DECODER
-static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1,
-                                        "Enable VP8 postproc add noise");
-static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0,
-                                 "Enable VP8 deblocking");
-static const arg_def_t demacroblock_level = ARG_DEF(NULL, "demacroblock-level", 1,
-        "Enable VP8 demacroblocking, w/ level");
-static const arg_def_t pp_debug_info = ARG_DEF(NULL, "pp-debug-info", 1,
-                                       "Enable VP8 visible debug info");
-
-
-static const arg_def_t *vp8_pp_args[] =
-{
-    &addnoise_level, &deblock, &demacroblock_level, &pp_debug_info,
-    NULL
-};
-#endif
-
-static void usage_exit()
-{
-    int i;
-
-    fprintf(stderr, "Usage: %s <options> filename\n\n"
-            "Options:\n", exec_name);
-    arg_show_usage(stderr, all_args);
-#if CONFIG_VP8_DECODER
-    fprintf(stderr, "\nvp8 Postprocessing Options:\n");
-    arg_show_usage(stderr, vp8_pp_args);
-#endif
-    fprintf(stderr, "\nIncluded decoders:\n\n");
-
-    for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-        fprintf(stderr, "    %-6s - %s\n",
-                ifaces[i].name,
-                vpx_codec_iface_name(ifaces[i].iface));
-
-    exit(EXIT_FAILURE);
-}
-
-void die(const char *fmt, ...)
-{
-    va_list ap;
-    va_start(ap, fmt);
-    vfprintf(stderr, fmt, ap);
-    fprintf(stderr, "\n");
-    usage_exit();
-}
-
-static unsigned int mem_get_le16(const void *vmem)
-{
-    unsigned int  val;
-    const unsigned char *mem = (const unsigned char *)vmem;
-
-    val = mem[1] << 8;
-    val |= mem[0];
-    return val;
-}
-
-static unsigned int mem_get_le32(const void *vmem)
-{
-    unsigned int  val;
-    const unsigned char *mem = (const unsigned char *)vmem;
-
-    val = mem[3] << 24;
-    val |= mem[2] << 16;
-    val |= mem[1] << 8;
-    val |= mem[0];
-    return val;
-}
-
-#define IVF_FRAME_HDR_SZ (sizeof(uint32_t) + sizeof(uint64_t))
-#define RAW_FRAME_HDR_SZ (sizeof(uint32_t))
-static int read_frame(FILE                  *infile,
-                      uint8_t               **buf,
-                      uint32_t              *buf_sz,
-                      uint32_t              *buf_alloc_sz,
-                      int                    is_ivf)
-{
-    char     raw_hdr[IVF_FRAME_HDR_SZ];
-    uint32_t new_buf_sz;
-
-    /* For both the raw and ivf formats, the frame size is the first 4 bytes
-     * of the frame header. We just need to special case on the header
-     * size.
-     */
-    if (fread(raw_hdr, is_ivf ? IVF_FRAME_HDR_SZ : RAW_FRAME_HDR_SZ, 1,
-              infile) != 1)
-    {
-        if (!feof(infile))
-            fprintf(stderr, "Failed to read frame size\n");
-
-        new_buf_sz = 0;
-    }
-    else
-    {
-        new_buf_sz = mem_get_le32(raw_hdr);
-
-        if (new_buf_sz > 256 * 1024 * 1024)
-        {
-            fprintf(stderr, "Error: Read invalid frame size (%u)\n",
-                    new_buf_sz);
-            new_buf_sz = 0;
-        }
-
-        if (!is_ivf && new_buf_sz > 256 * 1024)
-            fprintf(stderr, "Warning: Read invalid frame size (%u)"
-                    " - not a raw file?\n", new_buf_sz);
-
-        if (new_buf_sz > *buf_alloc_sz)
-        {
-            uint8_t *new_buf = realloc(*buf, 2 * new_buf_sz);
-
-            if (new_buf)
-            {
-                *buf = new_buf;
-                *buf_alloc_sz = 2 * new_buf_sz;
-            }
-            else
-            {
-                fprintf(stderr, "Failed to allocate compressed data buffer\n");
-                new_buf_sz = 0;
-            }
-        }
-    }
-
-    *buf_sz = new_buf_sz;
-
-    if (*buf_sz)
-    {
-        if (fread(*buf, 1, *buf_sz, infile) != *buf_sz)
-        {
-            fprintf(stderr, "Failed to read full frame\n");
-            return 1;
-        }
-
-        return 0;
-    }
-
-    return 1;
-}
-
-void *out_open(const char *out_fn, int do_md5)
-{
-    void *out = NULL;
-
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        MD5Context *md5_ctx = out = malloc(sizeof(MD5Context));
-        (void)out_fn;
-        MD5Init(md5_ctx);
-#endif
-    }
-    else
-    {
-        FILE *outfile = out = strcmp("-", out_fn) ? fopen(out_fn, "wb") : stdout;
-
-        if (!outfile)
-        {
-            fprintf(stderr, "Failed to output file");
-            exit(EXIT_FAILURE);
-        }
-    }
-
-    return out;
-}
-
-void out_put(void *out, const uint8_t *buf, unsigned int len, int do_md5)
-{
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        MD5Update(out, buf, len);
-#endif
-    }
-    else
-    {
-        fwrite(buf, 1, len, out);
-    }
-}
-
-void out_close(void *out, const char *out_fn, int do_md5)
-{
-    if (do_md5)
-    {
-#if CONFIG_MD5
-        uint8_t md5[16];
-        int i;
-
-        MD5Final(md5, out);
-        free(out);
-
-        for (i = 0; i < 16; i++)
-            printf("%02x", md5[i]);
-
-        printf("  %s\n", out_fn);
-#endif
-    }
-    else
-    {
-        fclose(out);
-    }
-}
-
-unsigned int file_is_ivf(FILE *infile,
-                         unsigned int *fourcc,
-                         unsigned int *width,
-                         unsigned int *height,
-                         unsigned int *timebase_num,
-                         unsigned int *timebase_den)
-{
-    char raw_hdr[32];
-    int is_ivf = 0;
-
-    if (fread(raw_hdr, 1, 32, infile) == 32)
-    {
-        if (raw_hdr[0] == 'D' && raw_hdr[1] == 'K'
-            && raw_hdr[2] == 'I' && raw_hdr[3] == 'F')
-        {
-            is_ivf = 1;
-
-            if (mem_get_le16(raw_hdr + 4) != 0)
-                fprintf(stderr, "Error: Unrecognized IVF version! This file may not"
-                        " decode properly.");
-
-            *fourcc = mem_get_le32(raw_hdr + 8);
-            *width = mem_get_le16(raw_hdr + 12);
-            *height = mem_get_le16(raw_hdr + 14);
-            *timebase_den = mem_get_le32(raw_hdr + 16);
-            *timebase_num = mem_get_le32(raw_hdr + 20);
-        }
-    }
-
-    if (!is_ivf)
-        rewind(infile);
-
-    return is_ivf;
-}
-
-int main(int argc, const char **argv_)
-{
-    vpx_codec_ctx_t          decoder;
-    char                  *prefix = NULL, *fn = NULL;
-    int                    i;
-    uint8_t               *buf = NULL;
-    uint32_t               buf_sz = 0, buf_alloc_sz = 0;
-    FILE                  *infile;
-    int                    frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0, do_md5 = 0, progress = 0;
-    int                    stop_after = 0, postproc = 0, summary = 0, quiet = 0;
-    vpx_codec_iface_t       *iface = NULL;
-    unsigned int           is_ivf, fourcc;
-    unsigned long          dx_time = 0;
-    struct arg               arg;
-    char                   **argv, **argi, **argj;
-    const char                   *fn2 = 0;
-    int                     use_y4m = 0;
-    unsigned int            width;
-    unsigned int            height;
-    unsigned int            timebase_num;
-    unsigned int            timebase_den;
-    void                   *out = NULL;
-    vpx_codec_dec_cfg_t     cfg = {0};
-#if CONFIG_VP8_DECODER
-    vp8_postproc_cfg_t      vp8_pp_cfg = {0};
-#endif
-
-    /* Parse command line */
-    exec_name = argv_[0];
-    argv = argv_dup(argc - 1, argv_ + 1);
-
-    for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step)
-    {
-        memset(&arg, 0, sizeof(arg));
-        arg.argv_step = 1;
-
-        if (arg_match(&arg, &codecarg, argi))
-        {
-            int j, k = -1;
-
-            for (j = 0; j < sizeof(ifaces) / sizeof(ifaces[0]); j++)
-                if (!strcmp(ifaces[j].name, arg.val))
-                    k = j;
-
-            if (k >= 0)
-                iface = ifaces[k].iface;
-            else
-                die("Error: Unrecognized argument (%s) to --codec\n",
-                    arg.val);
-        }
-        else if (arg_match(&arg, &outputfile, argi))
-            fn2 = arg.val;
-        else if (arg_match(&arg, &usey4marg, argi))
-            use_y4m = 1;
-        else if (arg_match(&arg, &prefixarg, argi))
-            prefix = strdup(arg.val);
-        else if (arg_match(&arg, &use_yv12, argi))
-            flipuv = 1;
-        else if (arg_match(&arg, &use_i420, argi))
-            flipuv = 0;
-        else if (arg_match(&arg, &flipuvarg, argi))
-            flipuv = 1;
-        else if (arg_match(&arg, &noblitarg, argi))
-            noblit = 1;
-        else if (arg_match(&arg, &progressarg, argi))
-            progress = 1;
-        else if (arg_match(&arg, &limitarg, argi))
-            stop_after = arg_parse_uint(&arg);
-        else if (arg_match(&arg, &postprocarg, argi))
-            postproc = 1;
-        else if (arg_match(&arg, &md5arg, argi))
-            do_md5 = 1;
-        else if (arg_match(&arg, &summaryarg, argi))
-            summary = 1;
-        else if (arg_match(&arg, &threadsarg, argi))
-            cfg.threads = arg_parse_uint(&arg);
-        else if (arg_match(&arg, &quietarg, argi))
-            quiet = 1;
-
-#if CONFIG_VP8_DECODER
-        else if (arg_match(&arg, &addnoise_level, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE;
-            vp8_pp_cfg.noise_level = arg_parse_uint(&arg);
-        }
-        else if (arg_match(&arg, &demacroblock_level, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK;
-            vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg);
-        }
-        else if (arg_match(&arg, &deblock, argi))
-        {
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK;
-        }
-        else if (arg_match(&arg, &pp_debug_info, argi))
-        {
-            unsigned int level = arg_parse_uint(&arg);
-
-            postproc = 1;
-            vp8_pp_cfg.post_proc_flag &= ~0x7;
-
-            if (level)
-                vp8_pp_cfg.post_proc_flag |= 8 << (level - 1);
-        }
-
-#endif
-        else
-            argj++;
-    }
-
-    /* Check for unrecognized options */
-    for (argi = argv; *argi; argi++)
-        if (argi[0][0] == '-' && strlen(argi[0]) > 1)
-            die("Error: Unrecognized option %s\n", *argi);
-
-    /* Handle non-option arguments */
-    fn = argv[0];
-
-    if (!fn)
-        usage_exit();
-
-    if (!prefix)
-        prefix = strdup("img");
-
-    /* Open file */
-    infile = strcmp(fn, "-") ? fopen(fn, "rb") : stdin;
-
-    if (!infile)
-    {
-        fprintf(stderr, "Failed to open file");
-        return EXIT_FAILURE;
-    }
-
-    if (fn2)
-        out = out_open(fn2, do_md5);
-
-    is_ivf = file_is_ivf(infile, &fourcc, &width, &height,
-                         &timebase_num, &timebase_den);
-
-    if (is_ivf)
-    {
-        if (use_y4m)
-        {
-            char buffer[128];
-            if (!fn2)
-            {
-                fprintf(stderr, "YUV4MPEG2 output only supported with -o.\n");
-                return EXIT_FAILURE;
-            }
-            /*Correct for the factor of 2 applied to the timebase in the
-               encoder.*/
-            if(timebase_den&1)timebase_num<<=1;
-            else timebase_den>>=1;
-            /*Note: We can't output an aspect ratio here because IVF doesn't
-               store one, and neither does VP8.
-              That will have to wait until these tools support WebM natively.*/
-            sprintf(buffer, "YUV4MPEG2 C%s W%u H%u F%u:%u I%c\n",
-                    "420jpeg", width, height, timebase_den, timebase_num, 'p');
-            out_put(out, (unsigned char *)buffer, strlen(buffer), do_md5);
-        }
-
-        /* Try to determine the codec from the fourcc. */
-        for (i = 0; i < sizeof(ifaces) / sizeof(ifaces[0]); i++)
-            if ((fourcc & ifaces[i].fourcc_mask) == ifaces[i].fourcc)
-            {
-                vpx_codec_iface_t  *ivf_iface = ifaces[i].iface;
-
-                if (iface && iface != ivf_iface)
-                    fprintf(stderr, "Notice -- IVF header indicates codec: %s\n",
-                            ifaces[i].name);
-                else
-                    iface = ivf_iface;
-
-                break;
-            }
-    }
-    else if(use_y4m)
-    {
-        fprintf(stderr, "YUV4MPEG2 output only supported from IVF input.\n");
-        return EXIT_FAILURE;
-    }
-
-    if (vpx_codec_dec_init(&decoder, iface ? iface :  ifaces[0].iface, &cfg,
-                           postproc ? VPX_CODEC_USE_POSTPROC : 0))
-    {
-        fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-    if (!quiet)
-        fprintf(stderr, "%s\n", decoder.name);
-
-#if CONFIG_VP8_DECODER
-
-    if (vp8_pp_cfg.post_proc_flag
-        && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg))
-    {
-        fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-#endif
-
-    /* Decode file */
-    while (!read_frame(infile, &buf, &buf_sz, &buf_alloc_sz, is_ivf))
-    {
-        vpx_codec_iter_t  iter = NULL;
-        vpx_image_t    *img;
-        struct vpx_usec_timer timer;
-
-        vpx_usec_timer_start(&timer);
-
-        if (vpx_codec_decode(&decoder, buf, buf_sz, NULL, 0))
-        {
-            const char *detail = vpx_codec_error_detail(&decoder);
-            fprintf(stderr, "Failed to decode frame: %s\n", vpx_codec_error(&decoder));
-
-            if (detail)
-                fprintf(stderr, "  Additional information: %s\n", detail);
-
-            goto fail;
-        }
-
-        vpx_usec_timer_mark(&timer);
-        dx_time += vpx_usec_timer_elapsed(&timer);
-
-        ++frame_in;
-
-        if (progress)
-            fprintf(stderr, "decoded frame %d.\n", frame_in);
-
-        if ((img = vpx_codec_get_frame(&decoder, &iter)))
-            ++frame_out;
-
-        if (!noblit)
-        {
-            if (img)
-            {
-                unsigned int y;
-                char out_fn[128+24];
-                uint8_t *buf;
-                const char *sfx = flipuv ? "yv12" : "i420";
-
-                if (!fn2)
-                {
-                    sprintf(out_fn, "%s-%dx%d-%04d.%s",
-                            prefix, img->d_w, img->d_h, frame_in, sfx);
-                    out = out_open(out_fn, do_md5);
-                }
-                else if(use_y4m)
-                    out_put(out, (unsigned char *)"FRAME\n", 6, do_md5);
-
-                buf = img->planes[VPX_PLANE_Y];
-
-                for (y = 0; y < img->d_h; y++)
-                {
-                    out_put(out, buf, img->d_w, do_md5);
-                    buf += img->stride[VPX_PLANE_Y];
-                }
-
-                buf = img->planes[flipuv?VPX_PLANE_V:VPX_PLANE_U];
-
-                for (y = 0; y < (1 + img->d_h) / 2; y++)
-                {
-                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[VPX_PLANE_U];
-                }
-
-                buf = img->planes[flipuv?VPX_PLANE_U:VPX_PLANE_V];
-
-                for (y = 0; y < (1 + img->d_h) / 2; y++)
-                {
-                    out_put(out, buf, (1 + img->d_w) / 2, do_md5);
-                    buf += img->stride[VPX_PLANE_V];
-                }
-
-                if (!fn2)
-                    out_close(out, out_fn, do_md5);
-            }
-        }
-
-        if (stop_after && frame_in >= stop_after)
-            break;
-    }
-
-    if (summary)
-    {
-        fprintf(stderr, "%d decoded frames/%d showed frames in %lu us (%.2f fps)\n",
-                frame_in, frame_out, dx_time, (float)frame_out * 1000000.0 / (float)dx_time);
-    }
-
-fail:
-
-    if (vpx_codec_destroy(&decoder))
-    {
-        fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder));
-        return EXIT_FAILURE;
-    }
-
-    if (fn2)
-        out_close(out, fn2, do_md5);
-
-    free(buf);
-    fclose(infile);
-    free(prefix);
-    free(argv);
-
-    return EXIT_SUCCESS;
-}
--- a/libmkv/EbmlBufferWriter.c
+++ b/libmkv/EbmlBufferWriter.c
@@ -0,0 +1,60 @@
+//#include <strmif.h>
+#include "EbmlBufferWriter.h"
+#include "EbmlWriter.h"
+//#include <cassert>
+//#include <limits>
+//#include <malloc.h>  //_alloca
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+
+void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    unsigned char *src = glob->buf;
+    src += glob->offset;
+    memcpy(src, buffer_in, len);
+    glob->offset += len;
+}
+
+/* Append the bytes of [p, q) to glob->buf in reverse memory order (last byte first). */
+static void _Serialize(EbmlGlobal *glob, const unsigned char *p, const unsigned char *q)
+{
+    /* NOTE(review): glob->length is never consulted here -- confirm callers size buf. */
+    while (q != p)
+    {
+        --q;
+        glob->buf[glob->offset] = *q;
+        glob->offset ++;
+    }
+}
+
+void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len)
+{
+    //assert(buf);
+
+    const unsigned char *const p = (const unsigned char *)(buffer_in);
+    const unsigned char *const q = p + len;
+
+    _Serialize(glob, p, q);
+}
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id)
+{
+    Ebml_WriteID(glob, class_id);
+    ebmlLoc->offset = glob->offset;
+    //todo this is always taking 8 bytes, this may need later optimization
+    unsigned long long unknownLen =  0x01FFFFFFFFFFFFFFLLU;
+    Ebml_Serialize(glob, (void *)&unknownLen, 8); //this is a key that says length unknown
+}
+
+void Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc)
+{
+    unsigned long long size = glob->offset - ebmlLoc->offset - 8;
+    unsigned long long curOffset = glob->offset;
+    glob->offset = ebmlLoc->offset;
+    size |=  0x0100000000000000LLU;
+    Ebml_Serialize(glob, &size, 8);
+    glob->offset = curOffset;
+}
+
--- a/libmkv/EbmlBufferWriter.h
+++ b/libmkv/EbmlBufferWriter.h
@@ -0,0 +1,21 @@
+#ifndef EBMLBUFFERWRITER_HPP
+#define EBMLBUFFERWRITER_HPP
+
+typedef struct
+{
+    unsigned long long offset;
+} EbmlLoc;
+
+typedef struct EbmlGlobal /* tagged so it matches the forward typedef in EbmlWriter.h */
+{
+    unsigned char *buf;      /* output buffer written by Ebml_Write/Ebml_Serialize */
+    unsigned int length;     /* NOTE(review): presumably the capacity of buf -- confirm */
+    unsigned int offset;     /* index in buf where the next byte is written */
+} EbmlGlobal;
+
+
+void Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, unsigned long class_id);
+void Ebml_EndSubElement(EbmlGlobal *glob,  EbmlLoc *ebmlLoc);
+
+
+#endif
--- a/libmkv/EbmlIDs.h
+++ b/libmkv/EbmlIDs.h
@@ -0,0 +1,231 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_DEFS_HPP
+#define MKV_DEFS_HPP 1
+
+//Commenting out values not available in webm, but available in matroska
+
+enum mkv
+{
+    EBML = 0x1A45DFA3,
+    EBMLVersion = 0x4286,
+    EBMLReadVersion = 0x42F7,
+    EBMLMaxIDLength = 0x42F2,
+    EBMLMaxSizeLength = 0x42F3,
+    DocType = 0x4282,
+    DocTypeVersion = 0x4287,
+    DocTypeReadVersion = 0x4285,
+//  CRC_32 = 0xBF,
+    Void = 0xEC,
+    SignatureSlot = 0x1B538667,
+    SignatureAlgo = 0x7E8A,
+    SignatureHash = 0x7E9A,
+    SignaturePublicKey = 0x7EA5,
+    Signature = 0x7EB5,
+    SignatureElements = 0x7E5B,
+    SignatureElementList = 0x7E7B,
+    SignedElement = 0x6532,
+    //segment
+    Segment = 0x18538067,
+    //Meta Seek Information
+    SeekHead = 0x114D9B74,
+    Seek = 0x4DBB,
+    SeekID = 0x53AB,
+    SeekPosition = 0x53AC,
+    //Segment Information
+    Info = 0x1549A966,
+//  SegmentUID = 0x73A4,
+//  SegmentFilename = 0x7384,
+//  PrevUID = 0x3CB923,
+//  PrevFilename = 0x3C83AB,
+//  NextUID = 0x3EB923,
+//  NextFilename = 0x3E83BB,
+//  SegmentFamily = 0x4444,
+//  ChapterTranslate = 0x6924,
+//  ChapterTranslateEditionUID = 0x69FC,
+//  ChapterTranslateCodec = 0x69BF,
+//  ChapterTranslateID = 0x69A5,
+    TimecodeScale = 0x2AD7B1,
+    Segment_Duration = 0x4489,
+    DateUTC = 0x4461,
+//  Title = 0x7BA9,
+    MuxingApp = 0x4D80,
+    WritingApp = 0x5741,
+    //Cluster
+    Cluster = 0x1F43B675,
+    Timecode = 0xE7,
+//  SilentTracks = 0x5854,
+//  SilentTrackNumber = 0x58D7,
+//  Position = 0xA7,
+    PrevSize = 0xAB,
+    BlockGroup = 0xA0,
+    Block = 0xA1,
+//  BlockVirtual = 0xA2,
+//  BlockAdditions = 0x75A1,
+//  BlockMore = 0xA6,
+//  BlockAddID = 0xEE,
+//  BlockAdditional = 0xA5,
+    BlockDuration = 0x9B,
+//  ReferencePriority = 0xFA,
+    ReferenceBlock = 0xFB,
+//  ReferenceVirtual = 0xFD,
+//  CodecState = 0xA4,
+//  Slices = 0x8E,
+//  TimeSlice = 0xE8,
+    LaceNumber = 0xCC,
+//  FrameNumber = 0xCD,
+//  BlockAdditionID = 0xCB,
+//  MkvDelay = 0xCE,
+//  Cluster_Duration = 0xCF,
+    SimpleBlock = 0xA3,
+//  EncryptedBlock = 0xAF,
+    //Track
+    Tracks = 0x1654AE6B,
+    TrackEntry = 0xAE,
+    TrackNumber = 0xD7,
+    TrackUID = 0x73C5,
+    TrackType = 0x83,
+    FlagEnabled = 0xB9,
+    FlagDefault = 0x88,
+    FlagForced = 0x55AA,
+    FlagLacing = 0x9C,
+//  MinCache = 0x6DE7,
+//  MaxCache = 0x6DF8,
+    DefaultDuration = 0x23E383,
+//  TrackTimecodeScale = 0x23314F,
+//  TrackOffset = 0x537F,
+//  MaxBlockAdditionID = 0x55EE,
+    Name = 0x536E,
+    Language = 0x22B59C,
+    CodecID = 0x86,
+    CodecPrivate = 0x63A2,
+    CodecName = 0x258688,
+//  AttachmentLink = 0x7446,
+//  CodecSettings = 0x3A9697,
+//  CodecInfoURL = 0x3B4040,
+//  CodecDownloadURL = 0x26B240,
+//  CodecDecodeAll = 0xAA,
+//  TrackOverlay = 0x6FAB,
+//  TrackTranslate = 0x6624,
+//  TrackTranslateEditionUID = 0x66FC,
+//  TrackTranslateCodec = 0x66BF,
+//  TrackTranslateTrackID = 0x66A5,
+    //video
+    Video = 0xE0,
+    FlagInterlaced = 0x9A,
+//  StereoMode = 0x53B8,
+    PixelWidth = 0xB0,
+    PixelHeight = 0xBA,
+    PixelCropBottom = 0x54AA,
+    PixelCropTop = 0x54BB,
+    PixelCropLeft = 0x54CC,
+    PixelCropRight = 0x54DD,
+    DisplayWidth = 0x54B0,
+    DisplayHeight = 0x54BA,
+    DisplayUnit = 0x54B2,
+    AspectRatioType = 0x54B3,
+//  ColourSpace = 0x2EB524,
+//  GammaValue = 0x2FB523,
+    FrameRate = 0x2383E3,
+    //end video
+    //audio
+    Audio = 0xE1,
+    SamplingFrequency = 0xB5,
+    OutputSamplingFrequency = 0x78B5,
+    Channels = 0x9F,
+//  ChannelPositions = 0x7D7B,
+    BitDepth = 0x6264,
+    //end audio
+    //content encoding
+//  ContentEncodings = 0x6d80,
+//  ContentEncoding = 0x6240,
+//  ContentEncodingOrder = 0x5031,
+//  ContentEncodingScope = 0x5032,
+//  ContentEncodingType = 0x5033,
+//  ContentCompression = 0x5034,
+//  ContentCompAlgo = 0x4254,
+//  ContentCompSettings = 0x4255,
+//  ContentEncryption = 0x5035,
+//  ContentEncAlgo = 0x47e1,
+//  ContentEncKeyID = 0x47e2,
+//  ContentSignature = 0x47e3,
+//  ContentSigKeyID = 0x47e4,
+//  ContentSigAlgo = 0x47e5,
+//  ContentSigHashAlgo = 0x47e6,
+    //end content encoding
+    //Cueing Data
+    Cues = 0x1C53BB6B,
+    CuePoint = 0xBB,
+    CueTime = 0xB3,
+    CueTrackPositions = 0xB7,
+    CueTrack = 0xF7,
+    CueClusterPosition = 0xF1,
+    CueBlockNumber = 0x5378,
+//  CueCodecState = 0xEA,
+//  CueReference = 0xDB,
+//  CueRefTime = 0x96,
+//  CueRefCluster = 0x97,
+//  CueRefNumber = 0x535F,
+//  CueRefCodecState = 0xEB,
+    //Attachment
+//  Attachments = 0x1941A469,
+//  AttachedFile = 0x61A7,
+//  FileDescription = 0x467E,
+//  FileName = 0x466E,
+//  FileMimeType = 0x4660,
+//  FileData = 0x465C,
+//  FileUID = 0x46AE,
+//  FileReferral = 0x4675,
+    //Chapters
+//  Chapters = 0x1043A770,
+//  EditionEntry = 0x45B9,
+//  EditionUID = 0x45BC,
+//  EditionFlagHidden = 0x45BD,
+//  EditionFlagDefault = 0x45DB,
+//  EditionFlagOrdered = 0x45DD,
+//  ChapterAtom = 0xB6,
+//  ChapterUID = 0x73C4,
+//  ChapterTimeStart = 0x91,
+//  ChapterTimeEnd = 0x92,
+//  ChapterFlagHidden = 0x98,
+//  ChapterFlagEnabled = 0x4598,
+//  ChapterSegmentUID = 0x6E67,
+//  ChapterSegmentEditionUID = 0x6EBC,
+//  ChapterPhysicalEquiv = 0x63C3,
+//  ChapterTrack = 0x8F,
+//  ChapterTrackNumber = 0x89,
+//  ChapterDisplay = 0x80,
+//  ChapString = 0x85,
+//  ChapLanguage = 0x437C,
+//  ChapCountry = 0x437E,
+//  ChapProcess = 0x6944,
+//  ChapProcessCodecID = 0x6955,
+//  ChapProcessPrivate = 0x450D,
+//  ChapProcessCommand = 0x6911,
+//  ChapProcessTime = 0x6922,
+//  ChapProcessData = 0x6933,
+    //Tagging
+//  Tags = 0x1254C367,
+//  Tag = 0x7373,
+//  Targets = 0x63C0,
+//  TargetTypeValue = 0x68CA,
+//  TargetType = 0x63CA,
+//  Tagging_TrackUID = 0x63C5,
+//  Tagging_EditionUID = 0x63C9,
+//  Tagging_ChapterUID = 0x63C4,
+//  AttachmentUID = 0x63C6,
+//  SimpleTag = 0x67C8,
+//  TagName = 0x45A3,
+//  TagLanguage = 0x447A,
+//  TagDefault = 0x4484,
+//  TagString = 0x4487,
+//  TagBinary = 0x4485,
+};
+#endif
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -0,0 +1,166 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlWriter.h"
+#include <stdlib.h>
+#include <wchar.h>
+#include <string.h>
+#if defined(_MSC_VER)
+#define LITERALU64(n) n
+#else
+#define LITERALU64(n) n##LLU
+#endif
+
+/* Write val as an EBML size field: a marker bit, then 7 value bits per byte. */
+void Ebml_WriteLen(EbmlGlobal *glob, long long val)
+{
+    //TODO check and make sure we are not > than 0x0100000000000000LLU
+    unsigned char size = 8; //size in bytes to output
+    /* A 1-byte field holds 7 value bits, so the first limit is 0x7f (not 0xff):
+     * values 0x7f..0xfe would otherwise collide with the marker bit set below. */
+    unsigned long long minVal = LITERALU64(0x000000000000007f);
+
+    for (size = 1; size < 8; size ++)
+    {
+        if (val < minVal)
+            break;
+        minVal = (minVal << 7);
+    }
+    val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7));
+    Ebml_Serialize(glob, (void *) &val, size);
+}
+
+void Ebml_WriteString(EbmlGlobal *glob, const char *str)
+{
+    const size_t size_ = strlen(str);
+    const unsigned long long  size = size_;
+    Ebml_WriteLen(glob, size);
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we omit the null terminator.
+    Ebml_Write(glob, str, size);
+}
+
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr)
+{
+    const size_t strlen = wcslen(wstr);
+
+    //TODO: it's not clear from the spec whether the nul terminator
+    //should be serialized too.  For now we omit it (size below is wcslen, no +1).
+    const unsigned long long  size = strlen;
+
+    Ebml_WriteLen(glob, size);
+    Ebml_Write(glob, wstr, size);
+}
+
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id)
+{
+    if (class_id >= 0x01000000)
+        Ebml_Serialize(glob, (void *)&class_id, 4);
+    else if (class_id >= 0x00010000)
+        Ebml_Serialize(glob, (void *)&class_id, 3);
+    else if (class_id >= 0x00000100)
+        Ebml_Serialize(glob, (void *)&class_id, 2);
+    else
+        Ebml_Serialize(glob, (void *)&class_id, 1);
+}
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui)
+{
+    unsigned char sizeSerialized = 8 | 0x80;
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    Ebml_Serialize(glob, &ui, 8);
+}
+
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui)
+{
+    unsigned char size = 8; //size in bytes to output
+    unsigned char sizeSerialized = 0;
+    unsigned long minVal;
+
+    Ebml_WriteID(glob, class_id);
+    minVal = 0x7fLU; //mask to compare for byte size
+
+    for (size = 1; size < 4; size ++)
+    {
+        if (ui < minVal)
+        {
+            break;
+        }
+
+        minVal <<= 7;
+    }
+
+    sizeSerialized = 0x80 | size;
+    Ebml_Serialize(glob, &sizeSerialized, 1);
+    Ebml_Serialize(glob, &ui, size);
+}
+//TODO: perhaps this is a poor name for this id serializer helper function
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin)
+{
+    int size; /* bytes Ebml_WriteID will emit for bin, between 1 and 4 */
+    for (size=4; size > 1; size--)
+    {
+        if (bin >= (1UL << ((size-1) * 8))) /* unsigned math: 0xff << 24 overflowed int */
+            break;
+    }
+    Ebml_WriteID(glob, class_id); /* element ID */
+    Ebml_WriteLen(glob, size);    /* payload length in bytes */
+    Ebml_WriteID(glob, bin);      /* payload: bin, emitted with the ID writer */
+}
+
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d)
+{
+    unsigned char len = 0x88;
+
+    Ebml_WriteID(glob, class_id);
+    Ebml_Serialize(glob, &len, 1);
+    Ebml_Serialize(glob,  &d, 8);
+}
+
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val)
+{
+    signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8;
+    Ebml_Serialize(glob, &out, 3);
+}
+
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s)
+{
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteString(glob, s);
+}
+
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s)
+{
+    Ebml_WriteID(glob,  class_id);
+    Ebml_WriteUTF8(glob,  s);
+}
+
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length)
+{
+    /* Element layout: ID, then the payload size, then data_length raw bytes of data. */
+    Ebml_WriteID(glob, class_id);
+    Ebml_WriteLen(glob, data_length);
+    Ebml_Write(glob,  data, data_length);
+}
+
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize)
+{
+    unsigned char tmp = 0;
+    unsigned long i = 0;
+
+    Ebml_WriteID(glob, 0xEC);
+    Ebml_WriteLen(glob, vSize);
+
+    for (i = 0; i < vSize; i++)
+    {
+        Ebml_Write(glob, &tmp, 1);
+    }
+}
+
+//TODO Serialize Date
--- a/libmkv/EbmlWriter.h
+++ b/libmkv/EbmlWriter.h
@@ -0,0 +1,38 @@
+#ifndef EBMLWRITER_HPP
+#define EBMLWRITER_HPP
+
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+//note: you must define write and serialize functions as well as your own EBML_GLOBAL
+//These functions MUST be implemented
+#include <stddef.h>
+#include "vpx/vpx_integer.h"
+
+typedef struct EbmlGlobal EbmlGlobal;
+void  Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long);
+void  Ebml_Write(EbmlGlobal *glob, const void *, unsigned long);
+/////
+
+
+void Ebml_WriteLen(EbmlGlobal *glob, long long val);
+void Ebml_WriteString(EbmlGlobal *glob, const char *str);
+void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr);
+void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id);
+void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui);
+void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long ui);
+void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d);
+//TODO make this more generic to signed
+void Ebml_WriteSigned16(EbmlGlobal *glob, short val);
+void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s);
+void Ebml_SerializeUTF8(EbmlGlobal *glob, unsigned long class_id, wchar_t *s);
+void Ebml_SerializeData(EbmlGlobal *glob, unsigned long class_id, unsigned char *data, unsigned long data_length);
+void Ebml_WriteVoid(EbmlGlobal *glob, unsigned long vSize);
+//TODO need date function
+#endif
--- a/libmkv/Makefile
+++ b/libmkv/Makefile
@@ -0,0 +1,25 @@
+#Variables
+CC=gcc
+LINKER=gcc
+FLAGS=
+
+
+#Build Targets
+EbmlWriter.o: EbmlWriter.c EbmlWriter.h
+	$(CC) $(FLAGS) -c EbmlWriter.c
+
+EbmlBufferWriter.o: EbmlBufferWriter.c EbmlBufferWriter.h
+	$(CC) $(FLAGS) -c EbmlBufferWriter.c
+	
+WebMElement.o: WebMElement.c WebMElement.h
+	$(CC) $(FLAGS) -c WebMElement.c
+	
+testlibmkv.o: testlibmkv.c
+	$(CC) $(FLAGS) -c testlibmkv.c
+	
+testlibmkv: testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o
+	$(LINKER) $(FLAGS) testlibmkv.o WebMElement.o EbmlBufferWriter.o EbmlWriter.o -o testlibmkv
+
+clean:
+	rm -rf *.o testlibmkv
+	
--- a/libmkv/WebMElement.c
+++ b/libmkv/WebMElement.c
@@ -0,0 +1,220 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#include "EbmlBufferWriter.h"
+#include "EbmlIDs.h"
+#include "WebMElement.h"
+#include <stdio.h>
+
+#define kVorbisPrivateMaxSize  4000
+
+void writeHeader(EbmlGlobal *glob)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, EBML);
+    Ebml_SerializeUnsigned(glob, EBMLVersion, 1);
+    Ebml_SerializeUnsigned(glob, EBMLReadVersion, 1); //EBML Read Version
+    Ebml_SerializeUnsigned(glob, EBMLMaxIDLength, 4); //EBML Max ID Length
+    Ebml_SerializeUnsigned(glob, EBMLMaxSizeLength, 8); //EBML Max Size Length
+    Ebml_SerializeString(glob, DocType, "webm"); //Doc Type
+    Ebml_SerializeUnsigned(glob, DocTypeVersion, 2); //Doc Type Version
+    Ebml_SerializeUnsigned(glob, DocTypeReadVersion, 2); //Doc Type Read Version
+    Ebml_EndSubElement(glob, &start);
+}
+
+void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength)
+{
+    Ebml_WriteID(glob, SimpleBlock);
+    unsigned long blockLength = 4 + dataLength;
+    blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF
+    Ebml_Serialize(glob, &blockLength, 4);
+    trackNumber |= 0x80;  //TODO check track number < 128
+    Ebml_Write(glob, &trackNumber, 1);
+    //Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes
+    Ebml_Serialize(glob, &timeCode, 2);
+    unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable;
+    Ebml_Write(glob, &flags, 1);
+    Ebml_Write(glob, data, dataLength);
+}
+
+static UInt64 generateTrackID(unsigned int trackNumber)
+{
+    UInt64 t = time(NULL) * trackNumber;
+    UInt64 r = rand();
+    r = r << 32;
+    r +=  rand();
+    UInt64 rval = t ^ r;
+    return rval;
+}
+
+void writeVideoTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    UInt64 trackID = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+    Ebml_SerializeString(glob, CodecName, "VP8");  //TODO shouldn't be fixed
+
+    Ebml_SerializeUnsigned(glob, TrackType, 1); //video is always 1
+    Ebml_SerializeString(glob, CodecID, codecId);
+    {
+        EbmlLoc videoStart;
+        Ebml_StartSubElement(glob, &videoStart, Video);
+        Ebml_SerializeUnsigned(glob, PixelWidth, pixelWidth);
+        Ebml_SerializeUnsigned(glob, PixelHeight, pixelHeight);
+        Ebml_SerializeFloat(glob, FrameRate, frameRate);
+        Ebml_EndSubElement(glob, &videoStart); //Video
+    }
+    Ebml_EndSubElement(glob, &start); //Track Entry
+}
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *private, unsigned long privateSize)
+{
+    EbmlLoc start;
+    Ebml_StartSubElement(glob, &start, TrackEntry);
+    Ebml_SerializeUnsigned(glob, TrackNumber, trackNumber);
+    UInt64 trackID = generateTrackID(trackNumber);
+    Ebml_SerializeUnsigned(glob, TrackUID, trackID);
+    Ebml_SerializeUnsigned(glob, TrackType, 2); //audio is always 2
+    //I am using defaults for these required fields
+    /*  Ebml_SerializeUnsigned(glob, FlagEnabled, 1);
+        Ebml_SerializeUnsigned(glob, FlagDefault, 1);
+        Ebml_SerializeUnsigned(glob, FlagForced, 1);
+        Ebml_SerializeUnsigned(glob, FlagLacing, flagLacing);*/
+    Ebml_SerializeString(glob, CodecID, codecId);
+    Ebml_SerializeData(glob, CodecPrivate, private, privateSize);
+
+    Ebml_SerializeString(glob, CodecName, "VORBIS");  //fixed for now
+    {
+        EbmlLoc AudioStart;
+        Ebml_StartSubElement(glob, &AudioStart, Audio);
+        Ebml_SerializeFloat(glob, SamplingFrequency, samplingFrequency);
+        Ebml_SerializeUnsigned(glob, Channels, channels);
+        Ebml_EndSubElement(glob, &AudioStart);
+    }
+    Ebml_EndSubElement(glob, &start);
+}
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo, unsigned long timeCodeScale, double duration)
+{
+    Ebml_StartSubElement(ebml, startInfo, Info); //size is patched in by Ebml_EndSubElement below
+    Ebml_SerializeUnsigned(ebml, TimecodeScale, timeCodeScale);
+    Ebml_SerializeFloat(ebml, Segment_Duration, duration * 1000.0); //Currently fixed to using milliseconds
+    Ebml_SerializeString(ebml, MuxingApp, "QTmuxingAppLibWebM-0.0.1");   //0x4D80 in EbmlIDs.h
+    Ebml_SerializeString(ebml, WritingApp, "QTwritingAppLibWebM-0.0.1"); //0x5741 in EbmlIDs.h
+    Ebml_EndSubElement(ebml, startInfo);
+}
+
+/*
+void Mkv_InitializeSegment(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x18538067);
+}
+
+void Mkv_InitializeSeek(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x114d9b74);
+}
+void Mkv_WriteSeekInformation(Ebml& ebml_out, SeekStruct& seekInformation)
+{
+    EbmlLoc ebmlLoc;
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x4dbb);
+    Ebml_SerializeString(ebml_out, 0x53ab, seekInformation.SeekID);
+    Ebml_SerializeUnsigned(ebml_out, 0x53ac, seekInformation.SeekPosition);
+    Ebml_EndSubElement(ebml_out, ebmlLoc);
+}
+
+void Mkv_WriteSegmentInformation(Ebml& ebml_out, SegmentInformationStruct& segmentInformation)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0x73a4, segmentInformation.segmentUID);
+    if (segmentInformation.filename != 0)
+        Ebml_SerializeString(ebml_out, 0x7384, segmentInformation.filename);
+    Ebml_SerializeUnsigned(ebml_out, 0x2AD7B1, segmentInformation.TimecodeScale);
+    Ebml_SerializeUnsigned(ebml_out, 0x4489, segmentInformation.Duration);
+    //TODO date
+    Ebml_SerializeWString(ebml_out, 0x4D80, L"MKVMUX");
+    Ebml_SerializeWString(ebml_out, 0x5741, segmentInformation.WritingApp);
+}
+
+void Mkv_InitializeTrack(Ebml& ebml_out, EbmlLoc& ebmlLoc)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1654AE6B);
+}
+
+static void Mkv_WriteGenericTrackData(Ebml& ebml_out, TrackStruct& track)
+{
+    Ebml_SerializeUnsigned(ebml_out, 0xD7, track.TrackNumber);
+    Ebml_SerializeUnsigned(ebml_out, 0x73C5, track.TrackUID);
+    Ebml_SerializeUnsigned(ebml_out, 0x83, track.TrackType);
+    Ebml_SerializeUnsigned(ebml_out, 0xB9, track.FlagEnabled ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x88, track.FlagDefault ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0x55AA, track.FlagForced ? 1 :0);
+    if (track.Language != 0)
+        Ebml_SerializeString(ebml_out, 0x22B59C, track.Language);
+    if (track.CodecID != 0)
+        Ebml_SerializeString(ebml_out, 0x86, track.CodecID);
+    if (track.CodecPrivate != 0)
+        Ebml_SerializeData(ebml_out, 0x63A2, track.CodecPrivate, track.CodecPrivateLength);
+    if (track.CodecName != 0)
+        Ebml_SerializeWString(ebml_out, 0x258688, track.CodecName);
+}
+
+void Mkv_WriteVideoTrack(Ebml& ebml_out, TrackStruct & track, VideoTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, videoHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);  //start Track
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, videoHeadLoc, 0xE0);  //start Video
+    Ebml_SerializeUnsigned(ebml_out, 0x9A, video.FlagInterlaced ? 1 :0);
+    Ebml_SerializeUnsigned(ebml_out, 0xB0, video.PixelWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0xBA, video.PixelHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B0, video.PixelDisplayWidth);
+    Ebml_SerializeUnsigned(ebml_out, 0x54BA, video.PixelDisplayHeight);
+    Ebml_SerializeUnsigned(ebml_out, 0x54B2, video.displayUnit);
+    Ebml_SerializeFloat(ebml_out, 0x2383E3, video.FrameRate);
+    Ebml_EndSubElement(ebml_out, videoHeadLoc);
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+
+}
+
+void Mkv_WriteAudioTrack(Ebml& ebml_out, TrackStruct & track, AudioTrackStruct& video)
+{
+    EbmlLoc trackHeadLoc, audioHeadLoc;
+    Ebml_StartSubElement(ebml_out, trackHeadLoc, 0xAE);
+    Mkv_WriteGenericTrackData(ebml_out, track);
+    Ebml_StartSubElement(ebml_out, audioHeadLoc, 0xE0);  //start Audio
+    Ebml_SerializeFloat(ebml_out, 0xB5, video.SamplingFrequency);
+    Ebml_SerializeUnsigned(ebml_out, 0x9F, video.Channels);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, video.BitDepth);
+    Ebml_EndSubElement(ebml_out, audioHeadLoc); // end audio
+    Ebml_EndSubElement(ebml_out, trackHeadLoc);
+}
+
+void Mkv_WriteEbmlClusterHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, ClusterHeadStruct & clusterHead)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0x1F43B675);
+    Ebml_SerializeUnsigned(ebml_out, 0x6264, clusterHead.TimeCode);
+}
+
+void Mkv_WriteSimpleBlockHead(Ebml& ebml_out,  EbmlLoc& ebmlLoc, SimpleBlockStruct& block)
+{
+    Ebml_StartSubElement(ebml_out, ebmlLoc, 0xA3);
+    Ebml_Write1UInt(ebml_out, block.TrackNumber);
+    Ebml_WriteSigned16(ebml_out,block.TimeCode);
+    unsigned char flags = 0x00 | (block.iskey ? 0x80:0x00) | (block.lacing << 1) | block.discardable;
+    Ebml_Write1UInt(ebml_out, flags);  //TODO this may be the wrong function
+    Ebml_Serialize(ebml_out, block.data, block.dataLength);
+    Ebml_EndSubElement(ebml_out,ebmlLoc);
+}
+*/
--- a/libmkv/WebMElement.h
+++ b/libmkv/WebMElement.h
@@ -0,0 +1,35 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+#ifndef MKV_CONTEXT_HPP
+#define MKV_CONTEXT_HPP 1
+
+// Declarations of the WebM element writers.  EbmlGlobal and EbmlLoc are not
+// declared here; the including file must provide them before this header.
+
+// NOTE(review): the name below looks like a misspelling of writeSimpleBlock
+// (declared further down with a signed timeCode).  It is kept so that any
+// existing reference still sees a prototype -- confirm whether a definition
+// exists and drop this declaration if it does not.
+void writeSimpleBock(EbmlGlobal *ebml, unsigned char trackNumber, unsigned short timeCode,
+                     int isKeyframe, unsigned char lacingFlag, int  discardable,
+                     unsigned char *data, unsigned long dataLength);
+
+
+// these are helper functions
+void writeHeader(EbmlGlobal *ebml);
+void writeSegmentInformation(EbmlGlobal *ebml, EbmlLoc* startInfo , unsigned long timeCodeScale, double duration);
+//this function is a helper only, it assumes a lot of defaults
+void writeVideoTrack(EbmlGlobal *ebml, unsigned int trackNumber, int flagLacing,
+                     char *codecId, unsigned int pixelWidth, unsigned int pixelHeight,
+                     double frameRate);
+// The codec private buffer parameter is named privateData rather than
+// "private": "private" is a C++ keyword and would stop this header from being
+// parsed by a C++ compiler.  Prototype parameter names do not affect callers
+// or the definition.
+void writeAudioTrack(EbmlGlobal *glob, unsigned int trackNumber, int flagLacing,
+                     char *codecId, double samplingFrequency, unsigned int channels,
+                     unsigned char *privateData, unsigned long privateSize);
+
+void writeSimpleBlock(EbmlGlobal *ebml, unsigned char trackNumber, short timeCode,
+                      int isKeyframe, unsigned char lacingFlag, int discardable,
+                      unsigned char *data, unsigned long dataLength);
+
+
+
+#endif
--- a/libmkv/testlibmkv.c
+++ b/libmkv/testlibmkv.c
@@ -0,0 +1,63 @@
+// Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+
+
+
+#include "EbmlIDs.h"
+#include "EbmlBufferWriter.h"
+#include "WebMElement.h"
+
+#include <stdio.h>
+// Builds a small EBML stream in a stack buffer (header, then a Segment holding
+// an Info element, a Tracks element with one video track, and a Cluster with
+// one simple block) and dumps the bytes produced to "test.mkv".
+// Returns 0 on success and 1 if the output file cannot be opened, written
+// completely, or closed.
+int main(int argc, char *argv[])
+{
+    //init the datatype we're using for ebml output
+    unsigned char data[8192];
+    EbmlGlobal ebml;
+    FILE *file_out;
+    size_t bytesWritten;
+    int status = 0;
+
+    //command line arguments are not used
+    (void)argc;
+    (void)argv;
+
+    ebml.buf = data;
+    ebml.offset = 0;
+    ebml.length = sizeof(data);
+
+    writeHeader(&ebml);
+    {
+        EbmlLoc startSegment;
+        Ebml_StartSubElement(&ebml, &startSegment, Segment); //segment
+        {
+            //segment info
+            EbmlLoc startInfo;
+            Ebml_StartSubElement(&ebml, &startInfo, Info);
+            Ebml_SerializeString(&ebml, 0x4D80, "muxingAppLibMkv");
+            Ebml_SerializeString(&ebml, 0x5741, "writingAppLibMkv");
+            Ebml_EndSubElement(&ebml, &startInfo);
+        }
+
+        {
+            EbmlLoc trackStart;
+            Ebml_StartSubElement(&ebml, &trackStart, Tracks);
+            writeVideoTrack(&ebml, 1, 1, "V_MS/VFW/FOURCC", 320, 240, 29.97);
+            //writeAudioTrack(&ebml,2,1, "A_VORBIS", 32000, 1, NULL, 0);
+            Ebml_EndSubElement(&ebml, &trackStart);
+        }
+
+        {
+            EbmlLoc clusterStart;
+            Ebml_StartSubElement(&ebml, &clusterStart, Cluster); //cluster
+            Ebml_SerializeUnsigned(&ebml, Timecode, 0);
+
+            unsigned char someData[4] = {1, 2, 3, 4};
+            writeSimpleBlock(&ebml, 1, 0, 1, 0, 0, someData, 4);
+            Ebml_EndSubElement(&ebml, &clusterStart);
+        }    //end cluster
+        Ebml_EndSubElement(&ebml, &startSegment);
+    }
+
+    //dump ebml stuff to the file
+    file_out = fopen("test.mkv", "wb");
+    if (!file_out)
+    {
+        //without this check a failed open would hand NULL to fwrite/fclose
+        perror("test.mkv");
+        return 1;
+    }
+
+    bytesWritten = fwrite(data, 1, ebml.offset, file_out);
+    if (bytesWritten != (size_t)ebml.offset)
+    {
+        fprintf(stderr, "test.mkv: short write\n");
+        status = 1;
+    }
+
+    //fclose flushes buffered output, so its result matters for a written file
+    if (fclose(file_out) != 0)
+    {
+        perror("test.mkv");
+        status = 1;
+    }
+
+    return status;
+}
--- a/libs.mk
+++ b/libs.mk
@@ -91,7 +91,9 @@ ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emms.asm
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86.h
 CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_abi_support.asm
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/x86_cpuid.c
 endif
+CODEC_SRCS-$(ARCH_ARM) += vpx_ports/arm_cpudetect.c
 CODEC_SRCS-$(ARCH_ARM) += $(BUILD_PFX)vpx_config.asm
 CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
--- a/nestegg/.gitignore
+++ b/nestegg/.gitignore
@@ -0,0 +1,40 @@
+*.lo
+*.o
+*.swp
+*~
+.deps
+.dirstamp
+.libs
+Makefile
+Makefile.in
+_stdint.h
+aclocal.m4
+autom4te.cache
+compile
+config.guess
+config.h
+config.h.in
+config.log
+config.status
+config.sub
+configure
+depcomp
+docs/Doxyfile
+docs/doxygen-build.stamp
+docs/html
+install-sh
+libtool
+ltmain.sh
+m4/libtool.m4
+m4/ltoptions.m4
+m4/ltsugar.m4
+m4/ltversion.m4
+m4/lt~obsolete.m4
+missing
+nestegg-uninstalled.pc
+nestegg.pc
+src/.dirstamp
+src/libnestegg.la
+stamp-h1
+test/test
+include/nestegg/nestegg-stdint.h
--- a/nestegg/AUTHORS
+++ b/nestegg/AUTHORS
@@ -0,0 +1 @@
+Matthew Gregan <kinetik@flim.org>
--- a/nestegg/INSTALL
+++ b/nestegg/INSTALL
@@ -0,0 +1,8 @@
+Build instructions for libnestegg
+=================================
+
+0. Change directory into the source directory.
+1. Run |autoreconf --install| to generate configure.
+2. Run |./configure| to configure the build.
+3. Run |make| to build.
+4. Run |make check| to run the test suite.
--- a/nestegg/LICENSE
+++ b/nestegg/LICENSE
@@ -0,0 +1,13 @@
+Copyright © 2010 Mozilla Foundation
+
+Permission to use, copy, modify, and distribute this software for any
+purpose with or without fee is hereby granted, provided that the above
+copyright notice and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
--- a/nestegg/Makefile.am
+++ b/nestegg/Makefile.am
@@ -0,0 +1,51 @@
+AUTOMAKE_OPTIONS = foreign 1.11 no-dist-gzip dist-bzip2 subdir-objects
+ACLOCAL_AMFLAGS = -I m4
+
+INCLUDES = -I$(top_srcdir)/include -I. -I$(top_srcdir)/halloc
+AM_CFLAGS = -ansi -pedantic -Wall -Wextra -Wno-long-long -O0 -g
+
+SUBDIRS = docs
+
+EXTRA_DIST = \
+	AUTHORS README LICENSE \
+	nestegg-uninstalled.pc.in \
+	m4/as-ac-expand.m4 \
+	m4/pkg.m4 \
+	m4/ax_create_stdint_h.m4 \
+	halloc/src/halloc.c \
+	halloc/halloc.h \
+	halloc/src/align.h \
+	halloc/src/hlist.h \
+	halloc/src/macros.h
+
+pkgconfigdir = $(libdir)/pkgconfig
+pkgconfig_DATA = nestegg.pc
+
+nesteggincludedir = $(includedir)/nestegg
+nestegginclude_HEADERS = include/nestegg/nestegg.h include/nestegg/nestegg-stdint.h
+
+lib_LTLIBRARIES = src/libnestegg.la
+
+src_libnestegg_la_SOURCES = \
+	src/nestegg.c \
+	halloc/src/halloc.c \
+	halloc/halloc.h \
+	halloc/src/align.h \
+	halloc/src/hlist.h \
+	halloc/src/macros.h
+
+check_PROGRAMS = test/test
+
+test_test_SOURCES = test/test.c
+test_test_LDADD = src/libnestegg.la
+
+DISTCLEANFILES = include/nestegg/nestegg-stdint.h
+
+dist-hook:
+	find $(distdir) -type d -name '.git' | xargs rm -rf
+
+debug:
+	$(MAKE) all CFLAGS="@DEBUG@"
+
+profile:
+	$(MAKE) all CFLAGS="@PROFILE@"
--- a/nestegg/README
+++ b/nestegg/README
@@ -0,0 +1,6 @@
+See INSTALL for build instructions.
+
+Licensed under an ISC-style license.  See LICENSE for details.
+
+The source under the halloc/ directory is licensed under a BSD license.  See
+halloc/halloc.h for details.
--- a/nestegg/TODO
+++ b/nestegg/TODO
@@ -0,0 +1,21 @@
+- Document when read, seek, tell callbacks are used.
+- Add an automated testsuite.
+- Test (and fix, if necessary) support for unknown sizes.
+- Test (and fix, if necessary) support for large files.
+- Read past unknown elements rather than seeking.
+- Try to handle unknown elements with unknown sizes.
+- Formalize handling of default element values.
+- Try to resynchronize stream when read_block fails so that failure to parse
+  a single block can be treated as non-fatal.
+- Make logging more useful to API users.
+- Avoid reparsing Cues and ignore any SeekHead at end of file.
+- Optionally build a Cue index as Clusters are parsed.
+- Support seeking without Cues.
+- Avoid building a list of Clusters as they are parsed and retain only the
+  last one parsed.
+- Add an asynchronous error code to struct nestegg and ensure that API calls
+  continue to fail safely once a fatal error has been returned.
+- Modify parser/data structures to provide a clean separation.  Perhaps the
+  parser should return a generic tree of nodes that a second pass uses to
+  initialize the main data structures.
+- Use pool allocator for all allocations.
--- a/nestegg/configure.ac
+++ b/nestegg/configure.ac
@@ -0,0 +1,124 @@
+dnl ------------------------------------------------
+dnl Initialization and Versioning
+dnl ------------------------------------------------
+
+AC_INIT(libnestegg,[0.1git])
+
+AC_CANONICAL_HOST
+AC_CANONICAL_TARGET
+
+AC_CONFIG_MACRO_DIR([m4])
+
+AM_CONFIG_HEADER([config.h])
+AC_CONFIG_SRCDIR([src/nestegg.c])
+AM_INIT_AUTOMAKE
+
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
+
+dnl Library versioning
+dnl CURRENT, REVISION, AGE
+dnl - library source changed -> increment REVISION
+dnl - interfaces added/removed/changed -> increment CURRENT, REVISION = 0
+dnl - interfaces added -> increment AGE
+dnl - interfaces removed -> AGE = 0
+
+NESTEGG_CURRENT=0
+NESTEGG_REVISION=0
+NESTEGG_AGE=1
+AC_SUBST(NESTEGG_CURRENT)
+AC_SUBST(NESTEGG_REVISION)
+AC_SUBST(NESTEGG_AGE)
+
+
+dnl --------------------------------------------------  
+dnl Check for programs
+dnl --------------------------------------------------  
+
+dnl save $CFLAGS since AC_PROG_CC likes to insert "-g -O2"
+dnl if $CFLAGS is blank
+cflags_save="$CFLAGS"
+AC_PROG_CC
+AC_PROG_CPP
+CFLAGS="$cflags_save"
+
+AM_PROG_CC_C_O
+AC_LIBTOOL_WIN32_DLL
+AM_PROG_LIBTOOL
+
+dnl Check for doxygen
+AC_ARG_ENABLE([doc],
+	AS_HELP_STRING([--enable-doc], [Build API documentation]),
+	[ac_enable_doc=$enableval], [ac_enable_doc=auto])
+
+if test "x$ac_enable_doc" != "xno"; then
+	AC_CHECK_PROG(HAVE_DOXYGEN, doxygen, true, false)
+
+	if test "x$HAVE_DOXYGEN" = "xfalse" -a "x$ac_enable_doc" = "xyes"; then
+		AC_MSG_ERROR([*** API documentation explicitly requested but Doxygen not found])
+	fi
+else
+	HAVE_DOXYGEN=false
+fi
+AM_CONDITIONAL(HAVE_DOXYGEN,$HAVE_DOXYGEN)
+if test $HAVE_DOXYGEN = "false"; then
+        AC_MSG_WARN([*** doxygen not found, API documentation will not be built])
+fi
+
+# Generate portable stdint.h replacement
+AX_CREATE_STDINT_H(include/nestegg/nestegg-stdint.h)
+
+# Test whether ld supports -version-script
+AC_PROG_LD
+AC_PROG_LD_GNU
+AC_MSG_CHECKING([how to control symbol export])
+
+dnl --------------------------------------------------
+dnl Do substitutions
+dnl --------------------------------------------------
+
+AC_SUBST(DEBUG)
+AC_SUBST(PROFILE)
+
+AC_OUTPUT([
+  Makefile 
+  docs/Makefile
+  docs/Doxyfile
+  nestegg.pc
+  nestegg-uninstalled.pc
+])
+
+AS_AC_EXPAND(LIBDIR, ${libdir})
+AS_AC_EXPAND(INCLUDEDIR, ${includedir})
+AS_AC_EXPAND(BINDIR, ${bindir})
+AS_AC_EXPAND(DOCDIR, ${docdir})
+
+if test $HAVE_DOXYGEN = "false"; then
+  doc_build="no"
+else
+  doc_build="yes"
+fi
+
+AC_MSG_RESULT([
+------------------------------------------------------------------------
+  $PACKAGE $VERSION:  Automatic configuration OK.
+
+  General configuration:
+
+    API Documentation: .......... ${doc_build}
+
+  Installation paths:
+
+    libnestegg: .................. ${LIBDIR}
+    C header files: .............. ${INCLUDEDIR}/nestegg
+    Documentation: ............... ${DOCDIR}
+
+  Building:
+
+    Type 'make' to compile $PACKAGE.
+
+    Type 'make install' to install $PACKAGE.
+
+  Example programs will be built but not installed.
+------------------------------------------------------------------------
+])
+
--- a/nestegg/docs/Doxyfile.in
+++ b/nestegg/docs/Doxyfile.in
--- a/nestegg/docs/Makefile.am
+++ b/nestegg/docs/Makefile.am
@@ -0,0 +1,38 @@
+doc_DATA = doxygen-build.stamp
+
+EXTRA_DIST = Doxyfile.in
+
+if HAVE_DOXYGEN
+doxygen-build.stamp: Doxyfile
+	doxygen
+	touch doxygen-build.stamp
+else
+doxygen-build.stamp:
+	echo "*** Warning: Doxygen not found; documentation will not be built."
+	touch doxygen-build.stamp
+endif
+
+dist_docdir = $(distdir)/libnestegg
+
+dist-hook:
+	if test -d html; then \
+	  mkdir $(dist_docdir); \
+	  echo -n "copying built documenation..."; \
+	  cp -rp html $(dist_docdir)/html; \
+	  echo "OK"; \
+	fi
+
+
+install-data-local: doxygen-build.stamp
+	$(mkinstalldirs) $(DESTDIR)$(docdir)
+	if test -d html; then \
+	  cp -rp html $(DESTDIR)$(docdir)/html; \
+	fi
+
+uninstall-local:
+	rm -rf $(DESTDIR)$(docdir)
+
+clean-local:
+	if test -d html; then rm -rf html; fi
+	if test -f doxygen-build.stamp; then rm -f doxygen-build.stamp; fi
+
--- a/nestegg/halloc/README
+++ b/nestegg/halloc/README
@@ -0,0 +1,45 @@
+halloc 1.2.1
+============
+      
+	Hierarchical memory heap interface - an extension to standard
+	malloc/free interface that simplifies tasks of memory disposal 
+	when allocated structures exhibit hierarchical properties.
+
+	http://swapped.cc/halloc
+=
+	To build libhalloc.a with GNU tools run
+		make
+
+	To install in /usr/include and /usr/lib
+		make install
+
+	To cleanup the build files 
+		make clean
+=
+	halloc-1.2.1
+		* fixed a double-free bug in _set_allocator() as per
+		  Matthew Gregan comments
+
+		* switched to using NULL instead of 0 where applicable
+
+	halloc-1.2.0
+		* added missing <string.h> include to halloc.c
+		
+		* improved standard compliance thanks to the feedback
+		  received from Stan Tobias. Two things were fixed -
+		  
+		- hblock_t structure no longer uses zero-sized 'data'
+		  array, which happened to be common, but non-standard
+		  extension; 
+		  
+		- secondly, added the code to test the behaviour of 
+		  realloc(ptr, 0). Standard allows it NOT to act as
+		  free(), in which case halloc will use its own version
+		  of allocator calling free() when necessary.
+
+	halloc-1.1.0
+		* initial public release (rewrite of hhmalloc library)
+
+=============================================================================
+Copyright (c) 2004-2010, Alex Pankratov (ap@swapped.cc). All rights reserved.
+
--- a/nestegg/halloc/halloc.h
+++ b/nestegg/halloc/halloc.h
@@ -0,0 +1,43 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HALLOC_H_
+#define _LIBP_HALLOC_H_
+
+#include <stddef.h>  /* size_t */
+
+/*
+ *	Core API
+ */
+void * halloc (void * block, size_t len);
+void   hattach(void * block, void * parent);
+
+/*
+ *	standard malloc/free api
+ */
+void * h_malloc (size_t len);
+void * h_calloc (size_t n, size_t len);
+void * h_realloc(void * p, size_t len);
+void   h_free   (void * p);
+char * h_strdup (const char * str);
+
+/*
+ *	the underlying allocator
+ */
+typedef void * (* realloc_t)(void * ptr, size_t len);
+
+extern realloc_t halloc_allocator;
+
+#endif
+
--- a/nestegg/halloc/src/align.h
+++ b/nestegg/halloc/src/align.h
@@ -0,0 +1,36 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_ALIGN_H_
+#define _LIBP_ALIGN_H_
+
+/*
+ *	a type with the most strict alignment requirements
+ */
+union max_align
+{
+	char   c;
+	short  s;
+	long   l;
+	int    i;
+	float  f;
+	double d;
+	void * v;
+	void (*q)(void);
+};
+
+/*
+ *	NOTE(review): C11 declares a type called max_align_t in <stddef.h>.
+ *	A translation unit that includes both that header and this one may
+ *	be rejected for a conflicting typedef when built as C11 or later --
+ *	confirm against the compilers in use before relying on this name.
+ */
+typedef union max_align max_align_t;
+
+#endif
+
--- a/nestegg/halloc/src/halloc.c
+++ b/nestegg/halloc/src/halloc.c
@@ -0,0 +1,254 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#include <stdlib.h>  /* realloc */
+#include <string.h>  /* memset & co */
+
+#include "../halloc.h"
+#include "align.h"
+#include "hlist.h"
+
+/*
+ *	block control header
+ */
+typedef struct hblock
+{
+#ifndef NDEBUG
+#define HH_MAGIC    0x20040518L
+	long          magic;    /* HH_MAGIC while live; checked by the asserts */
+#endif
+	hlist_item_t  siblings; /* 2 pointers */
+	hlist_head_t  children; /* 1 pointer  */
+	max_align_t   data[1];  /* not allocated, see below */
+	
+} hblock_t;
+
+/*
+ *	size of the header alone: 'data' only marks where the caller's
+ *	bytes start, so allocations are made as len + sizeof_hblock and
+ *	the pointer handed to the caller is the address of 'data'
+ */
+#define sizeof_hblock offsetof(hblock_t, data)
+
+/*
+ *
+ */
+realloc_t halloc_allocator = NULL;
+
+#define allocator halloc_allocator
+
+/*
+ *	static methods
+ */
+static void _set_allocator(void);
+static void * _realloc(void * ptr, size_t n);
+
+static int  _relate(hblock_t * b, hblock_t * p);
+static void _free_children(hblock_t * p);
+
+/*
+ *	Core API
+ */
+/*
+ *	Single entry point for allocating, resizing and freeing blocks:
+ *
+ *	  ptr == NULL, len != 0   allocate a new, parentless block
+ *	  ptr == NULL, len == 0   no-op, returns NULL
+ *	  ptr != NULL, len != 0   resize the block, keeping its links
+ *	  ptr != NULL, len == 0   free the block and all of its children
+ *
+ *	Returns the user pointer of the (new or resized) block, or NULL
+ *	after a free and on failure. When a resize fails the original
+ *	block is left untouched.
+ */
+void * halloc(void * ptr, size_t len)
+{
+	hblock_t * p;
+
+	/* set up default allocator */
+	if (! allocator)
+	{
+		_set_allocator();
+		assert(allocator);
+	}
+
+	/*
+	 *	len + sizeof_hblock must not wrap around, otherwise a
+	 *	block much smaller than requested would be handed out
+	 */
+	if (len > (size_t)-1 - sizeof_hblock)
+		return NULL;
+
+	/* calloc */
+	if (! ptr)
+	{
+		if (! len)
+			return NULL;
+
+		p = allocator(0, len + sizeof_hblock);
+		if (! p)
+			return NULL;
+#ifndef NDEBUG
+		p->magic = HH_MAGIC;
+#endif
+		hlist_init(&p->children);
+		hlist_init_item(&p->siblings);
+
+		return p->data;
+	}
+
+	p = structof(ptr, hblock_t, data);
+	assert(p->magic == HH_MAGIC);
+
+	/* realloc */
+	if (len)
+	{
+		/*
+		 *	an unlinked block has siblings.prev pointing at its
+		 *	own 'next' field; if the block moves, that address
+		 *	lies in the released memory and must not be written
+		 *	through, so remember the state before reallocating
+		 */
+		int detached = (p->siblings.prev == &p->siblings.next);
+
+		p = allocator(p, len + sizeof_hblock);
+		if (! p)
+			return NULL;
+
+		if (detached)
+			hlist_init_item(&p->siblings);
+		else
+			hlist_relink(&p->siblings);
+		hlist_relink_head(&p->children);
+		
+		return p->data;
+	}
+
+	/* free */
+	_free_children(p);
+	hlist_del(&p->siblings);
+	allocator(p, 0);
+
+	return NULL;
+}
+
+/*
+ *	Move 'block' under 'parent'. The block is first taken off
+ *	whatever sibling list it is on; with a NULL parent it is left
+ *	detached. A NULL block is accepted only with a NULL parent.
+ */
+void hattach(void * block, void * parent)
+{
+	hblock_t * b;
+	hblock_t * p;
+	
+	if (! block)
+	{
+		assert(! parent);
+		return;
+	}
+
+	/* detach */
+	b = structof(block, hblock_t, data);
+	assert(b->magic == HH_MAGIC);
+	hlist_del(&b->siblings);
+
+	if (parent)
+	{
+		/* attach */
+		p = structof(parent, hblock_t, data);
+		assert(p->magic == HH_MAGIC);
+
+		/* sanity checks */
+		assert(b != p);          /* trivial */
+		assert(! _relate(p, b)); /* heavy ! */
+
+		hlist_add(&p->children, &b->siblings);
+	}
+}
+
+/*
+ *	malloc/free api
+ */
+/* malloc() counterpart: a new parentless block of 'len' bytes */
+void * h_malloc(size_t len)
+{
+	void * fresh = halloc(NULL, len);
+	return fresh;
+}
+
+/*
+ *	calloc() counterpart: a new parentless block of n * len zeroed
+ *	bytes. Returns NULL when the product is zero, when it does not
+ *	fit in size_t, or when the allocation fails.
+ */
+void * h_calloc(size_t n, size_t len)
+{
+	void * ptr;
+
+	/* refuse sizes where n * len would wrap around */
+	if (n && len > (size_t)-1 / n)
+		return NULL;
+
+	len *= n;
+	ptr = halloc(NULL, len);
+	return ptr ? memset(ptr, 0, len) : NULL;
+}
+
+/* realloc() counterpart: forwards both arguments to halloc() unchanged */
+void * h_realloc(void * ptr, size_t len)
+{
+	void * resized = halloc(ptr, len);
+	return resized;
+}
+
+/* free() counterpart: a zero length asks halloc() to release the block */
+void   h_free(void * ptr)
+{
+	(void) halloc(ptr, 0);
+}
+
+/*
+ *	strdup() counterpart: copies 'str', terminator included, into
+ *	a new parentless block. Returns NULL when 'str' is NULL or the
+ *	allocation fails.
+ */
+char * h_strdup(const char * str)
+{
+	size_t size;
+	char * copy;
+
+	/* strlen(NULL) is undefined; report it as a failed duplication */
+	if (! str)
+		return NULL;
+
+	size = strlen(str) + 1;
+	copy = halloc(NULL, size);
+	return copy ? memcpy(copy, str, size) : NULL;
+}
+
+/*
+ *	static stuff
+ */
+static void _set_allocator(void)
+{
+	void * probe;
+	assert(! allocator);
+	
+	/*
+	 *	realloc(ptr, 0) is left to the implementation by the
+	 *	standard. When it returns NULL it acts like free() and
+	 *	realloc can serve as the allocator directly. When it
+	 *	returns a non-NULL pointer it cannot be used to release
+	 *	blocks, and the _realloc() wrapper is installed instead.
+	 *
+	 *	Thanks to Stan Tobias for pointing this tricky part out.
+	 */
+	allocator = realloc;
+
+	probe = malloc(1);
+	if (! probe)
+		/* hmm */
+		return;
+
+	probe = realloc(probe, 0);
+	if (probe)
+	{
+		/* realloc cannot be used as free() */
+		allocator = _realloc;
+		free(probe);
+	}
+}
+
+/*
+ *	realloc() wrapper for which a zero size always releases the
+ *	block and yields NULL
+ */
+static void * _realloc(void * ptr, size_t n)
+{
+	if (! n)
+	{
+		free(ptr);
+		return NULL;
+	}
+
+	return realloc(ptr, n);
+}
+
+/*
+ *	Returns 1 when 'b' is found anywhere in the subtree below 'p',
+ *	0 otherwise (including when either argument is NULL)
+ */
+static int _relate(hblock_t * b, hblock_t * p)
+{
+	hlist_item_t * it;
+
+	if (!b || !p)
+		return 0;
+
+	/* 
+	 *  since there is no 'parent' pointer, which would've allowed
+	 *  O(log(n)) upward traversal, the check must use O(n) downward 
+	 *  iteration of the entire hierarchy; and this can be VERY SLOW
+	 */
+	hlist_for_each(it, &p->children)
+	{
+		hblock_t * child = structof(it, hblock_t, siblings);
+
+		if (child == b)
+			return 1;
+		if (_relate(b, child))
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ *	Releases every descendant of 'p', depth first. 'p' itself is
+ *	not released here and its 'children' head is not reset; the
+ *	caller is expected to dispose of 'p' afterwards.
+ */
+static void _free_children(hblock_t * p)
+{
+	hlist_item_t * i, * tmp;
+	
+#ifndef NDEBUG
+	/*
+	 *	this catches loops in hierarchy with almost zero 
+	 *	overhead (compared to _relate() running time)
+	 */
+	assert(p && p->magic == HH_MAGIC);
+	p->magic = 0; 
+#endif
+	/* the _safe form fetches the successor before the node is released */
+	hlist_for_each_safe(i, tmp, &p->children)
+	{
+		hblock_t * q = structof(i, hblock_t, siblings);
+		_free_children(q);
+		allocator(q, 0);
+	}
+}
+
--- a/nestegg/halloc/src/hlist.h
+++ b/nestegg/halloc/src/hlist.h
@@ -0,0 +1,136 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_HLIST_H_
+#define _LIBP_HLIST_H_
+
+#include <assert.h>
+#include "macros.h"  /* static_inline */
+
+/*
+ *	weak double-linked list w/ tail sentinel
+ */
+typedef struct hlist_head  hlist_head_t;
+typedef struct hlist_item  hlist_item_t;
+
+/*
+ *
+ */
+struct hlist_head
+{
+	hlist_item_t * next;   /* first item, or &hlist_null when the list is empty */
+};
+
+struct hlist_item
+{
+	hlist_item_t * next;   /* following item, or &hlist_null at the tail */
+	hlist_item_t ** prev;  /* address of the 'next' field that points at this item */
+};
+
+/*
+ *	shared tail sentinel
+ *
+ *	It has no initializer, so as a file-scope object its fields
+ *	start out as null pointers.
+ *
+ *	NOTE(review): this is a definition, not an extern declaration.
+ *	Including this header from more than one translation unit can
+ *	fail at link time with duplicate definitions on toolchains that
+ *	do not merge common symbols -- confirm it has a single includer.
+ */
+struct hlist_item hlist_null;
+
+/*
+ *
+ */
+#define __hlist_init(h)      { &hlist_null }
+#define __hlist_init_item(i) { &hlist_null, &(i).next }
+
+static_inline void hlist_init(hlist_head_t * h);
+static_inline void hlist_init_item(hlist_item_t * i);
+
+/* static_inline void hlist_purge(hlist_head_t * h); */
+
+/* static_inline bool_t hlist_empty(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_head(const hlist_head_t * h); */
+
+/* static_inline hlist_item_t * hlist_next(const hlist_item_t * i); */
+/* static_inline hlist_item_t * hlist_prev(const hlist_item_t * i, 
+                                           const hlist_head_t * h); */
+
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i);
+
+/* static_inline void hlist_add_prev(hlist_item_t * l, hlist_item_t * i); */
+/* static_inline void hlist_add_next(hlist_item_t * l, hlist_item_t * i); */
+
+static_inline void hlist_del(hlist_item_t * i);
+
+static_inline void hlist_relink(hlist_item_t * i);
+static_inline void hlist_relink_head(hlist_head_t * h);
+
+/*
+ *	visit every item of list 'h' in order; 'i' is the cursor and
+ *	must not be unlinked or released by the loop body
+ */
+#define hlist_for_each(i, h) \
+	for (i = (h)->next; i != &hlist_null; i = i->next)
+
+/*
+ *	same traversal, but the successor is saved in 'tmp' before the
+ *	body runs, so the body may unlink or release 'i'
+ */
+#define hlist_for_each_safe(i, tmp, h) \
+	for (i = (h)->next, tmp = i->next; \
+	     i!= &hlist_null; \
+	     i = tmp, tmp = i->next)
+
+/*
+ *	static
+ */
+/* make 'h' an empty list: its first item is the tail sentinel */
+static_inline void hlist_init(hlist_head_t * h)
+{
+	assert(h);
+	h->next = &hlist_null;
+}
+
+/*
+ *	put 'i' into the stand-alone state: it is followed by the tail
+ *	sentinel and its back pointer refers to its own 'next' field
+ */
+static_inline void hlist_init_item(hlist_item_t * i)
+{
+	assert(i);
+	i->next = &hlist_null;
+	i->prev = &i->next;
+}
+
+/*
+ *	insert 'i' at the front of list 'h'; 'i' is not unlinked from
+ *	any previous position first
+ */
+static_inline void hlist_add(hlist_head_t * h, hlist_item_t * i)
+{
+	hlist_item_t * next;
+	assert(h && i);
+	
+	/* the old first item (possibly the sentinel) now follows 'i' */
+	next = i->next = h->next;
+	next->prev = &i->next;
+	/* the head now leads to 'i' */
+	h->next = i;
+	i->prev = &h->next;
+}
+
+/*
+ *	unlink 'i' from its list and return it to the stand-alone
+ *	state set up by hlist_init_item()
+ */
+static_inline void hlist_del(hlist_item_t * i)
+{
+	hlist_item_t * next;
+	assert(i);
+
+	next = i->next;
+	/* when 'i' is last, 'next' is the sentinel and its prev field is written */
+	next->prev = i->prev;
+	/* whatever pointed at 'i' (a head or a predecessor) now skips it */
+	*i->prev = next;
+	
+	hlist_init_item(i);
+}
+
+/*
+ *	re-point both neighbours of 'i' at its current address; used
+ *	after the memory holding 'i' may have moved
+ *
+ *	NOTE(review): this writes through i->prev. For an item in the
+ *	stand-alone state that pointer refers to the item's own 'next'
+ *	field at its former address -- confirm callers only pass linked
+ *	items after a move.
+ */
+static_inline void hlist_relink(hlist_item_t * i)
+{
+	assert(i);
+	*i->prev = i;
+	i->next->prev = &i->next;
+}
+
+/*
+ *	re-point the first item's back pointer at the head's current
+ *	address; used after the memory holding 'h' may have moved.
+ *	For an empty list this writes the sentinel's prev field.
+ */
+static_inline void hlist_relink_head(hlist_head_t * h)
+{
+	assert(h);
+	h->next->prev = &h->next;
+}
+
+#endif
+
--- a/nestegg/halloc/src/macros.h
+++ b/nestegg/halloc/src/macros.h
@@ -0,0 +1,36 @@
+/*
+ *	Copyright (c) 2004-2010 Alex Pankratov. All rights reserved.
+ *
+ *	Hierarchical memory allocator, 1.2.1
+ *	http://swapped.cc/halloc
+ */
+
+/*
+ *	The program is distributed under terms of BSD license. 
+ *	You can obtain the copy of the license by visiting:
+ *	
+ *	http://www.opensource.org/licenses/bsd-license.php
+ */
+
+#ifndef _LIBP_MACROS_H_
+#define _LIBP_MACROS_H_
+
+#include <stddef.h>  /* offsetof */
+
+/*
+ *	restore pointer to the structure by a pointer to its field:
+ *	'p' points at member 'f' of an object of type 't', and the
+ *	result is a 't *' to that object. Written as a subtraction from
+ *	the byte pointer so that no negated unsigned offset is involved.
+ */
+#define structof(p,t,f) ((t*)((char*)(p) - offsetof(t,f)))
+
+/*
+ *	redefine for the target compiler
+ */
+#ifdef _WIN32
+#define static_inline static __inline
+#else
+#define static_inline static __inline__
+#endif
+
+
+#endif
+
--- a/nestegg/include/nestegg/nestegg.h
+++ b/nestegg/include/nestegg/nestegg.h
@@ -0,0 +1,292 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#ifndef   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+#define   NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79
+
+#include "vpx/vpx_integer.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @mainpage
+
+    @section intro Introduction
+
+    This is the documentation for the <tt>libnestegg</tt> C API.
+    <tt>libnestegg</tt> is a demultiplexing library for <a
+    href="http://www.matroska.org/">Matroska</a> and <a
+    href="http://www.webmproject.org/">WebM</a> media files.
+
+    @section example Example code
+
+    @code
+    nestegg * demux_ctx;
+    nestegg_init(&demux_ctx, io, NULL);
+
+    nestegg_packet * pkt;
+    while ((r = nestegg_read_packet(demux_ctx, &pkt)) > 0) {
+      unsigned int track;
+
+      nestegg_packet_track(pkt, &track);
+
+      // This example decodes the first track only.
+      if (track == 0) {
+        unsigned int chunk, chunks;
+
+        nestegg_packet_count(pkt, &chunks);
+
+        // Decode each chunk of data.
+        for (chunk = 0; chunk < chunks; ++chunk) {
+          unsigned char * data;
+          size_t data_size;
+
+          nestegg_packet_data(pkt, chunk, &data, &data_size);
+
+          example_codec_decode(codec_ctx, data, data_size);
+        }
+      }
+
+      nestegg_free_packet(pkt);
+    }
+
+    nestegg_destroy(demux_ctx);
+    @endcode
+*/
+
+
+/** @file
+    The <tt>libnestegg</tt> C API. */
+
+#define NESTEGG_TRACK_VIDEO 0 /**< Track is of type video. */
+#define NESTEGG_TRACK_AUDIO 1 /**< Track is of type audio. */
+
+#define NESTEGG_CODEC_VP8    0 /**< Track uses Google On2 VP8 codec. */
+#define NESTEGG_CODEC_VORBIS 1 /**< Track uses Xiph Vorbis codec. */
+
+#define NESTEGG_SEEK_SET 0 /**< Seek offset relative to beginning of stream. */
+#define NESTEGG_SEEK_CUR 1 /**< Seek offset relative to current position in stream. */
+#define NESTEGG_SEEK_END 2 /**< Seek offset relative to end of stream. */
+
+#define NESTEGG_LOG_DEBUG    1     /**< Debug level log message. */
+#define NESTEGG_LOG_INFO     10    /**< Informational level log message. */
+#define NESTEGG_LOG_WARNING  100   /**< Warning level log message. */
+#define NESTEGG_LOG_ERROR    1000  /**< Error level log message. */
+#define NESTEGG_LOG_CRITICAL 10000 /**< Critical level log message. */
+
+typedef struct nestegg nestegg;               /**< Opaque handle referencing the stream state. */
+typedef struct nestegg_packet nestegg_packet; /**< Opaque handle referencing a packet of data. */
+
+/** User supplied IO context. */
+typedef struct {
+  /** User supplied read callback.
+      @param buffer   Buffer to read data into.
+      @param length   Length of supplied buffer in bytes.
+      @param userdata The #userdata supplied by the user.
+      @retval  1 Read succeeded.
+      @retval  0 End of stream.
+      @retval -1 Error. */
+  int (* read)(void * buffer, size_t length, void * userdata);
+
+  /** User supplied seek callback.
+      @param offset   Offset within the stream to seek to.
+      @param whence   Seek direction.  One of #NESTEGG_SEEK_SET,
+                      #NESTEGG_SEEK_CUR, or #NESTEGG_SEEK_END.
+      @param userdata The #userdata supplied by the user.
+      @retval  0 Seek succeeded.
+      @retval -1 Error. */
+  int (* seek)(int64_t offset, int whence, void * userdata);
+
+  /** User supplied tell callback.
+      @param userdata The #userdata supplied by the user.
+      @returns Current position within the stream.
+      @retval -1 Error. */
+  int64_t (* tell)(void * userdata);
+
+  /** User supplied pointer to be passed to the IO callbacks. */
+  void * userdata;
+} nestegg_io;
+
+/** Parameters specific to a video track. */
+typedef struct {
+  unsigned int width;          /**< Width of the video frame in pixels. */
+  unsigned int height;         /**< Height of the video frame in pixels. */
+  unsigned int display_width;  /**< Display width of the video frame in pixels. */
+  unsigned int display_height; /**< Display height of the video frame in pixels. */
+  unsigned int crop_bottom;    /**< Pixels to crop from the bottom of the frame. */
+  unsigned int crop_top;       /**< Pixels to crop from the top of the frame. */
+  unsigned int crop_left;      /**< Pixels to crop from the left of the frame. */
+  unsigned int crop_right;     /**< Pixels to crop from the right of the frame. */
+} nestegg_video_params;
+
+/** Parameters specific to an audio track. */
+typedef struct {
+  double rate;           /**< Sampling rate in Hz. */
+  unsigned int channels; /**< Number of audio channels. */
+  unsigned int depth;    /**< Bits per sample. */
+} nestegg_audio_params;
+
+/** Logging callback function pointer. */
+typedef void (* nestegg_log)(nestegg * context, unsigned int severity, char const * format, ...);
+
+/** Initialize a nestegg context.  During initialization the parser will
+    read forward in the stream processing all elements until the first
+    block of media is reached.  All track metadata has been processed at this point.
+    @param context  Storage for the new nestegg context.  @see nestegg_destroy
+    @param io       User supplied IO context.
+    @param callback Optional logging callback function pointer.  May be NULL.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_init(nestegg ** context, nestegg_io io, nestegg_log callback);
+
+/** Destroy a nestegg context and free associated memory.
+    @param context #nestegg context to be freed.  @see nestegg_init */
+void nestegg_destroy(nestegg * context);
+
+/** Query the duration of the media stream in nanoseconds.
+    @param context  Stream context initialized by #nestegg_init.
+    @param duration Storage for the queried duration.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_duration(nestegg * context, uint64_t * duration);
+
+/** Query the tstamp scale of the media stream in nanoseconds.
+    Timecodes presented by nestegg have been scaled by this value
+    before presentation to the caller.
+    @param context Stream context initialized by #nestegg_init.
+    @param scale   Storage for the queried scale factor.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_tstamp_scale(nestegg * context, uint64_t * scale);
+
+/** Query the number of tracks in the media stream.
+    @param context Stream context initialized by #nestegg_init.
+    @param tracks  Storage for the queried track count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_count(nestegg * context, unsigned int * tracks);
+
+/** Seek @a track to @a tstamp.  Stream seek will terminate at the earliest
+    key point in the stream at or before @a tstamp.  Other tracks in the
+    stream will output packets with unspecified but nearby timestamps.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param tstamp  Absolute timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_seek(nestegg * context, unsigned int track, uint64_t tstamp);
+
+/** Query the type specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_TRACK_VIDEO Track type is video.
+    @retval #NESTEGG_TRACK_AUDIO Track type is audio.
+    @retval -1 Error. */
+int nestegg_track_type(nestegg * context, unsigned int track);
+
+/** Query the codec ID specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @retval #NESTEGG_CODEC_VP8    Track codec is VP8.
+    @retval #NESTEGG_CODEC_VORBIS Track codec is Vorbis.
+    @retval -1 Error. */
+int nestegg_track_codec_id(nestegg * context, unsigned int track);
+
+/** Query the number of codec initialization chunks for @a track.  Each
+    chunk of data should be passed to the codec initialization functions in
+    the order returned.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param count   Storage for the queried chunk count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data_count(nestegg * context, unsigned int track,
+                                   unsigned int * count);
+
+/** Get a pointer to chunk number @a item of codec initialization data for
+    @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg context.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_codec_data(nestegg * context, unsigned int track, unsigned int item,
+                             unsigned char ** data, size_t * length);
+
+/** Query the video parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried video parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_video_params(nestegg * context, unsigned int track,
+                               nestegg_video_params * params);
+
+/** Query the audio parameters specified by @a track.
+    @param context Stream context initialized by #nestegg_init.
+    @param track   Zero based track number.
+    @param params  Storage for the queried audio parameters.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_track_audio_params(nestegg * context, unsigned int track,
+                               nestegg_audio_params * params);
+
+/** Read a packet of media data.  A packet consists of one or more chunks of
+    data associated with a single track.  nestegg_read_packet should be
+    called in a loop while the return value is 1 to drive the stream parser
+    forward.  @see nestegg_free_packet
+    @param context Context returned by #nestegg_init.
+    @param packet  Storage for the returned nestegg_packet.
+    @retval  1 Additional packets may be read in subsequent calls.
+    @retval  0 End of stream.
+    @retval -1 Error. */
+int nestegg_read_packet(nestegg * context, nestegg_packet ** packet);
+
+/** Destroy a nestegg_packet and free associated memory.
+    @param packet #nestegg_packet to be freed. @see nestegg_read_packet */
+void nestegg_free_packet(nestegg_packet * packet);
+
+/** Query the track number of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param track  Storage for the queried zero based track index.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_track(nestegg_packet * packet, unsigned int * track);
+
+/** Query the time stamp in nanoseconds of @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param tstamp Storage for the queried timestamp in nanoseconds.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_tstamp(nestegg_packet * packet, uint64_t * tstamp);
+
+/** Query the number of data chunks contained in @a packet.
+    @param packet Packet initialized by #nestegg_read_packet.
+    @param count  Storage for the queried chunk count.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_count(nestegg_packet * packet, unsigned int * count);
+
+/** Get a pointer to chunk number @a item of packet data.
+    @param packet  Packet initialized by #nestegg_read_packet.
+    @param item    Zero based chunk item number.
+    @param data    Storage for the queried data pointer.
+                   The data is owned by the #nestegg_packet packet.
+    @param length  Storage for the queried data size.
+    @retval  0 Success.
+    @retval -1 Error. */
+int nestegg_packet_data(nestegg_packet * packet, unsigned int item,
+                        unsigned char ** data, size_t * length);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* NESTEGG_671cac2a_365d_ed69_d7a3_4491d3538d79 */
--- a/nestegg/m4/as-ac-expand.m4
+++ b/nestegg/m4/as-ac-expand.m4
@@ -0,0 +1,43 @@
+dnl as-ac-expand.m4 0.2.0
+dnl autostars m4 macro for expanding directories using configure's prefix
+dnl thomas@apestaart.org
+
+dnl AS_AC_EXPAND(VAR, CONFIGURE_VAR)
+dnl example
+dnl AS_AC_EXPAND(SYSCONFDIR, $sysconfdir)
+dnl will set SYSCONFDIR to /usr/local/etc if prefix=/usr/local
+
+AC_DEFUN([AS_AC_EXPAND],
+[
+  EXP_VAR=[$1]
+  FROM_VAR=[$2]
+
+  dnl remember the caller's prefix and exec_prefix; both are restored at the end
+  prefix_save=$prefix
+  exec_prefix_save=$exec_prefix
+
+  dnl prefix is still NONE when none was given: fall back to ac_default_prefix
+  if test "x$prefix" = "xNONE"; then
+    prefix="$ac_default_prefix"
+  fi
+  dnl exec_prefix is still NONE when none was given: fall back to prefix
+  if test "x$exec_prefix" = "xNONE"; then
+    exec_prefix=$prefix
+  fi
+
+  full_var="$FROM_VAR"
+  dnl re-evaluate until the value stops changing, so nested references get resolved
+  while true; do
+    new_full_var="`eval echo $full_var`"
+    if test "x$new_full_var" = "x$full_var"; then break; fi
+    full_var=$new_full_var
+  done
+
+  dnl substitute the fully expanded value under the name passed as VAR
+  full_var=$new_full_var
+  AC_SUBST([$1], "$full_var")
+
+  dnl restore prefix and exec_prefix
+  prefix=$prefix_save
+  exec_prefix=$exec_prefix_save
+])
--- a/nestegg/m4/ax_create_stdint_h.m4
+++ b/nestegg/m4/ax_create_stdint_h.m4
@@ -0,0 +1,695 @@
+dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])]
+dnl
+dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
+dnl existence of an include file <stdint.h> that defines a set of
+dnl typedefs, especially uint8_t,int32_t,uintptr_t. Many older
+dnl installations will not provide this file, but some will have the
+dnl very same definitions in <inttypes.h>. In other environments we can
+dnl use the inet-types in <sys/types.h> which would define the typedefs
+dnl int8_t and u_int8_t respectively.
+dnl
+dnl This macro will create a local "_stdint.h" or the headerfile given
+dnl as an argument. In many cases that file will just "#include
+dnl <stdint.h>" or "#include <inttypes.h>", while in other environments
+dnl it will provide the set of basic 'stdint's definitions/typedefs:
+dnl
+dnl   int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t
+dnl   int_least32_t.. int_fast32_t.. intmax_t
+dnl
+dnl which may or may not rely on the definitions of other files, or
+dnl using the AC_CHECK_SIZEOF macro to determine the actual sizeof each
+dnl type.
+dnl
+dnl if your header files require the stdint-types you will want to
+dnl create an installable file mylib-int.h that all your other
+dnl installable header may include. So if you have a library package
+dnl named "mylib", just use
+dnl
+dnl      AX_CREATE_STDINT_H(mylib-int.h)
+dnl
+dnl in configure.ac and go to install that very header file in
+dnl Makefile.am along with the other headers (mylib.h) - and the
+dnl mylib-specific headers can simply use "#include <mylib-int.h>" to
+dnl obtain the stdint-types.
+dnl
+dnl Remember, if the system already had a valid <stdint.h>, the
+dnl generated file will include it directly. No need for fuzzy
+dnl HAVE_STDINT_H things... (oops, GCC 4.2.x has deliberately disabled
+dnl its stdint.h for non-c99 compilation and the c99-mode is not the
+dnl default. Therefore this macro will not use the compiler's stdint.h
+dnl - please complain to the GCC developers).
+dnl
+dnl @category C
+dnl @author Guido U. Draheim <guidod@gmx.de>
+dnl @version 2006-10-13
+dnl @license GPLWithACException
+
+AC_DEFUN([AX_CHECK_DATA_MODEL],[
+   AC_CHECK_SIZEOF(char)
+   AC_CHECK_SIZEOF(short)
+   AC_CHECK_SIZEOF(int)
+   AC_CHECK_SIZEOF(long)
+   AC_CHECK_SIZEOF(void*)
+   dnl Each model string is the probed sizeof values written side by side:
+   dnl   char model = sizeof char + short + int    -- for example 124
+   dnl   long model = sizeof int + long + void*    -- for example 488
+   dnl The case statement below maps the pair to a symbolic name for
+   dnl ac_cv_data_model and a human readable description kept in n.
+   dnl Both strings are built in one step instead of piecewise appends.
+   ac_cv_char_data_model="$ac_cv_sizeof_char$ac_cv_sizeof_short$ac_cv_sizeof_int"
+   ac_cv_long_data_model="$ac_cv_sizeof_int$ac_cv_sizeof_long$ac_cv_sizeof_voidp"
+   AC_MSG_CHECKING([data model])
+   case "$ac_cv_char_data_model/$ac_cv_long_data_model" in
+    122/242)     ac_cv_data_model="IP16"  ; n="standard 16bit machine" ;;
+    122/244)     ac_cv_data_model="LP32"  ; n="standard 32bit machine" ;;
+    122/*)       ac_cv_data_model="i16"   ; n="unusual int16 model" ;;
+    124/444)     ac_cv_data_model="ILP32" ; n="standard 32bit unixish" ;;
+    124/488)     ac_cv_data_model="LP64"  ; n="standard 64bit unixish" ;;
+    124/448)     ac_cv_data_model="LLP64" ; n="unusual 64bit unixish" ;;
+    124/*)       ac_cv_data_model="i32"   ; n="unusual int32 model" ;;
+    128/888)     ac_cv_data_model="ILP64" ; n="unusual 64bit numeric" ;;
+    128/*)       ac_cv_data_model="i64"   ; n="unusual int64 model" ;;
+    222/*2)      ac_cv_data_model="DSP16" ; n="strict 16bit dsptype" ;;
+    333/*3)      ac_cv_data_model="DSP24" ; n="strict 24bit dsptype" ;;
+    444/*4)      ac_cv_data_model="DSP32" ; n="strict 32bit dsptype" ;;
+    666/*6)      ac_cv_data_model="DSP48" ; n="strict 48bit dsptype" ;;
+    888/*8)      ac_cv_data_model="DSP64" ; n="strict 64bit dsptype" ;;
+    222/*|333/*|444/*|666/*|888/*) :
+                 ac_cv_data_model="iDSP"  ; n="unusual dsptype" ;;
+    *)           ac_cv_data_model="none"  ; n="very unusual model" ;;
+   esac
+   AC_MSG_RESULT([$ac_cv_data_model ($ac_cv_long_data_model, $n)])
+])
+
+dnl AX_CHECK_HEADER_STDINT_X([HEADERLIST][,ACTION-IF-FOUND]) - first header with uintptr_t
+AC_DEFUN([AX_CHECK_HEADER_STDINT_X],[
+AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[
+ ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[stdint.h inttypes.h sys/inttypes.h sys/types.h])
+  do
+   unset ac_cv_type_uintptr_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uintptr_t,[ac_cv_header_stdint_x=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint uintptr_t])
+ ])
+])
+
+AC_DEFUN([AX_CHECK_HEADER_STDINT_O],[
+AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[
+ ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[inttypes.h sys/inttypes.h sys/types.h stdint.h])
+  do
+   unset ac_cv_type_uint32_t
+   unset ac_cv_type_uint64_t
+   AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
+   dnl first header providing uint32_t wins: run the optional second-argument action and stop
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint uint32_t])
+ ])
+])
+
+AC_DEFUN([AX_CHECK_HEADER_STDINT_U],[
+AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[
+ ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h)
+  AC_MSG_RESULT([(..)])
+  for i in m4_ifval([$1],[$1],[sys/types.h inttypes.h sys/inttypes.h]) ; do
+   unset ac_cv_type_u_int32_t
+   unset ac_cv_type_u_int64_t
+   AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],continue,[#include <$i>])
+   AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>])
+   dnl first header providing u_int32_t wins: run the optional second-argument action and stop
+   m4_ifvaln([$2],[$2]) break
+  done
+  AC_MSG_CHECKING([for stdint u_int32_t])
+ ])
+])
+
+AC_DEFUN([AX_CREATE_STDINT_H],
+[# ------ AX CREATE STDINT H -------------------------------------
+AC_MSG_CHECKING([for stdint types])
+ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)`
+# try to shortcircuit - if the default include path of the compiler
+# can find a "stdint.h" header then we assume that all compilers can.
+AC_CACHE_VAL([ac_cv_header_stdint_t],[
+old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS=""
+old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS=""
+old_CFLAGS="$CFLAGS"     ; CFLAGS=""
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[ac_cv_stdint_result="(assuming C99 compatible system)"
+ ac_cv_header_stdint_t="stdint.h"; ],
+[ac_cv_header_stdint_t=""])
+if test "$GCC" = "yes" && test ".$ac_cv_header_stdint_t" = "."; then
+CFLAGS="-std=c99"
+AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
+[AC_MSG_WARN(your GCC compiler has a defunct stdint.h for its default-mode)])
+fi
+CXXFLAGS="$old_CXXFLAGS"
+CPPFLAGS="$old_CPPFLAGS"
+CFLAGS="$old_CFLAGS" ])
+
+v="... $ac_cv_header_stdint_h"
+if test "$ac_stdint_h" = "stdint.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)])
+elif test "$ac_stdint_h" = "inttypes.h" ; then
+ AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)])
+elif test "_$ac_cv_header_stdint_t" = "_" ; then
+ AC_MSG_RESULT([(putting them into $ac_stdint_h)$v])
+else
+ ac_cv_header_stdint="$ac_cv_header_stdint_t"
+ AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)])
+fi
+
+if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit..
+
+dnl .....intro message done, now do a few system checks.....
+dnl btw, all old CHECK_TYPE macros do automatically "DEFINE" a type,
+dnl therefore we use the autoconf implementation detail CHECK_TYPE_NEW
+dnl instead that is triggered with 3 or more arguments (see types.m4)
+
+inttype_headers=`echo $2 | sed -e 's/,/ /g'`
+
+ac_cv_stdint_result="(no helpful system typedefs seen)"
+AX_CHECK_HEADER_STDINT_X(dnl
+   stdint.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen uintptr_t$and64 in $i)")
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+AX_CHECK_HEADER_STDINT_O(dnl,
+   inttypes.h sys/inttypes.h stdint.h $inttype_headers,
+   ac_cv_stdint_result="(seen uint32_t$and64 in $i)")
+fi
+
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+if test "_$ac_cv_header_stdint_o" = "_" ; then
+AX_CHECK_HEADER_STDINT_U(dnl,
+   sys/types.h inttypes.h sys/inttypes.h $inttype_headers,
+   ac_cv_stdint_result="(seen u_int32_t$and64 in $i)")
+fi fi
+
+dnl if there was no good C99 header file, do some typedef checks...
+if test "_$ac_cv_header_stdint_x" = "_" ; then
+   AC_MSG_CHECKING([for stdint datatype model])
+   AC_MSG_RESULT([(..)])
+   AX_CHECK_DATA_MODEL
+fi
+
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_x"
+elif  test "_$ac_cv_header_stdint_o" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_o"
+elif  test "_$ac_cv_header_stdint_u" != "_" ; then
+   ac_cv_header_stdint="$ac_cv_header_stdint_u"
+else
+   ac_cv_header_stdint="stddef.h"
+fi
+
+AC_MSG_CHECKING([for extra inttypes in chosen header])
+AC_MSG_RESULT([($ac_cv_header_stdint)])
+dnl see if int_least and int_fast types are present in _this_ header.
+unset ac_cv_type_int_least32_t
+unset ac_cv_type_int_fast32_t
+AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>])
+AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>])
+AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>])
+
+fi # shortcircut to system "stdint.h"
+# ------------------ PREPARE VARIABLES ------------------------------
+if test "$GCC" = "yes" ; then
+ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1`
+else
+ac_cv_stdint_message="using $CC"
+fi
+
+AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl
+$ac_cv_stdint_result])
+
+dnl -----------------------------------------------------------------
+# ----------------- DONE inttypes.h checks START header -------------
+AC_CONFIG_COMMANDS([$ac_stdint_h],[
+AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h)
+ac_stdint=$tmp/_stdint.h
+
+echo "#ifndef" $_ac_stdint_h >$ac_stdint
+echo "#define" $_ac_stdint_h "1" >>$ac_stdint
+echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint
+echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint
+echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_t" != "_" ; then
+echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint
+echo "#include <stdint.h>" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+echo "#endif" >>$ac_stdint
+else
+
+cat >>$ac_stdint <<STDINT_EOF
+
+/* ................... shortcircuit part ........................... */
+
+#if defined HAVE_STDINT_H || defined _STDINT_HAVE_STDINT_H
+#include <stdint.h>
+#else
+#include <stddef.h>
+
+/* .................... configured part ............................ */
+
+STDINT_EOF
+
+echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint
+if test "_$ac_cv_header_stdint_x" != "_" ; then
+  ac_header="$ac_cv_header_stdint_x"
+  echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint
+fi
+
+echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_o" != "_" ; then
+  ac_header="$ac_cv_header_stdint_o"
+  echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint
+fi
+
+echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint
+if  test "_$ac_cv_header_stdint_u" != "_" ; then
+  ac_header="$ac_cv_header_stdint_u"
+  echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint
+else
+  echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint
+fi
+
+echo "" >>$ac_stdint
+
+if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then
+  echo "#include <$ac_header>" >>$ac_stdint
+  echo "" >>$ac_stdint
+fi fi
+
+echo "/* which 64bit typedef has been found */" >>$ac_stdint
+if test "$ac_cv_type_uint64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_UINT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint
+fi
+if test "$ac_cv_type_u_int64_t" = "yes" ; then
+echo "#define   _STDINT_HAVE_U_INT64_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* which type model has been detected */" >>$ac_stdint
+if test "_$ac_cv_char_data_model" != "_" ; then
+echo "#define   _STDINT_CHAR_MODEL" "$ac_cv_char_data_model" >>$ac_stdint
+echo "#define   _STDINT_LONG_MODEL" "$ac_cv_long_data_model" >>$ac_stdint
+else
+echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint
+echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+echo "/* whether int_least types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_least32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_LEAST32_T" "1"  >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint
+fi
+echo "/* whether int_fast types were detected */" >>$ac_stdint
+if test "$ac_cv_type_int_fast32_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint
+fi
+echo "/* whether intmax_t type was detected */" >>$ac_stdint
+if test "$ac_cv_type_intmax_t" = "yes"; then
+echo "#define   _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint
+else
+echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint
+fi
+echo "" >>$ac_stdint
+
+  cat >>$ac_stdint <<STDINT_EOF
+/* .................... detections part ............................ */
+
+/* whether we need to define bitspecific types from compiler base types */
+#ifndef _STDINT_HEADER_INTPTR
+#ifndef _STDINT_HEADER_UINT32
+#ifndef _STDINT_HEADER_U_INT32
+#define _STDINT_NEED_INT_MODEL_T
+#else
+#define _STDINT_HAVE_U_INT_TYPES
+#endif
+#endif
+#endif
+
+#ifdef _STDINT_HAVE_U_INT_TYPES
+#undef _STDINT_NEED_INT_MODEL_T
+#endif
+
+#ifdef  _STDINT_CHAR_MODEL
+#if     _STDINT_CHAR_MODEL+0 == 122 || _STDINT_CHAR_MODEL+0 == 124
+#ifndef _STDINT_BYTE_MODEL
+#define _STDINT_BYTE_MODEL 12
+#endif
+#endif
+#endif
+
+#ifndef _STDINT_HAVE_INT_LEAST32_T
+#define _STDINT_NEED_INT_LEAST_T
+#endif
+
+#ifndef _STDINT_HAVE_INT_FAST32_T
+#define _STDINT_NEED_INT_FAST_T
+#endif
+
+#ifndef _STDINT_HEADER_INTPTR
+#define _STDINT_NEED_INTPTR_T
+#ifndef _STDINT_HAVE_INTMAX_T
+#define _STDINT_NEED_INTMAX_T
+#endif
+#endif
+
+
+/* .................... definition part ............................ */
+
+/* some system headers have good uint64_t */
+#ifndef _HAVE_UINT64_T
+#if     defined _STDINT_HAVE_UINT64_T  || defined HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#elif   defined _STDINT_HAVE_U_INT64_T || defined HAVE_U_INT64_T
+#define _HAVE_UINT64_T
+typedef u_int64_t uint64_t;
+#endif
+#endif
+
+#ifndef _HAVE_UINT64_T
+/* no system uint64_t was seen: guess one from the C standard level or the compiler id */
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+
+#elif !defined __STRICT_ANSI__
+#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__
+#define _HAVE_UINT64_T
+typedef __int64 int64_t;
+typedef unsigned __int64 uint64_t;
+
+#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__
+/* note: all ELF-systems seem to have loff-support which needs 64-bit */
+#if !defined _NO_LONGLONG
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef long long int64_t;
+typedef unsigned long long uint64_t;
+#endif
+
+#elif defined __alpha || (defined __mips && defined _ABIN32)
+#if !defined _NO_LONGLONG
+typedef long int64_t;
+typedef unsigned long uint64_t;
+#endif
+  /* compiler/cpu type to define int64_t */
+#endif
+#endif
+#endif
+
+#if defined _STDINT_HAVE_U_INT_TYPES
+/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */
+typedef u_int8_t uint8_t;
+typedef u_int16_t uint16_t;
+typedef u_int32_t uint32_t;
+
+/* glibc compatibility */
+#ifndef __int8_t_defined
+#define __int8_t_defined
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INT_MODEL_T
+/* we must guess all the basic types. Apart from byte-addressable systems, */
+/* there are a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */
+/* (nibble-addressable systems are assumed away). int8_t is signed char: plain char may be unsigned */
+
+dnl   /* have a look at "64bit and data size neutrality" at */
+dnl   /* http://unix.org/version2/whatsnew/login_64bit.html */
+dnl   /* (the shorthand "ILP" types always have a "P" part) */
+
+#if defined _STDINT_BYTE_MODEL
+#if _STDINT_LONG_MODEL+0 == 242
+/* 2:4:2 =  IP16 = a normal 16-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned long   uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef   signed char    int8_t;
+typedef          short   int16_t;
+typedef          long    int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
+/* 2:4:4 =  LP32 = a 32-bit system derived from a 16-bit */
+/* 4:4:4 = ILP32 = a normal 32-bit system                */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef   signed char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488
+/* 4:8:4 =  IP32 = a 32-bit system prepared for 64-bit    */
+/* 4:8:8 =  LP64 = a normal 64-bit system                 */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef   signed char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* this system has a "long" of 64bit */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+typedef unsigned long   uint64_t;
+typedef          long    int64_t;
+#endif
+#elif _STDINT_LONG_MODEL+0 == 448
+/*      LLP64   a 64-bit system derived from a 32-bit system */
+typedef unsigned char   uint8_t;
+typedef unsigned short  uint16_t;
+typedef unsigned int    uint32_t;
+#ifndef __int8_t_defined
+#define __int8_t_defined
+typedef   signed char    int8_t;
+typedef          short   int16_t;
+typedef          int     int32_t;
+#endif
+/* assuming the system has a "long long" */
+#ifndef _HAVE_UINT64_T
+#define _HAVE_UINT64_T
+#define _HAVE_LONGLONG_UINT64_T
+typedef unsigned long long uint64_t;
+typedef          long long  int64_t;
+#endif
+#else
+#define _STDINT_NO_INT32_T
+#endif
+#else
+#define _STDINT_NO_INT8_T
+#define _STDINT_NO_INT32_T
+#endif
+#endif
+
+/*
+ * quote from SunOS-5.8 sys/inttypes.h:
+ * Use at your own risk.  As of February 1996, the committee is squarely
+ * behind the fixed sized types; the "least" and "fast" types are still being
+ * discussed.  The probability that the "fast" types may be removed before
+ * the standard is finalized is high enough that they are not currently
+ * implemented.
+ */
+
+#if defined _STDINT_NEED_INT_LEAST_T
+typedef  int8_t    int_least8_t;
+typedef  int16_t   int_least16_t;
+typedef  int32_t   int_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_least64_t;
+#endif
+
+typedef uint8_t   uint_least8_t;
+typedef uint16_t  uint_least16_t;
+typedef uint32_t  uint_least32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_least64_t;
+#endif
+  /* least types */
+#endif
+
+#if defined _STDINT_NEED_INT_FAST_T
+typedef  int8_t    int_fast8_t;
+typedef  int       int_fast16_t;
+typedef  int32_t   int_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef  int64_t   int_fast64_t;
+#endif
+
+typedef uint8_t   uint_fast8_t;
+typedef unsigned  uint_fast16_t;
+typedef uint32_t  uint_fast32_t;
+#ifdef _HAVE_UINT64_T
+typedef uint64_t  uint_fast64_t;
+#endif
+  /* fast types */
+#endif
+
+#ifdef _STDINT_NEED_INTMAX_T
+#ifdef _HAVE_UINT64_T
+typedef  int64_t       intmax_t;
+typedef uint64_t      uintmax_t;
+#else
+typedef          long  intmax_t;
+typedef unsigned long uintmax_t;
+#endif
+#endif
+
+#ifdef _STDINT_NEED_INTPTR_T
+#ifndef __intptr_t_defined
+#define __intptr_t_defined
+/* we encourage using "long" to store pointer values, never use "int" ! */
+#if   _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484
+typedef  unsigned int   uintptr_t;
+typedef           int    intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T
+typedef        uint64_t uintptr_t;
+typedef         int64_t  intptr_t;
+#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */
+typedef  unsigned long  uintptr_t;
+typedef           long   intptr_t;
+#endif
+#endif
+#endif
+
+/* The ISO C99 standard specifies that in C++ implementations these
+   should only be defined if explicitly requested.  */
+#if !defined __cplusplus || defined __STDC_CONSTANT_MACROS
+#ifndef UINT32_C
+
+/* Signed.  The 64-bit suffix follows the type chosen above: LL for long long.  */
+# define INT8_C(c)      c
+# define INT16_C(c)     c
+# define INT32_C(c)     c
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INT64_C(c)    c ## LL
+# else
+#  define INT64_C(c)    c ## L
+# endif
+
+/* Unsigned.  */
+# define UINT8_C(c)     c ## U
+# define UINT16_C(c)    c ## U
+# define UINT32_C(c)    c ## U
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define UINT64_C(c)   c ## ULL
+# else
+#  define UINT64_C(c)   c ## UL
+# endif
+
+/* Maximal type.  */
+# ifdef _HAVE_LONGLONG_UINT64_T
+#  define INTMAX_C(c)   c ## LL
+#  define UINTMAX_C(c)  c ## ULL
+# else
+#  define INTMAX_C(c)   c ## L
+#  define UINTMAX_C(c)  c ## UL
+# endif
+
+  /* literalnumbers */
+#endif
+#endif
+
+/* These limits are merely those of a two's complement byte-oriented system */
+
+/* Minimum of signed integral types.  */
+# define INT8_MIN               (-128)
+# define INT16_MIN              (-32767-1)
+# define INT32_MIN              (-2147483647-1)
+# define INT64_MIN              (-INT64_C(9223372036854775807)-1)
+/* Maximum of signed integral types.  */
+# define INT8_MAX               (127)
+# define INT16_MAX              (32767)
+# define INT32_MAX              (2147483647)
+# define INT64_MAX              (INT64_C(9223372036854775807))
+
+/* Maximum of unsigned integral types (64-bit limits use INT64_C/UINT64_C from this header).  */
+# define UINT8_MAX              (255)
+# define UINT16_MAX             (65535)
+# define UINT32_MAX             (4294967295U)
+# define UINT64_MAX             (UINT64_C(18446744073709551615))
+
+/* Minimum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MIN         INT8_MIN
+# define INT_LEAST16_MIN        INT16_MIN
+# define INT_LEAST32_MIN        INT32_MIN
+# define INT_LEAST64_MIN        INT64_MIN
+/* Maximum of signed integral types having a minimum size.  */
+# define INT_LEAST8_MAX         INT8_MAX
+# define INT_LEAST16_MAX        INT16_MAX
+# define INT_LEAST32_MAX        INT32_MAX
+# define INT_LEAST64_MAX        INT64_MAX
+
+/* Maximum of unsigned integral types having a minimum size.  */
+# define UINT_LEAST8_MAX        UINT8_MAX
+# define UINT_LEAST16_MAX       UINT16_MAX
+# define UINT_LEAST32_MAX       UINT32_MAX
+# define UINT_LEAST64_MAX       UINT64_MAX
+
+  /* shortcircuit*/
+#endif
+  /* once */
+#endif
+#endif
+STDINT_EOF
+fi
+    if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then
+      AC_MSG_NOTICE([$ac_stdint_h is unchanged])
+    else
+      ac_dir=`AS_DIRNAME(["$ac_stdint_h"])`
+      AS_MKDIR_P(["$ac_dir"])
+      rm -f $ac_stdint_h
+      mv $ac_stdint $ac_stdint_h
+    fi
+],[# variables for create stdint.h replacement
+PACKAGE="$PACKAGE"
+VERSION="$VERSION"
+ac_stdint_h="$ac_stdint_h"
+_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h)
+ac_cv_stdint_message="$ac_cv_stdint_message"
+ac_cv_header_stdint_t="$ac_cv_header_stdint_t"
+ac_cv_header_stdint_x="$ac_cv_header_stdint_x"
+ac_cv_header_stdint_o="$ac_cv_header_stdint_o"
+ac_cv_header_stdint_u="$ac_cv_header_stdint_u"
+ac_cv_type_uint64_t="$ac_cv_type_uint64_t"
+ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t"
+ac_cv_char_data_model="$ac_cv_char_data_model"
+ac_cv_long_data_model="$ac_cv_long_data_model"
+ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t"
+ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t"
+ac_cv_type_intmax_t="$ac_cv_type_intmax_t"
+])
+])
--- a/nestegg/m4/pkg.m4
+++ b/nestegg/m4/pkg.m4
@@ -0,0 +1,157 @@
+# pkg.m4 - Macros to locate and utilise pkg-config.            -*- Autoconf -*-
+#
+# Copyright © 2004 Scott James Remnant <scott@netsplit.com>.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+#
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# PKG_PROG_PKG_CONFIG([MIN-VERSION])
+#
+# Locate the pkg-config program and verify that it is at least MIN-VERSION
+# (0.9.0 when MIN-VERSION is omitted).  Unless PKG_CONFIG was set in the
+# environment, the program is searched for with AC_PATH_TOOL.  When the
+# version check fails, PKG_CONFIG is reset to the empty string, which the
+# other macros in this file treat as "pkg-config not available".
+# ----------------------------------
+AC_DEFUN([PKG_PROG_PKG_CONFIG],
+[m4_pattern_forbid([^_?PKG_[A-Z_]+$])
+m4_pattern_allow([^PKG_CONFIG(_PATH)?$])
+AC_ARG_VAR([PKG_CONFIG], [path to pkg-config utility])dnl
+if test "x$ac_cv_env_PKG_CONFIG_set" != "xset"; then
+	AC_PATH_TOOL([PKG_CONFIG], [pkg-config])
+fi
+if test -n "$PKG_CONFIG"; then
+	_pkg_min_version=m4_default([$1], [0.9.0])
+	AC_MSG_CHECKING([pkg-config is at least version $_pkg_min_version])
+	if $PKG_CONFIG --atleast-pkgconfig-version $_pkg_min_version; then
+		AC_MSG_RESULT([yes])
+	else
+		AC_MSG_RESULT([no])
+		PKG_CONFIG=""
+	fi
+
+fi[]dnl
+])# PKG_PROG_PKG_CONFIG
+
+# PKG_CHECK_EXISTS(MODULES, [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+#
+# Check to see whether a particular set of modules exists.  Similar
+# to PKG_CHECK_MODULES(), but does not set variables or print errors.
+# ACTION-IF-FOUND defaults to the shell no-op ":"; the else branch is only
+# emitted when ACTION-IF-NOT-FOUND is given.
+#
+# As with PKG_CHECK_MODULES, pkg-config itself is located through
+# AC_REQUIRE of PKG_PROG_PKG_CONFIG.  If there is a possibility that the
+# first use of this macro or of PKG_CHECK_MODULES might not happen, be sure
+# to include an explicit call to PKG_PROG_PKG_CONFIG in your configure.ac.
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_EXISTS],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+if test -n "$PKG_CONFIG" && \
+    AC_RUN_LOG([$PKG_CONFIG --exists --print-errors "$1"]); then
+  m4_ifval([$2], [$2], [:])
+m4_ifvaln([$3], [else
+  $3])dnl
+fi])
+
+
+# _PKG_CONFIG([VARIABLE], [COMMAND], [MODULES])
+#
+# Internal helper that fills the cache variable pkg_cv_VARIABLE.
+# A non-empty shell variable VARIABLE (user override) is copied as is;
+# otherwise, when MODULES exist, the output of "pkg-config --COMMAND MODULES"
+# is used.  Sets pkg_failed=yes when MODULES are not found and
+# pkg_failed=untried when PKG_CONFIG is empty.
+# ---------------------------------------------
+m4_define([_PKG_CONFIG],
+[if test -n "$PKG_CONFIG"; then
+    if test -n "$$1"; then
+        pkg_cv_[]$1="$$1"
+    else
+        PKG_CHECK_EXISTS([$3],
+                         [pkg_cv_[]$1=`$PKG_CONFIG --[]$2 "$3" 2>/dev/null`],
+			 [pkg_failed=yes])
+    fi
+else
+	pkg_failed=untried
+fi[]dnl
+])# _PKG_CONFIG
+
+# _PKG_SHORT_ERRORS_SUPPORTED
+#
+# Sets the shell variable _pkg_short_errors_supported to "yes" when the
+# located pkg-config is at least version 0.20 and to "no" otherwise.
+# PKG_CHECK_MODULES uses the result to decide whether to pass
+# --short-errors when collecting its error text.
+# -----------------------------
+AC_DEFUN([_PKG_SHORT_ERRORS_SUPPORTED],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi[]dnl
+])# _PKG_SHORT_ERRORS_SUPPORTED
+
+
+# PKG_CHECK_MODULES(VARIABLE-PREFIX, MODULES, [ACTION-IF-FOUND],
+# [ACTION-IF-NOT-FOUND])
+#
+# Query pkg-config for MODULES.  On success VARIABLE-PREFIX_CFLAGS and
+# VARIABLE-PREFIX_LIBS are set (values already present in those variables
+# take precedence over pkg-config) and ACTION-IF-FOUND is run.
+#
+# On failure ACTION-IF-NOT-FOUND is run when given; without it, configure
+# stops with an error.  When the modules were looked up but not found, the
+# pkg-config error text is stored in VARIABLE-PREFIX_PKG_ERRORS and written
+# to config.log.
+#
+# Note that if there is a possibility the first call to
+# PKG_CHECK_MODULES might not happen, you should be sure to include an
+# explicit call to PKG_PROG_PKG_CONFIG in your configure.ac
+#
+#
+# --------------------------------------------------------------
+AC_DEFUN([PKG_CHECK_MODULES],
+[AC_REQUIRE([PKG_PROG_PKG_CONFIG])dnl
+AC_ARG_VAR([$1][_CFLAGS], [C compiler flags for $1, overriding pkg-config])dnl
+AC_ARG_VAR([$1][_LIBS], [linker flags for $1, overriding pkg-config])dnl
+
+pkg_failed=no
+AC_MSG_CHECKING([for $1])
+
+_PKG_CONFIG([$1][_CFLAGS], [cflags], [$2])
+_PKG_CONFIG([$1][_LIBS], [libs], [$2])
+
+m4_define([_PKG_TEXT], [Alternatively, you may set the environment variables $1[]_CFLAGS
+and $1[]_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.])
+
+if test $pkg_failed = yes; then
+        _PKG_SHORT_ERRORS_SUPPORTED
+        if test $_pkg_short_errors_supported = yes; then
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --short-errors --errors-to-stdout --print-errors "$2"`
+        else
+	        $1[]_PKG_ERRORS=`$PKG_CONFIG --errors-to-stdout --print-errors "$2"`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$$1[]_PKG_ERRORS" >&AS_MESSAGE_LOG_FD
+
+	ifelse([$4], , [AC_MSG_ERROR(dnl
+[Package requirements ($2) were not met:
+
+$$1_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+_PKG_TEXT
+])],
+		[AC_MSG_RESULT([no])
+                $4])
+elif test $pkg_failed = untried; then
+	ifelse([$4], , [AC_MSG_FAILURE(dnl
+[The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+_PKG_TEXT
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.])],
+		[$4])
+else
+	$1[]_CFLAGS=$pkg_cv_[]$1[]_CFLAGS
+	$1[]_LIBS=$pkg_cv_[]$1[]_LIBS
+        AC_MSG_RESULT([yes])
+	ifelse([$3], , :, [$3])
+fi[]dnl
+])# PKG_CHECK_MODULES
--- a/nestegg/nestegg-uninstalled.pc.in
+++ b/nestegg/nestegg-uninstalled.pc.in
@@ -0,0 +1,13 @@
+# nestegg uninstalled pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
--- a/nestegg/nestegg.pc.in
+++ b/nestegg/nestegg.pc.in
@@ -0,0 +1,13 @@
+# nestegg installed pkg-config file
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: nestegg
+Description: WebM/Matroska demuxer
+Version: @VERSION@
+Conflicts:
+Libs: -L${libdir} -lnestegg
+Cflags: -I${includedir}
--- a/nestegg/src/nestegg.c
+++ b/nestegg/src/nestegg.c
--- a/nestegg/test/test.c
+++ b/nestegg/test/test.c
@@ -0,0 +1,248 @@
+/*
+ * Copyright © 2010 Mozilla Foundation
+ *
+ * This program is made available under an ISC-style license.  See the
+ * accompanying file LICENSE for details.
+ */
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include "nestegg/nestegg.h"
+
+#undef DEBUG
+#define SEEK_TEST
+
+/*
+ * nestegg read callback backed by stdio: read one item of `length` bytes
+ * from the FILE * passed as `fp` into `p`.
+ *
+ * Returns 1 when the item was read in full, 0 when it was not and the
+ * stream's end-of-file indicator is set, and -1 for any other short read.
+ */
+static int
+stdio_read(void * p, size_t length, void * fp)
+{
+  size_t items = fread(p, length, 1, fp);
+
+  if (items != 0)
+    return 1;
+  if (feof(fp))
+    return 0;
+  return -1;
+}
+
+/*
+ * nestegg seek callback backed by stdio.  `fp` is the FILE * stored in the
+ * nestegg_io userdata slot; `whence` is handed to fseek() unchanged.
+ *
+ * fseek() takes a long, which may be narrower than int64_t, so an offset
+ * that does not survive the conversion is rejected rather than silently
+ * seeking to a truncated position.
+ *
+ * Returns the result of fseek(), or -1 when `offset` does not fit in a long.
+ */
+static int
+stdio_seek(int64_t offset, int whence, void * fp)
+{
+  long pos = (long) offset;
+
+  if ((int64_t) pos != offset)
+    return -1;
+  return fseek(fp, pos, whence);
+}
+
+/*
+ * nestegg tell callback backed by stdio: report the current position of the
+ * FILE * passed as `fp`, exactly as ftell() returns it (widened to int64_t,
+ * so ftell()'s -1 error value is passed through).
+ */
+static int64_t
+stdio_tell(void * fp)
+{
+  long here = ftell(fp);
+
+  return here;
+}
+
+/*
+ * Log sink handed to nestegg_init(): writes one line to stderr consisting
+ * of the context pointer, a severity label and the printf-style message.
+ *
+ * Unless DEBUG is defined, messages below NESTEGG_LOG_WARNING are dropped.
+ */
+static void
+log_callback(nestegg * ctx, unsigned int severity, char const * fmt, ...)
+{
+  char const * label;
+  va_list args;
+
+#ifndef DEBUG
+  if (severity < NESTEGG_LOG_WARNING)
+    return;
+#endif
+
+  /* Map the severity onto a fixed-width label. */
+  if (severity == NESTEGG_LOG_DEBUG)
+    label = "debug:   ";
+  else if (severity == NESTEGG_LOG_WARNING)
+    label = "warning: ";
+  else if (severity == NESTEGG_LOG_CRITICAL)
+    label = "critical:";
+  else
+    label = "unknown: ";
+
+  fprintf(stderr, "%p %s ", (void *) ctx, label);
+
+  va_start(args, fmt);
+  vfprintf(stderr, fmt, args);
+  va_end(args);
+
+  fprintf(stderr, "\n");
+}
+
+/*
+ * Test driver: open the file named by argv[1], initialise a nestegg context
+ * over it, query every track, optionally exercise seeking (SEEK_TEST), then
+ * read every remaining packet.  Diagnostic output is only compiled in when
+ * DEBUG is defined.
+ *
+ * Returns EXIT_FAILURE when the argument count is wrong, the file cannot be
+ * opened or nestegg_init() fails; EXIT_SUCCESS otherwise.
+ */
+int
+main(int argc, char * argv[])
+{
+  FILE * fp;
+  int r, type;
+  nestegg * ctx;
+  nestegg_audio_params aparams;
+  nestegg_packet * pkt;
+  nestegg_video_params vparams;
+  size_t length, size;
+  /* The results of nestegg_track_count() and nestegg_duration() are not
+     checked below, so start from defined values instead of reading
+     indeterminate ones should either call leave its output untouched. */
+  uint64_t duration = 0, tstamp, pkt_tstamp;
+  unsigned char * codec_data, * ptr;
+  unsigned int cnt, i, j, track, tracks = 0, pkt_cnt, pkt_track;
+  unsigned int data_items = 0;
+  nestegg_io io = {
+    stdio_read,
+    stdio_seek,
+    stdio_tell,
+    NULL
+  };
+
+  if (argc != 2)
+    return EXIT_FAILURE;
+
+  fp = fopen(argv[1], "rb");
+  if (!fp)
+    return EXIT_FAILURE;
+
+  /* The stdio callbacks receive this FILE * as their last argument. */
+  io.userdata = fp;
+
+  ctx = NULL;
+  r = nestegg_init(&ctx, io, log_callback);
+  if (r != 0) {
+    /* Do not leak the input stream on the failure path. */
+    fclose(fp);
+    return EXIT_FAILURE;
+  }
+
+  nestegg_track_count(ctx, &tracks);
+  nestegg_duration(ctx, &duration);
+#ifdef DEBUG
+  fprintf(stderr, "media has %u tracks and duration %fs\n", tracks, duration / 1e9);
+#endif
+
+  /* Query type, codec data and audio/video parameters of every track. */
+  for (i = 0; i < tracks; ++i) {
+    type = nestegg_track_type(ctx, i);
+#ifdef DEBUG
+    fprintf(stderr, "track %u: type: %d codec: %d", i,
+            type, nestegg_track_codec_id(ctx, i));
+#endif
+    nestegg_track_codec_data_count(ctx, i, &data_items);
+    for (j = 0; j < data_items; ++j) {
+      nestegg_track_codec_data(ctx, i, j, &codec_data, &length);
+#ifdef DEBUG
+      fprintf(stderr, " (%p, %u)", codec_data, (unsigned int) length);
+#endif
+    }
+    if (type == NESTEGG_TRACK_VIDEO) {
+      nestegg_track_video_params(ctx, i, &vparams);
+#ifdef DEBUG
+      fprintf(stderr, " video: %ux%u (d: %ux%u %ux%ux%ux%u)",
+              vparams.width, vparams.height,
+              vparams.display_width, vparams.display_height,
+              vparams.crop_top, vparams.crop_left, vparams.crop_bottom, vparams.crop_right);
+#endif
+    } else if (type == NESTEGG_TRACK_AUDIO) {
+      nestegg_track_audio_params(ctx, i, &aparams);
+#ifdef DEBUG
+      fprintf(stderr, " audio: %.2fhz %u bit %u channels",
+              aparams.rate, aparams.depth, aparams.channels);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+  }
+
+#ifdef SEEK_TEST
+  /* Seek track 0 to 1/2, 9/10 and 1/10 of the duration in turn, reading one
+     packet after every seek that reports success. */
+#ifdef DEBUG
+  fprintf(stderr, "seek to middle\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 2);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "middle ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "middle seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~end\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration - (duration / 10));
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "end ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "end seek failed\n");
+#endif
+    }
+  }
+
+#ifdef DEBUG
+  fprintf(stderr, "seek to ~start\n");
+#endif
+  r = nestegg_track_seek(ctx, 0, duration / 10);
+  if (r == 0) {
+#ifdef DEBUG
+    fprintf(stderr, "start ");
+#endif
+    r = nestegg_read_packet(ctx, &pkt);
+    if (r == 1) {
+      nestegg_packet_track(pkt, &track);
+      nestegg_packet_count(pkt, &cnt);
+      nestegg_packet_tstamp(pkt, &tstamp);
+#ifdef DEBUG
+      fprintf(stderr, "* t %u pts %f frames %u\n", track, tstamp / 1e9, cnt);
+#endif
+      nestegg_free_packet(pkt);
+    } else {
+#ifdef DEBUG
+      fprintf(stderr, "start seek failed\n");
+#endif
+    }
+  }
+#endif
+
+  /* Drain the remaining packets, touching every data chunk of each one. */
+  while (nestegg_read_packet(ctx, &pkt) > 0) {
+    nestegg_packet_track(pkt, &pkt_track);
+    nestegg_packet_count(pkt, &pkt_cnt);
+    nestegg_packet_tstamp(pkt, &pkt_tstamp);
+
+#ifdef DEBUG
+    fprintf(stderr, "t %u pts %f frames %u: ", pkt_track, pkt_tstamp / 1e9, pkt_cnt);
+#endif
+
+    for (i = 0; i < pkt_cnt; ++i) {
+      nestegg_packet_data(pkt, i, &ptr, &size);
+#ifdef DEBUG
+      fprintf(stderr, "%u ", (unsigned int) size);
+#endif
+    }
+#ifdef DEBUG
+    fprintf(stderr, "\n");
+#endif
+
+    nestegg_free_packet(pkt);
+  }
+
+  nestegg_destroy(ctx);
+  fclose(fp);
+
+  return EXIT_SUCCESS;
+}
--- a/release.sh
+++ b/release.sh
@@ -1,210 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-
-
-
-self=$0
-
-for opt; do
-    case $opt in
-        --clean) clean=yes;;
-        -j*) jopt=$opt;;
-        *) echo "Unsupported option $opt"; exit 1;;
-    esac
-done
-
-TAB="$(printf '\t')"
-cat > release.mk << EOF
-%\$(BUILD_SFX).tar.bz2: %/.done
-${TAB}@echo "\$(subst .tar.bz2,,\$@): tarball"
-${TAB}@cd \$(dir \$<); tar -cf - \$(subst .tar.bz2,,\$@) | bzip2 > ../\$@
-
-%\$(BUILD_SFX).zip: %/.done
-${TAB}@echo "\$(subst .zip,,\$@): zip"
-${TAB}@rm -f \$@; cd \$(dir \$<); zip -rq ../\$@ \$(subst .zip,,\$@)
-
-logs/%\$(BUILD_SFX).log.bz2: %/.done
-${TAB}@echo "\$(subst .log.bz2,,\$(notdir \$@)): tarlog"
-${TAB}@mkdir -p logs
-${TAB}@cat \$< | bzip2 > \$@
-
-%/.done:
-${TAB}@mkdir -p \$(dir \$@)
-${TAB}@echo "\$(dir \$@): configure \$(CONFIG_OPTS) \$(EXTRA_PATH)"
-${TAB}@cd \$(dir \$@); export PATH=\$\$PATH\$(EXTRA_PATH); ../\$(SRC_ROOT)/configure \$(CONFIG_OPTS) >makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): make"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): test install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install >>makelog.txt 2>&1
-${TAB}@cd \$(dir \$@)/dist/build; PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) >>makelog.txt 2>&1
-${TAB}@echo "\$(dir \$@): install"
-${TAB}@cd \$(dir \$@); PATH=\$\$PATH\$(EXTRA_PATH) \$(MAKE) install DIST_DIR=\$(TGT) >>makelog.txt 2>&1
-${TAB}@touch \$@
-
-#include release-deps.mk
-EOF
-
-#[ -f release-deps.mk ] || \
-#    find ${self%/*} -name .git -prune -o -type f -print0 \
-#    | xargs -0 -n1 echo \
-#    | sed -e 's; ;\\ ;g' | awk '{print "$(TGT)/.done: "$0}' > release-deps.mk
-
-build_config_list() {
-    for codec in $CODEC_LIST; do
-        for arch in $ARCH_LIST; do
-            if [ -n "$OS_LIST" ]; then
-                for os in $OS_LIST; do
-                    CONFIGS="$CONFIGS vpx-${codec}-${arch}-${os}"
-                done
-            else
-                CONFIGS="$CONFIGS vpx-${codec}-${arch}"
-            fi
-        done
-    done
-}
-
-CODEC_LIST="vp8 vp8cx vp8dx"
-case `uname` in
-    Linux*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="linux"
-        build_config_list
-        ARCH_LIST="armv5te armv6 armv7"
-        OS_LIST="linux-gcc"
-
-        ;;
-    CYGWIN*)
-        TAR_SFX=.zip
-        for vs in vs7 vs8; do
-            for arch in x86-win32 x86_64-win64; do
-                for msvcrt in md mt; do
-                    case $vs,$arch in
-                        vs7,x86_64-win64) continue ;;
-                    esac
-                    ARCH_LIST="$ARCH_LIST ${arch}${msvcrt}-${vs}"
-                done
-            done
-        done
-        ;;
-    Darwin*)
-        ARCH_LIST="universal"
-        OS_LIST="darwin8 darwin9"
-        ;;
-    sun_os*)
-        ARCH_LIST="x86 x86_64"
-        OS_LIST="solaris"
-        ;;
-esac
-build_config_list
-
-TAR_SFX=${TAR_SFX:-.tar.bz2}
-ARM_TOOLCHAIN=/usr/local/google/csl-2009q3-67
-for cfg in $CONFIGS; do
-    full_cfg=$cfg
-    cfg=${cfg#vpx-}
-    opts=
-    rm -f makelog.txt
-
-    case $cfg in
-        src-*)  opts="$opts --enable-codec-srcs"
-                cfg=${cfg#src-}
-                ;;
-        eval-*) opts="$opts --enable-eval-limit"
-                cfg=${cfg#src-}
-                ;;
-    esac
-
-    case $cfg in
-        #
-        # Linux
-        #
-        *x86-linux)
-            opts="$opts --target=x86-linux-gcc" ;;
-        *x86_64-linux)
-            opts="$opts --target=x86_64-linux-gcc" ;;
-        *arm*-linux-gcc)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-gcc" ;;
-        *arm*-linux-rvct)
-            armv=${cfg##*armv}
-            armv=${armv%%-*}
-            opts="$opts --target=armv${armv}-linux-rvct"
-            opts="$opts --libc=${ARM_TOOLCHAIN}/arm-none-linux-gnueabi/libc" ;;
-
-
-        #
-        # Windows
-        #
-        # need --enable-debug-libs for now until we're smarter about
-        # building the debug/release from the customer installed
-        # environment
-        *-x86-win32*-vs*)
-            opts="$opts --target=x86-win32-vs${cfg##*-vs} --enable-debug-libs";;
-        *-x86_64-win64*-vs8)
-            opts="$opts --target=x86_64-win64-vs8 --enable-debug-libs" ;;
-
-        #
-        # Darwin
-        #
-        *-universal-darwin*)
-            opts="$opts --target=universal-darwin${cfg##*-darwin}-gcc" ;;
-
-        #
-        # Solaris
-        #
-        *x86-solaris)
-            opts="$opts --target=x86-solaris-gcc" ;;
-        *x86_64-solaris)
-            opts="$opts --target=x86_64-solaris-gcc" ;;
-    esac
-
-    case $cfg in
-        *x86-linux | *x86-solaris) opts="$opts --enable-pic" ;;
-    esac
-
-    case $cfg in
-        *-win[36][24]mt*)  opts="$opts --enable-static-msvcrt" ;;
-        *-win[36][24]md*)  opts="$opts --disable-static-msvcrt" ;;
-    esac
-
-    opts="$opts --disable-codecs"
-    case $cfg in
-        vp8*) opts="$opts --enable-vp8" ;;
-    esac
-    case $cfg in
-        *cx-*) opts="${opts}-encoder" ;;
-        *dx-*) opts="${opts}-decoder" ;;
-    esac
-    opts="$opts --enable-postproc"
-
-    [ "x${clean}" = "xyes" ] \
-        && rm -rf ${full_cfg}${BUILD_SFX}${TAR_SFX} \
-        && rm -rf logs/${full_cfg}${BUILD_SFX}.log.bz2
-
-    TGT=${full_cfg}${BUILD_SFX}
-    BUILD_TARGETS="logs/${TGT}.log.bz2 ${TGT}${TAR_SFX}"
-    echo "${BUILD_TARGETS}: CONFIG_OPTS=$opts" >>release.mk
-    echo "${BUILD_TARGETS}: TGT=${TGT}" >>release.mk
-    case $cfg in
-        *-arm*-linux-*)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:${ARM_TOOLCHAIN}/bin/" >>release.mk ;;
-        *-vs7)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ .NET\ 2003/Common7/IDE" >>release.mk ;;
-        *-vs8)
-            echo "${BUILD_TARGETS}: EXTRA_PATH=:/cygdrive/c/Program\ Files/Microsoft\ Visual\ Studio\ 8/Common7/IDE" >>release.mk ;;
-    esac
-    MAKE_TGTS="$MAKE_TGTS ${TGT}${TAR_SFX} logs/${TGT}.log.bz2"
-done
-
-
-${MAKE:-make} ${jopt:--j3} -f release.mk  \
-    SRC_ROOT=${self%/*} BUILD_SFX=${BUILD_SFX} ${MAKE_TGTS}
--- a/solution.mk
+++ b/solution.mk
@@ -22,7 +22,7 @@ else
 vpx.sln: $(wildcard *.vcproj)
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
-            $(if $(filter %vpx.vcproj,$^),--dep=ivfdec:vpx) \
+            $(if $(filter %vpx.vcproj,$^),--dep=vpxdec:vpx) \
            $(if $(filter %vpx.vcproj,$^),--dep=xma:vpx) \
            --ver=$(CONFIG_VS_VERSION)\
            --target=$(TOOLCHAIN)\
--- a/tools_common.c
+++ b/tools_common.c
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <stdio.h>
+#include "tools_common.h"
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+/* On _WIN32 builds, switch the descriptor behind `stream` to binary mode
+ * with _setmode(); on other builds do nothing.  The same stream pointer is
+ * returned in both cases. */
+FILE* set_binary_mode(FILE *stream)
+{
+#ifdef _WIN32
+    _setmode(_fileno(stream), _O_BINARY);
+#else
+    (void)stream;
+#endif
+    return stream;
+}
--- a/tools_common.h
+++ b/tools_common.h
@@ -0,0 +1,16 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TOOLS_COMMON_H
+#define TOOLS_COMMON_H
+
+/* FILE is used in the prototype below; include its definition here so this
+ * header can be included on its own, in any order. */
+#include <stdio.h>
+
+/* Sets a stdio stream into binary mode and returns that same stream. */
+FILE* set_binary_mode(FILE *stream);
+
+#endif
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -56,7 +56,7 @@ int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height)

    vp8_de_alloc_frame_buffers(oci);

-    // our internal buffers are always multiples of 16
+    /* our internal buffers are always multiples of 16 */
    if ((width & 0xf) != 0)
        width += 16 - (width & 0xf);

@@ -179,10 +179,10 @@ void vp8_create_common(VP8_COMMON *oci)
    oci->clr_type = REG_YUV;
    oci->clamp_type = RECON_CLAMP_REQUIRED;

-    // Initialise reference frame sign bias structure to defaults
+    /* Initialise reference frame sign bias structure to defaults */
    vpx_memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias));

-    // Default disable buffer to buffer copying
+    /* Default disable buffer to buffer copying */
    oci->copy_buffer_to_gf = 0;
    oci->copy_buffer_to_arf = 0;
 }
--- a/vp8/common/arm/arm_systemdependent.c
+++ b/vp8/common/arm/arm_systemdependent.c
@@ -0,0 +1,136 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "vpx_ports/arm.h"
+#include "g_common.h"
+#include "pragmas.h"
+#include "subpixel.h"
+#include "loopfilter.h"
+#include "recon.h"
+#include "idct.h"
+#include "onyxc_int.h"
+
+extern void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
+
+extern void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
+extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
+
+/*
+ * Install ARM-specific implementations for a VP8_COMMON context.
+ *
+ * With CONFIG_RUNTIME_CPU_DETECT, the capability flags returned by
+ * arm_cpu_caps() are stored in ctx->rtcd.flags and the rtcd function tables
+ * (subpix, idct, loopfilter, recon) are overridden: first with the ARMv6
+ * routines when HAS_MEDIA is reported, then with the NEON routines when
+ * HAS_NEON is reported.
+ *
+ * Independently of the rtcd tables, the two global intra-predictor function
+ * pointers are assigned for HAVE_ARMV6 and HAVE_ARMV7 builds; those
+ * assignments are guarded by the same capability flags only when runtime
+ * detection is compiled in, and are unconditional otherwise.
+ */
+void vp8_arch_arm_common_init(VP8_COMMON *ctx)
+{
+#if CONFIG_RUNTIME_CPU_DETECT
+    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
+    int flags = arm_cpu_caps();
+    int has_edsp = flags & HAS_EDSP;
+    int has_media = flags & HAS_MEDIA;
+    int has_neon = flags & HAS_NEON;
+    rtcd->flags = flags;
+
+    /* Override default functions with fastest ones for this CPU. */
+#if HAVE_ARMV6
+    if (has_media)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_v6;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_v6;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
+        rtcd->recon.recon       = vp8_recon_b_armv6;
+        rtcd->recon.recon2      = vp8_recon2b_armv6;
+        rtcd->recon.recon4      = vp8_recon4b_armv6;
+    }
+#endif
+
+    /* The NEON entries are assigned after the ARMv6 ones, so they take
+     * precedence when both capabilities are reported. */
+#if HAVE_ARMV7
+    if (has_neon)
+    {
+        rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
+        rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
+        rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
+        rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
+        rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
+        rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
+        rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
+        rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
+
+        rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
+        rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
+        rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
+        rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
+
+        rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
+        rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
+        rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
+        rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
+        rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
+        rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
+        rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
+        rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
+
+        rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
+        rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
+        rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
+        rtcd->recon.recon       = vp8_recon_b_neon;
+        rtcd->recon.recon2      = vp8_recon2b_neon;
+        rtcd->recon.recon4      = vp8_recon4b_neon;
+        /* recon_mb is only overridden here; the ARMv6 block leaves it alone. */
+        rtcd->recon.recon_mb    = vp8_recon_mb_neon;
+
+    }
+#endif
+
+#endif
+
+    /* The intra-predictor pointers are plain globals rather than rtcd table
+     * entries, so they are set outside the runtime-detect section above. */
+#if HAVE_ARMV6
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_media)
+#endif
+    {
+        /* NOTE(review): these are the un-suffixed predictors, presumably the
+         * generic C versions -- confirm. */
+        vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
+        vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
+    }
+#endif
+
+#if HAVE_ARMV7
+#if CONFIG_RUNTIME_CPU_DETECT
+    if (has_neon)
+#endif
+    {
+        vp8_build_intra_predictors_mby_ptr =
+         vp8_build_intra_predictors_mby_neon;
+        vp8_build_intra_predictors_mby_s_ptr =
+         vp8_build_intra_predictors_mby_s_neon;
+    }
+#endif
+}
--- a/vp8/common/arm/armv6/filter_v6.asm
+++ b/vp8/common/arm/armv6/filter_v6.asm
@@ -11,6 +11,7 @@

    EXPORT  |vp8_filter_block2d_first_pass_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_armv6|
+    EXPORT  |vp8_filter4_block2d_second_pass_armv6|
    EXPORT  |vp8_filter_block2d_first_pass_only_armv6|
    EXPORT  |vp8_filter_block2d_second_pass_only_armv6|

@@ -192,6 +193,64 @@

    ENDP

+;---------------------------------
+; Second filter pass using four packed taps (r12 and r11 each hold two).
+; NOTE(review): the name suggests this is the 4-tap counterpart of
+; vp8_filter_block2d_second_pass_armv6 -- confirm at the call site.
+;
+; Reads 16-bit intermediate samples from src_ptr, accumulates the taps on
+; top of the rounding constant 0x40, shifts right by 7, saturates to 8 bits
+; and stores bytes to output_ptr, advancing by output_pitch after each byte.
+;
+; r0    short         *src_ptr,
+; r1    unsigned char *output_ptr,
+; r2    unsigned int output_pitch,
+; r3    unsigned int cnt,
+; stack const short *vp8_filter
+;---------------------------------
+|vp8_filter4_block2d_second_pass_armv6| PROC
+    stmdb   sp!, {r4 - r11, lr}
+
+    ; nine registers were pushed above (36 bytes), so the fifth argument
+    ; is found at [sp, #36]
+    ldr     r11, [sp, #36]                  ; vp8_filter address
+    mov     r7, r3, lsl #16                 ; height is top part of counter
+
+    ldr     r4, [r11]                       ; load up packed filter coefficients
+    add     lr, r1, r3                      ; save final destination pointer
+    ldr     r5, [r11, #4]
+    ldr     r6, [r11, #8]
+
+    pkhbt   r12, r5, r4                     ; pack the filter differently
+    pkhbt   r11, r6, r5
+    mov     r4, #0x40                       ; rounding factor (for smlad{x})
+
+    ; r7 packs both loop counters: the top halfword counts outer iterations
+    ; (starting at cnt), the low bits count inner iterations (cnt / 2, since
+    ; every inner iteration stores two output bytes)
+|height_loop_2nd_4|
+    ldrd    r8, [r0, #-4]                   ; load the data
+    orr     r7, r7, r3, lsr #1              ; loop counter
+
+|width_loop_2nd_4|
+    ldr     r10, [r0, #4]!
+    smladx  r6, r9, r12, r4                 ; apply filter
+    pkhbt   r8, r9, r8
+    smlad   r5, r8, r12, r4
+    pkhbt   r8, r10, r9
+    smladx  r6, r10, r11, r6
+    sub     r7, r7, #1
+    smlad   r5, r8, r11, r5
+
+    mov     r8, r9                          ; shift the data for the next loop
+    mov     r9, r10
+
+    usat    r6, #8, r6, asr #7              ; shift and clamp
+    usat    r5, #8, r5, asr #7
+
+    strb    r5, [r1], r2                    ; the result is transposed back and stored
+    tst     r7, #0xff
+    strb    r6, [r1], r2
+
+    bne     width_loop_2nd_4
+
+    subs    r7, r7, #0x10000
+    add     r0, r0, #16                     ; update src for next loop
+    sub     r1, lr, r7, lsr #16             ; update dst for next loop
+
+    bne     height_loop_2nd_4
+
+    ldmia   sp!, {r4 - r11, pc}
+
+    ENDP
+
 ;------------------------------------
 ; r0    unsigned char *src_ptr
 ; r1    unsigned char *output_ptr,
--- a/vp8/common/arm/bilinearfilter_arm.c
+++ b/vp8/common/arm/bilinearfilter_arm.c
@@ -49,7 +49,7 @@ extern void vp8_filter_block2d_bil_second_pass_armv6
    const short *vp8_filter
 );

-/*
+#if 0
 void vp8_filter_block2d_bil_first_pass_6
 (
    unsigned char *src_ptr,
@@ -66,14 +66,14 @@ void vp8_filter_block2d_bil_first_pass_6
    {
        for ( j=0; j<output_width; j++ )
        {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
            output_ptr[j] = ( ( (int)src_ptr[0]          * vp8_filter[0]) +
                               ((int)src_ptr[1] * vp8_filter[1]) +
                                (VP8_FILTER_WEIGHT/2) ) >> VP8_FILTER_SHIFT;
            src_ptr++;
        }

-        // Next row...
+        /* Next row... */
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_width;
    }
@@ -96,7 +96,7 @@ void vp8_filter_block2d_bil_second_pass_6
    {
        for ( j=0; j<output_width; j++ )
        {
-            // Apply filter
+            /* Apply filter */
            Temp =  ((int)src_ptr[0]         * vp8_filter[0]) +
                    ((int)src_ptr[output_width] * vp8_filter[1]) +
                    (VP8_FILTER_WEIGHT/2);
@@ -104,12 +104,12 @@ void vp8_filter_block2d_bil_second_pass_6
            src_ptr++;
        }

-        // Next row...
-        //src_ptr    += src_pixels_per_line - output_width;
+        /* Next row... */
+        /*src_ptr    += src_pixels_per_line - output_width;*/
        output_ptr += output_pitch;
    }
 }
-*/
+#endif

 void vp8_filter_block2d_bil_armv6
 (
@@ -124,13 +124,13 @@ void vp8_filter_block2d_bil_armv6
 )
 {

-    unsigned short FData[36*16]; // Temp data bufffer used in filtering
+    unsigned short FData[36*16]; /* Temp data buffer used in filtering */

-    // First filter 1-D horizontally...
-    // pixel_step = 1;
+    /* First filter 1-D horizontally... */
+    /* pixel_step = 1; */
    vp8_filter_block2d_bil_first_pass_armv6(src_ptr, FData, src_pixels_per_line, Height + 1, Width, HFilter);

-    // then 1-D vertically...
+    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass_armv6(FData, output_ptr, dst_pitch, Height, Width, VFilter);
 }

--- a/vp8/common/arm/filter_arm.c
+++ b/vp8/common/arm/filter_arm.c
@@ -20,13 +20,13 @@

 DECLARE_ALIGNED(16, static const short, sub_pel_filters[8][6]) =
 {
-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
    { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
    { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
    { 0, -1,   12,  123,  -6,  0 },
 };

@@ -50,6 +50,15 @@ extern void vp8_filter_block2d_second_pass_armv6
    const short *vp8_filter
 );

+extern void vp8_filter4_block2d_second_pass_armv6
+(
+    short         *src_ptr,
+    unsigned char *output_ptr,
+    unsigned int output_pitch,
+    unsigned int cnt,
+    const short *vp8_filter
+);
+
 extern void vp8_filter_block2d_first_pass_only_armv6
 (
    unsigned char *src_ptr,
@@ -84,39 +93,43 @@ void vp8_sixtap_predict_armv6
 {
    const short  *HFilter;
    const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 12*4); /* Temp data buffer used in filtering */


-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */

-    // Vfilter is null. First pass only
+    /* Vfilter is null. First pass only */
    if (xoffset && !yoffset)
    {
-        //vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
-        //vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );
+        /*vp8_filter_block2d_first_pass_armv6 ( src_ptr, FData+2, src_pixels_per_line, 4, 4, HFilter );
+        vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, VFilter );*/

        vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, HFilter);
    }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
    else if (!xoffset && yoffset)
    {
        vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 4, dst_pitch, VFilter);
    }
    else
    {
-        // Vfilter is a 4 tap filter
+        /* Vfilter is a 4 tap filter */
        if (yoffset & 0x1)
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 4, 7, HFilter);
-        // Vfilter is 6 tap filter
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+        }
+        /* Vfilter is 6 tap filter */
        else
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 4, 9, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 4, VFilter);
+        }
    }
 }

-/*
+#if 0
 void vp8_sixtap_predict8x4_armv6
 (
    unsigned char  *src_ptr,
@@ -129,33 +142,33 @@ void vp8_sixtap_predict8x4_armv6
 {
    const short  *HFilter;
    const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */


-//  if (xoffset && !yoffset)
-//  {
-//      vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
-//  }
-    // Hfilter is null. Second pass only
-//  else if (!xoffset && yoffset)
-//  {
-//      vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
-//  }
-//  else
-//  {
-//      if (yoffset & 0x1)
-    //      vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
-    //  else
+    /*if (xoffset && !yoffset)
+    {
+        vp8_filter_block2d_first_pass_only_armv6 (  src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter );
+    }*/
+    /* Hfilter is null. Second pass only */
+    /*else if (!xoffset && yoffset)
+    {
+        vp8_filter_block2d_second_pass_only_armv6 ( src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter );
+    }
+    else
+    {
+        if (yoffset & 0x1)
+            vp8_filter_block2d_first_pass_armv6 ( src_ptr-src_pixels_per_line, FData+1, src_pixels_per_line, 8, 7, HFilter );
+        else*/

        vp8_filter_block2d_first_pass_armv6 ( src_ptr-(2*src_pixels_per_line), FData, src_pixels_per_line, 8, 9, HFilter );

        vp8_filter_block2d_second_pass_armv6 ( FData+2, dst_ptr, dst_pitch, 4, 8, VFilter );
-//  }
+    /*}*/
 }
-*/
+#endif

 void vp8_sixtap_predict8x8_armv6
 (
@@ -169,16 +182,16 @@ void vp8_sixtap_predict8x8_armv6
 {
    const short  *HFilter;
    const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 16*8); /* Temp data buffer used in filtering */

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */

    if (xoffset && !yoffset)
    {
        vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, HFilter);
    }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
    else if (!xoffset && yoffset)
    {
        vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 8, dst_pitch, VFilter);
@@ -186,11 +199,15 @@ void vp8_sixtap_predict8x8_armv6
    else
    {
        if (yoffset & 0x1)
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 8, 11, HFilter);
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+        }
        else
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 8, 13, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 8, VFilter);
+        }
    }
 }

@@ -207,16 +224,16 @@ void vp8_sixtap_predict16x16_armv6
 {
    const short  *HFilter;
    const short  *VFilter;
-    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    // Temp data bufffer used in filtering
+    DECLARE_ALIGNED_ARRAY(4, short, FData, 24*16);    /* Temp data buffer used in filtering */

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];       // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];       /* 6 tap */

    if (xoffset && !yoffset)
    {
        vp8_filter_block2d_first_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, HFilter);
    }
-    // Hfilter is null. Second pass only
+    /* Hfilter is null. Second pass only */
    else if (!xoffset && yoffset)
    {
        vp8_filter_block2d_second_pass_only_armv6(src_ptr, dst_ptr, src_pixels_per_line, 16, dst_pitch, VFilter);
@@ -224,11 +241,15 @@ void vp8_sixtap_predict16x16_armv6
    else
    {
        if (yoffset & 0x1)
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - src_pixels_per_line, FData + 1, src_pixels_per_line, 16, 19, HFilter);
+            vp8_filter4_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+        }
        else
+        {
            vp8_filter_block2d_first_pass_armv6(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 16, 21, HFilter);
-
-        vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+            vp8_filter_block2d_second_pass_armv6(FData + 2, dst_ptr, dst_pitch, 16, VFilter);
+        }
    }

 }
--- a/vp8/common/arm/idct_arm.h
+++ b/vp8/common/arm/idct_arm.h
@@ -19,6 +19,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_v6);
 extern prototype_second_order(vp8_short_inv_walsh4x4_v6);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_v6

@@ -34,6 +35,7 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_v6);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_v6
 #endif
+#endif

 #if HAVE_ARMV7
 extern prototype_idct(vp8_short_idct4x4llm_1_neon);
@@ -42,6 +44,7 @@ extern prototype_idct_scalar_add(vp8_dc_only_idct_add_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_1_neon);
 extern prototype_second_order(vp8_short_inv_walsh4x4_neon);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_idct_idct1
 #define vp8_idct_idct1 vp8_short_idct4x4llm_1_neon

@@ -57,5 +60,6 @@ extern prototype_second_order(vp8_short_inv_walsh4x4_neon);
 #undef  vp8_idct_iwalsh16
 #define vp8_idct_iwalsh16 vp8_short_inv_walsh4x4_neon
 #endif
+#endif

 #endif
--- a/vp8/common/arm/loopfilter_arm.c
+++ b/vp8/common/arm/loopfilter_arm.c
@@ -35,8 +35,8 @@ extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_neon;


 #if HAVE_ARMV6
-//ARMV6 loopfilter functions
-// Horizontal MB filtering
+/*ARMV6 loopfilter functions*/
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -60,7 +60,7 @@ void vp8_loop_filter_mbhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                               int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -84,7 +84,7 @@ void vp8_loop_filter_mbvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsi
    vp8_loop_filter_simple_vertical_edge_armv6(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -112,7 +112,7 @@ void vp8_loop_filter_bhs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
    vp8_loop_filter_simple_horizontal_edge_armv6(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }

-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -142,8 +142,8 @@ void vp8_loop_filter_bvs_armv6(unsigned char *y_ptr, unsigned char *u_ptr, unsig
 #endif

 #if HAVE_ARMV7
-// NEON loopfilter functions
-// Horizontal MB filtering
+/* NEON loopfilter functions */
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -164,7 +164,7 @@ void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                              int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -185,7 +185,7 @@ void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsig
    vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -210,7 +210,7 @@ void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsign
    vp8_loop_filter_simple_horizontal_edge_neon(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }

-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                             int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
--- a/vp8/common/arm/loopfilter_arm.h
+++ b/vp8/common/arm/loopfilter_arm.h
@@ -22,6 +22,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_armv6);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_armv6

@@ -46,6 +47,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_armv6);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_armv6
 #endif
+#endif

 #if HAVE_ARMV7
 extern prototype_loopfilter_block(vp8_loop_filter_mbv_neon);
@@ -57,6 +59,7 @@ extern prototype_loopfilter_block(vp8_loop_filter_bvs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_mbhs_neon);
 extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_lf_normal_mb_v
 #define vp8_lf_normal_mb_v vp8_loop_filter_mbv_neon

@@ -81,5 +84,6 @@ extern prototype_loopfilter_block(vp8_loop_filter_bhs_neon);
 #undef  vp8_lf_simple_b_h
 #define vp8_lf_simple_b_h vp8_loop_filter_bhs_neon
 #endif
+#endif

 #endif
--- a/vp8/common/arm/neon/loopfilter_neon.asm
+++ b/vp8/common/arm/neon/loopfilter_neon.asm
@@ -0,0 +1,409 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src
+; r1    int pitch
+; r2    const signed char *flimit
+; r3    const signed char *limit
+; sp    const signed char *thresh (stack offsets are as seen on entry)
+; sp+4  int count (unused)
+|vp8_loop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit, first byte copied to all 16 lanes
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, first byte copied to all 16 lanes
+    sub         r2, r0, r1, lsl #2          ; r2 = src - 4 * pitch (row holding p3)
+    ldr         r12, [sp, #4]               ; thresh pointer (at sp+4 now that lr is pushed)
+
+    vld1.u8     {q3}, [r2], r1              ; p3
+    vld1.u8     {q4}, [r2], r1              ; p2
+    vld1.u8     {q5}, [r2], r1              ; p1
+    vld1.u8     {q6}, [r2], r1              ; p0
+    vld1.u8     {q7}, [r2], r1              ; q0
+    vld1.u8     {q8}, [r2], r1              ; q1
+    vld1.u8     {q9}, [r2], r1              ; q2
+    vld1.u8     {q10}, [r2]                 ; q3
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, first byte copied to all 16 lanes
+    sub         r0, r0, r1, lsl #1          ; r0 = src - 2 * pitch (row that receives op1)
+
+    bl          vp8_loop_filter_neon        ; leaves op1, op0, oq0, oq1 in q5-q8
+
+    vst1.u8     {q5}, [r0], r1              ; store op1
+    vst1.u8     {q6}, [r0], r1              ; store op0
+    vst1.u8     {q7}, [r0], r1              ; store oq0
+    vst1.u8     {q8}, [r0], r1              ; store oq1
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
+
+; void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh (stack offsets are as seen on entry),
+; sp+4  unsigned char *v
+|vp8_loop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit, first byte copied to all 16 lanes
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, first byte copied to all 16 lanes
+    ldr         r2, [sp, #8]                ; v pointer (at sp+8 now that lr is pushed)
+
+    sub         r3, r0, r1, lsl #2          ; r3 = u - 4 * pitch (u row holding p3)
+    vld1.u8     {d6}, [r3], r1              ; p3 (u rows fill the low half of q3-q10)
+    vld1.u8     {d8}, [r3], r1              ; p2
+    vld1.u8     {d10}, [r3], r1             ; p1
+    vld1.u8     {d12}, [r3], r1             ; p0
+    vld1.u8     {d14}, [r3], r1             ; q0
+    vld1.u8     {d16}, [r3], r1             ; q1
+    vld1.u8     {d18}, [r3], r1             ; q2
+    vld1.u8     {d20}, [r3]                 ; q3
+
+    ldr         r3, [sp, #4]                ; thresh pointer (at sp+4 now that lr is pushed)
+
+    sub         r12, r2, r1, lsl #2         ; r12 = v - 4 * pitch (v row holding p3)
+    vld1.u8     {d7}, [r12], r1             ; p3 (v rows fill the high half of q3-q10)
+    vld1.u8     {d9}, [r12], r1             ; p2
+    vld1.u8     {d11}, [r12], r1            ; p1
+    vld1.u8     {d13}, [r12], r1            ; p0
+    vld1.u8     {d15}, [r12], r1            ; q0
+    vld1.u8     {d17}, [r12], r1            ; q1
+    vld1.u8     {d19}, [r12], r1            ; q2
+    vld1.u8     {d21}, [r12]                ; q3
+
+    vld1.s8     {d4[], d5[]}, [r3]          ; thresh, first byte copied to all 16 lanes
+
+    bl          vp8_loop_filter_neon        ; leaves op1, op0, oq0, oq1 in q5-q8
+
+    sub         r0, r0, r1, lsl #1          ; r0 = u - 2 * pitch (row that receives u op1)
+    sub         r2, r2, r1, lsl #1          ; r2 = v - 2 * pitch (row that receives v op1)
+
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r2], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r2], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r2], r1             ; store v oq0
+    vst1.u8     {d16}, [r0]                 ; store u oq1
+    vst1.u8     {d17}, [r2]                 ; store v oq1
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
+
+; void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                           const signed char *flimit,
+;                                           const signed char *limit,
+;                                           const signed char *thresh,
+;                                           int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh (stack offsets are as seen on entry),
+; sp+4  int count (unused)
+|vp8_loop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit, first byte copied to all 16 lanes
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, first byte copied to all 16 lanes
+    sub         r2, r0, #4                  ; r2 = src - 4: each 8-byte row read starts here
+    sub         r0, r0, #2                  ; r0 = src - 2: each 4-byte row store starts here
+    ldr         r12, [sp, #4]               ; thresh pointer (at sp+4 now that lr is pushed)
+
+    vld1.u8     {d6}, [r2], r1              ; rows 0-7, 8 bytes each, into low halves of q3-q10
+    vld1.u8     {d8}, [r2], r1
+    vld1.u8     {d10}, [r2], r1
+    vld1.u8     {d12}, [r2], r1
+    vld1.u8     {d14}, [r2], r1
+    vld1.u8     {d16}, [r2], r1
+    vld1.u8     {d18}, [r2], r1
+    vld1.u8     {d20}, [r2], r1
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, first byte copied to all 16 lanes
+
+    vld1.u8     {d7}, [r2], r1              ; rows 8-15, 8 bytes each, into high halves of q3-q10
+    vld1.u8     {d9}, [r2], r1
+    vld1.u8     {d11}, [r2], r1
+    vld1.u8     {d13}, [r2], r1
+    vld1.u8     {d15}, [r2], r1
+    vld1.u8     {d17}, [r2], r1
+    vld1.u8     {d19}, [r2], r1
+    vld1.u8     {d21}, [r2]
+
+    ;transpose so that q3-q10 hold columns p3..q3, one byte per row
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    bl          vp8_loop_filter_neon        ; leaves op1, op0, oq0, oq1 in q5-q8
+
+    vswp        d12, d11                    ; regroup halves so that after these four swaps
+    vswp        d16, d13                    ; d10-d13 = op1, op0, oq0, oq1 for rows 0-7 and
+    vswp        d14, d12                    ; d14-d17 = op1, op0, oq0, oq1 for rows 8-15,
+    vswp        d16, d15                    ; the consecutive-register layout vst4.8 is given below
+
+    ;store op1, op0, oq0, oq1 (4 bytes per row, one lane per store)
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0], r1
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r0]
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
+
+; void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+;                                            const signed char *flimit,
+;                                            const signed char *limit,
+;                                            const signed char *thresh,
+;                                            unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh (stack offsets are as seen on entry),
+; sp+4  unsigned char *v
+|vp8_loop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r12, r0, #4                  ; r12 = u - 4: each 8-byte u row read starts here
+    vld1.s8     {d0[], d1[]}, [r2]          ; flimit, first byte copied to all 16 lanes
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, first byte copied to all 16 lanes
+
+    ldr         r2, [sp, #8]                ; v pointer (at sp+8 now that lr is pushed)
+
+    vld1.u8     {d6}, [r12], r1              ; 8 u rows into the low halves of q3-q10
+    vld1.u8     {d8}, [r12], r1
+    vld1.u8     {d10}, [r12], r1
+    vld1.u8     {d12}, [r12], r1
+    vld1.u8     {d14}, [r12], r1
+    vld1.u8     {d16}, [r12], r1
+    vld1.u8     {d18}, [r12], r1
+    vld1.u8     {d20}, [r12]
+
+    sub         r3, r2, #4                  ; r3 = v - 4: each 8-byte v row read starts here
+    vld1.u8     {d7}, [r3], r1              ; 8 v rows into the high halves of q3-q10
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d21}, [r3]
+
+    ldr         r12, [sp, #4]               ; thresh pointer (at sp+4 now that lr is pushed)
+
+    ;transpose so that q3-q10 hold columns p3..q3, one byte per row
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, first byte copied to all 16 lanes
+
+    bl          vp8_loop_filter_neon        ; leaves op1, op0, oq0, oq1 in q5-q8
+
+    sub         r0, r0, #2                  ; r0 = u - 2: each 4-byte u row store starts here
+    sub         r2, r2, #2                  ; r2 = v - 2: each 4-byte v row store starts here
+
+    vswp        d12, d11                    ; regroup halves so that after these four swaps
+    vswp        d16, d13                    ; d10-d13 = op1, op0, oq0, oq1 for the u rows and
+    vswp        d14, d12                    ; d14-d17 = op1, op0, oq0, oq1 for the v rows,
+    vswp        d16, d15                    ; the consecutive-register layout vst4.8 is given below
+
+    ;store op1, op0, oq0, oq1 (4 bytes per row; u via r0, v via r2)
+    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
+    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
+    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
+    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
+    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
+    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
+    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
+    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
+    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
+    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
+    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
+    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
+    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
+    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
+    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0]
+    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
+
+; void vp8_loop_filter_neon();
+; This is a helper function for the loopfilters. The individual functions do the
+; necessary load, transpose (if necessary) and store.
+; NOTE(review): q0-q4 and q9-q15 are overwritten and nothing is saved; AAPCS treats d8-d15 as callee-saved - confirm callers cope.
+; r0-r3 PRESERVE, r12 is overwritten (used as the constant table pointer)
+; q0    flimit
+; q1    limit
+; q2    thresh
+; q3    p3
+; q4    p2
+; q5    p1  (holds op1 on return)
+; q6    p0  (holds op0 on return)
+; q7    q0  (holds oq0 on return)
+; q8    q1  (holds oq1 on return)
+; q9    q2
+; q10   q3
+|vp8_loop_filter_neon| PROC
+    ldr         r12, _lf_coeff_             ; r12 = address of the lf_coeff table
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
+    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
+    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
+
+    vmax.u8     q11, q11, q12
+    vmax.u8     q12, q13, q14
+    vmax.u8     q3, q3, q4
+    vmax.u8     q15, q11, q12
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
+    vmax.u8     q15, q15, q3
+
+    vadd.u8     q0, q0, q0                  ; flimit * 2
+    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
+    vcge.u8     q15, q1, q15                ; (limit >= max of the six abs differences) * -1
+
+    vabd.u8     q2, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q9, q9, q9                  ; b = abs(p0 - q0) * 2
+    vshr.u8     q2, q2, #1                  ; a = a / 2
+    vqadd.u8    q9, q9, q2                  ; a = b + a
+    vcge.u8     q9, q0, q9                  ; (flimit * 2 + limit >= a) * -1
+
+    vld1.u8     {q0}, [r12]!                ; q0 = 0x80 in every byte
+
+    ; vp8_filter() function
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+
+    vld1.u8     {q10}, [r12]!               ; q10 = 0x03 in every byte
+
+    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
+    vsubl.s8    q11, d15, d13
+
+    vmovl.u8    q4, d20                     ; 3 widened to 16-bit lanes
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
+    vmul.i16    q11, q11, q4
+
+    vand        q1, q1, q14                 ; vp8_filter &= hev
+    vand        q15, q15, q9                ; vp8_filter_mask
+
+    vaddw.s8    q2, q2, d2                  ; + vp8_filter, still in 16-bit lanes
+    vaddw.s8    q11, q11, d3
+
+    vld1.u8     {q9}, [r12]!                ; q9 = 0x04 in every byte
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q11
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vqadd.s8    q2, q1, q10                 ; Filter2 = clamp(vp8_filter+3)
+    vqadd.s8    q1, q1, q9                  ; Filter1 = clamp(vp8_filter+4)
+    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
+    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
+
+    vqadd.s8    q11, q6, q2                 ; u = clamp(ps0 + Filter2)
+    vqsub.s8    q10, q7, q1                 ; u = clamp(qs0 - Filter1)
+
+    ; outer tap adjustments: ++vp8_filter >> 1
+    vrshr.s8    q1, q1, #1
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
+
+    vqadd.s8    q13, q5, q1                 ; u = clamp(ps1 + vp8_filter)
+    vqsub.s8    q12, q8, q1                 ; u = clamp(qs1 - vp8_filter)
+
+    veor        q5, q13, q0                 ; *op1 = u^0x80
+    veor        q6, q11, q0                 ; *op0 = u^0x80
+    veor        q7, q10, q0                 ; *oq0 = u^0x80
+    veor        q8, q12, q0                 ; *oq1 = u^0x80
+
+    bx          lr
+    ENDP        ; |vp8_loop_filter_neon|
+
+    AREA    loopfilter_dat, DATA, READONLY
+_lf_coeff_                                  ; word read by "ldr r12, _lf_coeff_"
+    DCD     lf_coeff                        ; address of the table below
+lf_coeff
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080  ; XOR mask for unsigned <-> signed
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303  ; +3 used for Filter2
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404  ; +4 used for Filter1
+    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101  ; not loaded by vp8_loop_filter_neon
+
+    END
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_uv_neon.asm
@@ -1,178 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r2], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r2], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r2], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r2], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r2], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r2], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r2], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r2], r1             ; q3
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _lfhuv_coeff_
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15                ; (max  > limit) * -1
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2
-    sub         r0, r0, r1, lsl #1
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #2
-    sub         r2, r2, r1, lsl #1
-    ;;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-    ;
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    ;
-
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r2], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r2], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r2], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r2], r1             ; store v oq1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    hloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhuv_coeff_
-    DCD     lfhuv_coeff
-lfhuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
--- a/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterhorizontaledge_y_neon.asm
@@ -1,161 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-
-|vp8_loop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q5}, [r0], r1              ; p1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q6}, [r0], r1              ; p0
-    ldr         r12, _lfhy_coeff_
-    vld1.u8     {q7}, [r0], r1              ; q0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-
-    ;calculate output
-    vqadd.s8    q11, q6, q2                 ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #2
-    sub         r0, r0, r1, lsl #1
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    ;
-    add         r2, r1, r0
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    add         r3, r2, r1
-
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    add         r12, r3, r1
-
-    vst1.u8     {q5}, [r0]                  ; store op1
-    vst1.u8     {q6}, [r2]                  ; store op0
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12]                 ; store oq1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    hloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_lfhy_coeff_
-    DCD     lfhy_coeff
-lfhy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
--- a/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_uv_neon.asm
@@ -1,203 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-
-|vp8_loop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4          ; move u pointer down by 4 columns
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-
-    ldr         r2, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r2, r2, #4          ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r2], r1              ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r2], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r2], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r2], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r2], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r2], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r2], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r2], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _vlfuv_coeff_
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #3
-    add         r0, r0, #2
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-
-    sub         r2, r2, r1, lsl #3
-    add         r2, r2, #2
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-
-    vswp        d12, d11
-    vswp        d16, d13
-    vswp        d14, d12
-    vswp        d16, d15
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0], r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2], r1
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r0], r1
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r2], r1
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r0], r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r2], r1
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r0], r1
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r2], r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r0], r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r2], r1
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r2], r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0], r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r2], r1
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r0], r1
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2], r1
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    vloopfilteruv_dat, DATA, READWRITE          ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_vlfuv_coeff_
-    DCD     vlfuv_coeff
-vlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
--- a/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/loopfilterverticaledge_y_neon.asm
@@ -1,207 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_loop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-
-|vp8_loop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    vld1.s8     {d0[], d1[]}, [r2]          ; flimit
-    vld1.u8     {d8}, [r0], r1
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {d10}, [r0], r1
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {d12}, [r0], r1
-    ldr         r12, _vlfy_coeff_
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q4, q10, q9                 ; abs(q3 - q2)
-    vabd.u8     q9, q6, q7                  ; abs(p0 - q0)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q4
-    vmax.u8     q15, q11, q12
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vadd.u8     q0, q0, q0                  ; flimit * 2
-    vadd.u8     q0, q0, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q2, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q9, q9, q9                  ; abs(p0 - q0) * 2
-    vshr.u8     q2, q2, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q9, q9, q2                  ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q9, q0, q9                  ; (abs(p0 - q0)*2 + abs(p1-q1)/2 > flimit*2 + limit)*-1
-
-    vld1.u8     {q0}, [r12]!
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-;;;;;;;;;;;;;;
-    vld1.u8     {q10}, [r12]!
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q11, d15, d13
-
-    vmovl.u8    q4, d20
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vmul.i16    q2, q2, q4                  ; 3 * ( qs0 - ps0)
-    vmul.i16    q11, q11, q4
-
-    vand        q1, q1, q14                 ; vp8_filter &= hev
-    vand        q15, q15, q9                ; vp8_filter_mask
-
-    vaddw.s8    q2, q2, d2
-    vaddw.s8    q11, q11, d3
-
-    vld1.u8     {q9}, [r12]!
-    ;
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q11
-    ;;
-
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-    vqadd.s8    q2, q1, q10                 ; Filter2 = vp8_signed_char_clamp(vp8_filter+3)
-    vqadd.s8    q1, q1, q9                  ; Filter1 = vp8_signed_char_clamp(vp8_filter+4)
-    vshr.s8     q2, q2, #3                  ; Filter2 >>= 3
-    vshr.s8     q1, q1, #3                  ; Filter1 >>= 3
-    ;calculate output
-    vqadd.s8    q11, q6, q2             ; u = vp8_signed_char_clamp(ps0 + Filter2)
-    vqsub.s8    q10, q7, q1                 ; u = vp8_signed_char_clamp(qs0 - Filter1)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vrshr.s8    q1, q1, #1                  ;round/shift:  vp8_filter += 1; vp8_filter >>= 1
-
-    sub         r0, r0, r1, lsl #4
-    add         r0, r0, #2
-    ;
-
-    vbic        q1, q1, q14                 ; vp8_filter &= ~hev
-    add         r2, r0, r1
-    ;
-
-    vqadd.s8    q13, q5, q1                 ; u = vp8_signed_char_clamp(ps1 + vp8_filter)
-    vqsub.s8    q12, q8, q1                 ; u = vp8_signed_char_clamp(qs1 - vp8_filter)
-
-    veor        q7, q10, q0                 ; *oq0 = u^0x80
-    veor        q5, q13, q0                 ; *op1 = u^0x80
-    veor        q6, q11, q0                 ; *op0 = u^0x80
-    veor        q8, q12, q0                 ; *oq1 = u^0x80
-    add         r3, r2, r1
-    ;
-    vswp        d12, d11
-    vswp        d16, d13
-    add         r12, r3, r1
-    vswp        d14, d12
-    vswp        d16, d15
-
-    ;store op1, op0, oq0, oq1
-    vst4.8      {d10[0], d11[0], d12[0], d13[0]}, [r0]
-    vst4.8      {d10[1], d11[1], d12[1], d13[1]}, [r2]
-    vst4.8      {d10[2], d11[2], d12[2], d13[2]}, [r3]
-    vst4.8      {d10[3], d11[3], d12[3], d13[3]}, [r12], r1
-    add         r0, r12, r1
-    vst4.8      {d10[4], d11[4], d12[4], d13[4]}, [r12]
-    vst4.8      {d10[5], d11[5], d12[5], d13[5]}, [r0], r1
-    add         r2, r0, r1
-    vst4.8      {d10[6], d11[6], d12[6], d13[6]}, [r0]
-    vst4.8      {d10[7], d11[7], d12[7], d13[7]}, [r2], r1
-    add         r3, r2, r1
-    vst4.8      {d14[0], d15[0], d16[0], d17[0]}, [r2]
-    vst4.8      {d14[1], d15[1], d16[1], d17[1]}, [r3], r1
-    add         r12, r3, r1
-    vst4.8      {d14[2], d15[2], d16[2], d17[2]}, [r3]
-    vst4.8      {d14[3], d15[3], d16[3], d17[3]}, [r12], r1
-    add         r0, r12, r1
-    vst4.8      {d14[4], d15[4], d16[4], d17[4]}, [r12]
-    vst4.8      {d14[5], d15[5], d16[5], d17[5]}, [r0], r1
-    add         r2, r0, r1
-    vst4.8      {d14[6], d15[6], d16[6], d17[6]}, [r0]
-    vst4.8      {d14[7], d15[7], d16[7], d17[7]}, [r2]
-
-    bx          lr
-    ENDP        ; |vp8_loop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    vloopfiltery_dat, DATA, READWRITE           ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_vlfy_coeff_
-    DCD     vlfy_coeff
-vlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x01010101, 0x01010101, 0x01010101, 0x01010101
-
-    END
--- a/vp8/common/arm/neon/mbloopfilter_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilter_neon.asm
@@ -0,0 +1,519 @@
+;
+;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+;
+;  Use of this source code is governed by a BSD-style license
+;  that can be found in the LICENSE file in the root of the source
+;  tree. An additional intellectual property rights grant can be found
+;  in the file PATENTS.  All contributing project authors may
+;  be found in the AUTHORS file in the root of the source tree.
+;
+
+
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
+    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
+    ARM
+    REQUIRE8
+    PRESERVE8
+
+    AREA ||.text||, CODE, READONLY, ALIGN=2
+
+; flimit, limit, and thresh should be positive numbers.
+; All 16 elements in these variables are equal.
+
+; void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch,
+;                                               const signed char *flimit,
+;                                               const signed char *limit,
+;                                               const signed char *thresh,
+;                                               int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, r1, lsl #2          ; src -= 4 * pitch (row of p3)
+    ldr         r12, [sp, #4]               ; load thresh pointer (stack arg, now above saved lr)
+
+    vld1.u8     {q3}, [r0], r1              ; p3
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, replicated into every lane of q1
+    vld1.u8     {q4}, [r0], r1              ; p2
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, replicated into every lane of q2
+    vld1.u8     {q5}, [r0], r1              ; p1
+    vld1.u8     {q6}, [r0], r1              ; p0
+    vld1.u8     {q7}, [r0], r1              ; q0
+    vld1.u8     {q8}, [r0], r1              ; q1
+    vld1.u8     {q9}, [r0], r1              ; q2
+    vld1.u8     {q10}, [r0], r1             ; q3
+
+    bl          vp8_mbloop_filter_neon      ; filtered rows come back in q4-q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind the 8 rows loaded above
+    add         r0, r0, r1                  ; r0  -> p2 row (p3 is not written)
+    add         r2, r0, r1                  ; r2  -> p1 row
+    add         r3, r2, r1                  ; r3  -> p0 row
+
+    vst1.u8     {q4}, [r0]                  ; store op2
+    vst1.u8     {q5}, [r2]                  ; store op1
+    vst1.u8     {q6}, [r3], r1              ; store op0, r3 -> q0 row
+    add         r12, r3, r1                 ; r12 -> q1 row
+    vst1.u8     {q7}, [r3]                  ; store oq0
+    vst1.u8     {q8}, [r12], r1             ; store oq1, r12 -> q2 row
+    vst1.u8     {q9}, [r12]                 ; store oq2
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
+
+; void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch,
+;                                                const signed char *flimit,
+;                                                const signed char *limit,
+;                                                const signed char *thresh,
+;                                                unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, r1, lsl #2          ; u -= 4 * pitch (row of p3)
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, replicated into every lane of q1
+    ldr         r3, [sp, #8]                ; load v ptr (r3 is free: limit is already in q1)
+    ldr         r12, [sp, #4]               ; load thresh pointer
+    sub         r3, r3, r1, lsl #2          ; v -= 4 * pitch (row of p3)
+
+    vld1.u8     {d6}, [r0], r1              ; p3 (u rows fill the low d register)
+    vld1.u8     {d7}, [r3], r1              ; p3 (v rows fill the high d register)
+    vld1.u8     {d8}, [r0], r1              ; p2
+    vld1.u8     {d9}, [r3], r1              ; p2
+    vld1.u8     {d10}, [r0], r1             ; p1
+    vld1.u8     {d11}, [r3], r1             ; p1
+    vld1.u8     {d12}, [r0], r1             ; p0
+    vld1.u8     {d13}, [r3], r1             ; p0
+    vld1.u8     {d14}, [r0], r1             ; q0
+    vld1.u8     {d15}, [r3], r1             ; q0
+    vld1.u8     {d16}, [r0], r1             ; q1
+    vld1.u8     {d17}, [r3], r1             ; q1
+    vld1.u8     {d18}, [r0], r1             ; q2
+    vld1.u8     {d19}, [r3], r1             ; q2
+    vld1.u8     {d20}, [r0], r1             ; q3
+    vld1.u8     {d21}, [r3], r1             ; q3
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, replicated into every lane of q2
+
+    bl          vp8_mbloop_filter_neon      ; filtered rows come back in q4-q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind u over the 8 rows loaded above
+    sub         r3, r3, r1, lsl #3          ; rewind v over the 8 rows loaded above
+
+    add         r0, r0, r1                  ; u -> p2 row (p3 is not written)
+    add         r3, r3, r1                  ; v -> p2 row (p3 is not written)
+
+    vst1.u8     {d8}, [r0], r1              ; store u op2
+    vst1.u8     {d9}, [r3], r1              ; store v op2
+    vst1.u8     {d10}, [r0], r1             ; store u op1
+    vst1.u8     {d11}, [r3], r1             ; store v op1
+    vst1.u8     {d12}, [r0], r1             ; store u op0
+    vst1.u8     {d13}, [r3], r1             ; store v op0
+    vst1.u8     {d14}, [r0], r1             ; store u oq0
+    vst1.u8     {d15}, [r3], r1             ; store v oq0
+    vst1.u8     {d16}, [r0], r1             ; store u oq1
+    vst1.u8     {d17}, [r3], r1             ; store v oq1
+    vst1.u8     {d18}, [r0], r1             ; store u oq2
+    vst1.u8     {d19}, [r3], r1             ; store v oq2
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
+
+; void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch,
+;                                             const signed char *flimit,
+;                                             const signed char *limit,
+;                                             const signed char *thresh,
+;                                             int count)
+; r0    unsigned char *src,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  int count (unused)
+|vp8_mbloop_filter_vertical_edge_y_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, #4                  ; src -= 4: each 8-byte row load covers src[-4..3]
+
+    vld1.u8     {d6}, [r0], r1              ; rows 0-7 go to the low d registers of q3..q10
+    ldr         r12, [sp, #4]               ; load thresh pointer (must precede the sp adjust below)
+    vld1.u8     {d8}, [r0], r1
+    sub         sp, sp, #32                 ; 32 bytes of stack scratch for q3 and q10
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d20}, [r0], r1
+
+    vld1.u8     {d7}, [r0], r1              ; rows 8-15 go to the high d registers of q3..q10
+    vld1.u8     {d9}, [r0], r1
+    vld1.u8     {d11}, [r0], r1
+    vld1.u8     {d13}, [r0], r1
+    vld1.u8     {d15}, [r0], r1
+    vld1.u8     {d17}, [r0], r1
+    vld1.u8     {d19}, [r0], r1
+    vld1.u8     {d21}, [r0], r1
+
+    ;transpose to 8x16 matrix: afterwards q3..q10 hold pixel columns p3..q3
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, replicated into every lane of q2
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, replicated into every lane of q1
+    mov         r12, sp                     ; r12 walks the scratch area; sp stays put
+    vst1.u8     {q3}, [r12]!                ; save p3 column: the helper overwrites q3
+    vst1.u8     {q10}, [r12]!               ; save q3 column: the helper overwrites q10
+
+    bl          vp8_mbloop_filter_neon      ; filtered columns come back in q4-q9
+
+    sub         r0, r0, r1, lsl #4          ; rewind the 16 rows loaded above (r0 -> row 0)
+
+    add         r2, r0, r1                  ; r2  -> row 1
+
+    add         r3, r2, r1                  ; r3  -> row 2
+
+    vld1.u8     {q3}, [sp]!                 ; reload the saved p3 column
+    vld1.u8     {q10}, [sp]!                ; reload the saved q3 column; sp is back at saved lr
+
+    ;transpose to 16x8 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+    add         r12, r3, r1                 ; r12 -> row 3
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store 8 bytes per row: p3, op2, op1, op0, oq0, oq1, oq2, q3 (p3/q3 unchanged)
+    vst1.8      {d6}, [r0]                  ; row 0
+    vst1.8      {d8}, [r2]                  ; row 1
+    vst1.8      {d10}, [r3]                 ; row 2
+    vst1.8      {d12}, [r12], r1            ; row 3, r12 -> row 4
+    add         r0, r12, r1                 ; r0  -> row 5
+    vst1.8      {d14}, [r12]                ; row 4
+    vst1.8      {d16}, [r0], r1             ; row 5, r0 -> row 6
+    add         r2, r0, r1                  ; r2  -> row 7
+    vst1.8      {d18}, [r0]                 ; row 6
+    vst1.8      {d20}, [r2], r1             ; row 7, r2 -> row 8
+    add         r3, r2, r1                  ; r3  -> row 9
+    vst1.8      {d7}, [r2]                  ; row 8
+    vst1.8      {d9}, [r3], r1              ; row 9, r3 -> row 10
+    add         r12, r3, r1                 ; r12 -> row 11
+    vst1.8      {d11}, [r3]                 ; row 10
+    vst1.8      {d13}, [r12], r1            ; row 11, r12 -> row 12
+    add         r0, r12, r1                 ; r0  -> row 13
+    vst1.8      {d15}, [r12]                ; row 12
+    vst1.8      {d17}, [r0], r1             ; row 13, r0 -> row 14
+    add         r2, r0, r1                  ; r2  -> row 15
+    vst1.8      {d19}, [r0]                 ; row 14
+    vst1.8      {d21}, [r2]                 ; row 15
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
+
+; void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch,
+;                                              const signed char *flimit,
+;                                              const signed char *limit,
+;                                              const signed char *thresh,
+;                                              unsigned char *v)
+; r0    unsigned char *u,
+; r1    int pitch,
+; r2    const signed char *flimit,
+; r3    const signed char *limit,
+; sp    const signed char *thresh,
+; sp+4  unsigned char *v
+|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
+    stmdb       sp!, {lr}                   ; save lr: the bl below overwrites it
+    sub         r0, r0, #4                  ; u -= 4: each 8-byte row load covers u[-4..3]
+    vld1.s8     {d2[], d3[]}, [r3]          ; limit, replicated into every lane of q1
+    ldr         r3, [sp, #8]                ; load v ptr (r3 is free: limit is already in q1)
+    ldr         r12, [sp, #4]               ; load thresh pointer
+
+    sub         r3, r3, #4                  ; v -= 4: each 8-byte row load covers v[-4..3]
+
+    vld1.u8     {d6}, [r0], r1              ;load u data (8 rows into the low d registers)
+    vld1.u8     {d7}, [r3], r1              ;load v data (8 rows into the high d registers)
+    vld1.u8     {d8}, [r0], r1
+    vld1.u8     {d9}, [r3], r1
+    vld1.u8     {d10}, [r0], r1
+    vld1.u8     {d11}, [r3], r1
+    vld1.u8     {d12}, [r0], r1
+    vld1.u8     {d13}, [r3], r1
+    vld1.u8     {d14}, [r0], r1
+    vld1.u8     {d15}, [r3], r1
+    vld1.u8     {d16}, [r0], r1
+    vld1.u8     {d17}, [r3], r1
+    vld1.u8     {d18}, [r0], r1
+    vld1.u8     {d19}, [r3], r1
+    vld1.u8     {d20}, [r0], r1
+    vld1.u8     {d21}, [r3], r1
+
+    ;transpose to 8x16 matrix: afterwards q3..q10 hold pixel columns p3..q3
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    sub         sp, sp, #32                 ; 32 bytes of stack scratch for q3 and q10
+    vld1.s8     {d4[], d5[]}, [r12]         ; thresh, replicated into every lane of q2
+    mov         r12, sp                     ; r12 walks the scratch area; sp stays put
+    vst1.u8     {q3}, [r12]!                ; save p3 column: the helper overwrites q3
+    vst1.u8     {q10}, [r12]!               ; save q3 column: the helper overwrites q10
+
+    bl          vp8_mbloop_filter_neon      ; filtered columns come back in q4-q9
+
+    sub         r0, r0, r1, lsl #3          ; rewind u over the 8 rows loaded above
+    sub         r3, r3, r1, lsl #3          ; rewind v over the 8 rows loaded above
+
+    vld1.u8     {q3}, [sp]!                 ; reload the saved p3 column
+    vld1.u8     {q10}, [sp]!                ; reload the saved q3 column; sp is back at saved lr
+
+    ;transpose to 16x8 matrix
+    vtrn.32     q3, q7
+    vtrn.32     q4, q8
+    vtrn.32     q5, q9
+    vtrn.32     q6, q10
+
+    vtrn.16     q3, q5
+    vtrn.16     q4, q6
+    vtrn.16     q7, q9
+    vtrn.16     q8, q10
+
+    vtrn.8      q3, q4
+    vtrn.8      q5, q6
+    vtrn.8      q7, q8
+    vtrn.8      q9, q10
+
+    ;store 8 bytes per row: p3, op2, op1, op0, oq0, oq1, oq2, q3 (p3/q3 unchanged)
+    vst1.8      {d6}, [r0], r1              ; u row 0
+    vst1.8      {d7}, [r3], r1              ; v row 0
+    vst1.8      {d8}, [r0], r1              ; u row 1
+    vst1.8      {d9}, [r3], r1              ; v row 1
+    vst1.8      {d10}, [r0], r1             ; u row 2
+    vst1.8      {d11}, [r3], r1             ; v row 2
+    vst1.8      {d12}, [r0], r1             ; u row 3
+    vst1.8      {d13}, [r3], r1             ; v row 3
+    vst1.8      {d14}, [r0], r1             ; u row 4
+    vst1.8      {d15}, [r3], r1             ; v row 4
+    vst1.8      {d16}, [r0], r1             ; u row 5
+    vst1.8      {d17}, [r3], r1             ; v row 5
+    vst1.8      {d18}, [r0], r1             ; u row 6
+    vst1.8      {d19}, [r3], r1             ; v row 6
+    vst1.8      {d20}, [r0], r1             ; u row 7
+    vst1.8      {d21}, [r3], r1             ; v row 7
+
+    ldmia       sp!, {pc}                   ; pop the saved lr into pc (return)
+    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
+
+; void vp8_mbloop_filter_neon()
+; This is a helper function for the macroblock loopfilters. The individual
+; functions do the necessary load, transpose (if necessary), preserve (if
+; necessary) and store.
+
+; TODO:
+; The vertical filter writes p3/q3 back out because two 4 element writes are
+; much simpler than ordering and writing two 3 element sets (or three 2 element
+; sets, or whichever other combinations are possible).
+; If this helper preserved q3 and q10, the vertical filters could avoid
+; storing those values on the stack and reading them back after the filter.
+
+; r0,r1 PRESERVE
+; r2    flimit
+; r3    PRESERVE
+; q1    limit
+; q2    thresh
+; q3    p3
+; q4    p2
+; q5    p1
+; q6    p0
+; q7    q0
+; q8    q1
+; q9    q2
+; q10   q3
+
+|vp8_mbloop_filter_neon| PROC
+    ldr         r12, _mblf_coeff_           ; r12 = address of the mblf_coeff table
+
+    ; vp8_filter_mask
+    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
+    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
+    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
+    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
+    vabd.u8     q3, q9, q8                  ; abs(q2 - q1) (overwrites p3 in q3)
+    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
+
+    vmax.u8     q11, q11, q12               ; max(abs(p3 - p2), abs(p2 - p1))
+    vmax.u8     q12, q13, q14               ; max(abs(p1 - p0), abs(q1 - q0))
+    vmax.u8     q3, q3, q0                  ; max(abs(q2 - q1), abs(q3 - q2))
+    vmax.u8     q15, q11, q12
+
+    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
+
+    ; vp8_hevmask
+    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh) * -1
+    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh) * -1
+    vmax.u8     q15, q15, q3                ; largest of the six differences above
+
+    vld1.s8     {d4[], d5[]}, [r2]          ; flimit (replaces thresh in q2)
+
+    vld1.u8     {q0}, [r12]!                ; #0x80
+
+    vadd.u8     q2, q2, q2                  ; flimit * 2
+    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
+    vcge.u8     q15, q1, q15                ; (limit >= largest difference) * -1
+
+    vabd.u8     q1, q5, q8                  ; a = abs(p1 - q1)
+    vqadd.u8    q12, q12, q12               ; b = abs(p0 - q0) * 2
+    vshr.u8     q1, q1, #1                  ; a = a / 2
+    vqadd.u8    q12, q12, q1                ; a = b + a
+    vcge.u8     q12, q2, q12                ; (flimit * 2 + limit >= a) * -1
+
+    ; vp8_filter
+    ; convert to signed
+    veor        q7, q7, q0                  ; qs0
+    veor        q6, q6, q0                  ; ps0
+    veor        q5, q5, q0                  ; ps1
+    veor        q8, q8, q0                  ; qs1
+    veor        q4, q4, q0                  ; ps2
+    veor        q9, q9, q0                  ; qs2
+
+    vorr        q14, q13, q14               ; vp8_hevmask
+
+    vsubl.s8    q2, d14, d12                ; qs0 - ps0 (widened to 16 bits, low half)
+    vsubl.s8    q13, d15, d13               ; qs0 - ps0 (widened to 16 bits, high half)
+
+    vqsub.s8    q1, q5, q8                  ; vp8_filter = clamp(ps1-qs1)
+
+    vadd.s16    q10, q2, q2                 ; 2 * (qs0 - ps0) (overwrites q3 pixels in q10)
+    vadd.s16    q11, q13, q13
+    vand        q15, q15, q12               ; vp8_filter_mask
+
+    vadd.s16    q2, q2, q10                 ; 3 * (qs0 - ps0)
+    vadd.s16    q13, q13, q11
+
+    vld1.u8     {q12}, [r12]!               ; #3
+
+    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
+    vaddw.s8    q13, q13, d3
+
+    vld1.u8     {q11}, [r12]!               ; #4
+
+    ; vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0))
+    vqmovn.s16  d2, q2
+    vqmovn.s16  d3, q13
+
+    vand        q1, q1, q15                 ; vp8_filter &= mask
+
+    vld1.u8     {q15}, [r12]!               ; #63 (in 16-bit lanes)
+    ;
+    vand        q13, q1, q14                ; Filter2 = vp8_filter & hev
+
+    vld1.u8     {d7}, [r12]!                ; #9
+
+    vqadd.s8    q2, q13, q11                ; Filter1 = clamp(Filter2+4)
+    vqadd.s8    q13, q13, q12               ; Filter2 = clamp(Filter2+3)
+
+    vld1.u8     {d6}, [r12]!                ; #18
+
+    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
+    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
+
+    vmov        q10, q15                    ; accumulators start at 63
+    vmov        q12, q15
+
+    vqsub.s8    q7, q7, q2                  ; qs0 = clamp(qs0 - Filter1)
+
+    vld1.u8     {d5}, [r12]!                ; #27
+
+    vqadd.s8    q6, q6, q13                 ; ps0 = clamp(ps0 + Filter2)
+
+    vbic        q1, q1, q14                 ; vp8_filter &= ~hev (called Filter2 below)
+
+    ; roughly 1/7th difference across boundary
+    ; roughly 2/7th difference across boundary
+    ; roughly 3/7th difference across boundary
+    vmov        q11, q15                    ; accumulators start at 63
+    vmov        q13, q15
+    vmov        q14, q15
+
+    vmlal.s8    q10, d2, d7                 ; 63 + Filter2 * 9
+    vmlal.s8    q11, d3, d7
+    vmlal.s8    q12, d2, d6                 ; 63 + Filter2 * 18
+    vmlal.s8    q13, d3, d6
+    vmlal.s8    q14, d2, d5                 ; 63 + Filter2 * 27
+    vmlal.s8    q15, d3, d5
+    vqshrn.s16  d20, q10, #7                ; u = clamp((63 + Filter2 * 9)>>7)
+    vqshrn.s16  d21, q11, #7
+    vqshrn.s16  d24, q12, #7                ; u = clamp((63 + Filter2 * 18)>>7)
+    vqshrn.s16  d25, q13, #7
+    vqshrn.s16  d28, q14, #7                ; u = clamp((63 + Filter2 * 27)>>7)
+    vqshrn.s16  d29, q15, #7
+
+    vqsub.s8    q11, q9, q10                ; s = clamp(qs2 - u)
+    vqadd.s8    q10, q4, q10                ; s = clamp(ps2 + u)
+    vqsub.s8    q13, q8, q12                ; s = clamp(qs1 - u)
+    vqadd.s8    q12, q5, q12                ; s = clamp(ps1 + u)
+    vqsub.s8    q15, q7, q14                ; s = clamp(qs0 - u)
+    vqadd.s8    q14, q6, q14                ; s = clamp(ps0 + u)
+    veor        q9, q11, q0                 ; *oq2 = s^0x80
+    veor        q4, q10, q0                 ; *op2 = s^0x80
+    veor        q8, q13, q0                 ; *oq1 = s^0x80
+    veor        q5, q12, q0                 ; *op1 = s^0x80
+    veor        q7, q15, q0                 ; *oq0 = s^0x80
+    veor        q6, q14, q0                 ; *op0 = s^0x80
+
+    bx          lr
+    ENDP        ; |vp8_mbloop_filter_neon|
+
+    AREA    mbloopfilter_dat, DATA, READONLY
+_mblf_coeff_
+    DCD     mblf_coeff                                      ; table address, fetched by the ldr in vp8_mbloop_filter_neon
+mblf_coeff
+    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080  ; 16 bytes of 0x80, loaded into q0
+    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303  ; 16 bytes of 3, loaded into q12
+    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404  ; 16 bytes of 4, loaded into q11
+    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f  ; 8 halfwords of 63, loaded into q15
+    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212  ; 8 bytes of 9 (d7), then 8 bytes of 18 (d6)
+    DCD     0x1b1b1b1b, 0x1b1b1b1b                          ; 8 bytes of 27 (d5)
+
+    END
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_uv_neon.asm
@@ -1,220 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_horizontal_edge_uv_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move u pointer down by 4 lines
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    sub         r3, r3, r1, lsl #2          ; move v pointer down by 4 lines
-
-    vld1.u8     {d6}, [r0], r1              ; p3
-    vld1.u8     {d7}, [r3], r1              ; p3
-    vld1.u8     {d8}, [r0], r1              ; p2
-    vld1.u8     {d9}, [r3], r1              ; p2
-    vld1.u8     {d10}, [r0], r1             ; p1
-    vld1.u8     {d11}, [r3], r1             ; p1
-    vld1.u8     {d12}, [r0], r1             ; p0
-    vld1.u8     {d13}, [r3], r1             ; p0
-    vld1.u8     {d14}, [r0], r1             ; q0
-    vld1.u8     {d15}, [r3], r1             ; q0
-    vld1.u8     {d16}, [r0], r1             ; q1
-    vld1.u8     {d17}, [r3], r1             ; q1
-    vld1.u8     {d18}, [r0], r1             ; q2
-    vld1.u8     {d19}, [r3], r1             ; q2
-    vld1.u8     {d20}, [r0], r1             ; q3
-    vld1.u8     {d21}, [r3], r1             ; q3
-
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-
-    ldr         r12, _mbhlfuv_coeff_
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 +  limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    add         r0, r0, r1
-    add         r3, r3, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-
-    vst1.u8     {d8}, [r0], r1              ; store u op2
-    vst1.u8     {d9}, [r3], r1              ; store v op2
-    vst1.u8     {d10}, [r0], r1             ; store u op1
-    vst1.u8     {d11}, [r3], r1             ; store v op1
-    vst1.u8     {d12}, [r0], r1             ; store u op0
-    vst1.u8     {d13}, [r3], r1             ; store v op0
-    vst1.u8     {d14}, [r0], r1             ; store u oq0
-    vst1.u8     {d15}, [r3], r1             ; store v oq0
-    vst1.u8     {d16}, [r0], r1             ; store u oq1
-    vst1.u8     {d17}, [r3], r1             ; store v oq1
-    vst1.u8     {d18}, [r0], r1             ; store u oq2
-    vst1.u8     {d19}, [r3], r1             ; store v oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_uv_neon|
-
-;-----------------
-    AREA    mbhloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfuv_coeff_
-    DCD     mbhlfuv_coeff
-mbhlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
--- a/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterhorizontaledge_y_neon.asm
@@ -1,201 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_horizontal_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_horizontal_edge_y_neon| PROC
-    sub         r0, r0, r1, lsl #2          ; move src pointer down by 4 lines
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    vld1.u8     {q3}, [r0], r1              ; p3
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vld1.u8     {q4}, [r0], r1              ; p2
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vld1.u8     {q5}, [r0], r1              ; p1
-    ldr         r12, _mbhlfy_coeff_
-    vld1.u8     {q6}, [r0], r1              ; p0
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vld1.u8     {q7}, [r0], r1              ; q0
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vld1.u8     {q8}, [r0], r1              ; q1
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vld1.u8     {q9}, [r0], r1              ; q2
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vld1.u8     {q10}, [r0], r1             ; q3
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    sub         r0, r0, r1, lsl #3
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    add         r0, r0, r1
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-
-    vst1.u8     {q4}, [r0]                  ; store op2
-    vst1.u8     {q5}, [r2]                  ; store op1
-    vst1.u8     {q6}, [r3], r1              ; store op0
-    add         r12, r3, r1
-    vst1.u8     {q7}, [r3]                  ; store oq0
-    vst1.u8     {q8}, [r12], r1             ; store oq1
-    vst1.u8     {q9}, [r12]             ; store oq2
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_horizontal_edge_y_neon|
-
-;-----------------
-    AREA    mbhloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbhlfy_coeff_
-    DCD     mbhlfy_coeff
-mbhlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_uv_neon.asm
@@ -1,262 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_uv_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *u,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; stack(r5) unsigned char *v
-|vp8_mbloop_filter_vertical_edge_uv_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    ldr         r3, [sp, #4]                ; load v ptr
-    ldr         r12, [sp, #0]               ; load thresh pointer
-
-    sub         r3, r3, #4                  ; move v pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ;load u data
-    vld1.u8     {d7}, [r3], r1              ;load v data
-    vld1.u8     {d8}, [r0], r1
-    vld1.u8     {d9}, [r3], r1
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d11}, [r3], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d13}, [r3], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d15}, [r3], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d17}, [r3], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d19}, [r3], r1
-    vld1.u8     {d20}, [r0], r1
-    vld1.u8     {d21}, [r3], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    sub         sp, sp, #32
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    vst1.u8     {q3}, [sp]!
-    ldr         r12, _mbvlfuv_coeff_
-    vst1.u8     {q10}, [sp]!
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-
-    sub         r0, r0, r1, lsl #3
-    sub         r3, r3, r1, lsl #3
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-
-    sub         sp, sp, #32
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0], r1
-    vst1.8      {d7}, [r3], r1
-    vst1.8      {d8}, [r0], r1
-    vst1.8      {d9}, [r3], r1
-    vst1.8      {d10}, [r0], r1
-    vst1.8      {d11}, [r3], r1
-    vst1.8      {d12}, [r0], r1
-    vst1.8      {d13}, [r3], r1
-    vst1.8      {d14}, [r0], r1
-    vst1.8      {d15}, [r3], r1
-    vst1.8      {d16}, [r0], r1
-    vst1.8      {d17}, [r3], r1
-    vst1.8      {d18}, [r0], r1
-    vst1.8      {d19}, [r3], r1
-    vst1.8      {d20}, [r0], r1
-    vst1.8      {d21}, [r3], r1
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_uv_neon|
-
-;-----------------
-    AREA    mbvloopfilteruv_dat, DATA, READWRITE            ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbvlfuv_coeff_
-    DCD     mbvlfuv_coeff
-mbvlfuv_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
--- a/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
+++ b/vp8/common/arm/neon/mbloopfilterverticaledge_y_neon.asm
@@ -1,267 +0,0 @@
-;
-;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    EXPORT  |vp8_mbloop_filter_vertical_edge_y_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-;Note: flimit, limit, and thresh shpuld be positive numbers. All 16 elements in flimit
-;are equal. So, in the code, only one load is needed
-;for flimit. Same way applies to limit and thresh.
-; r0    unsigned char *s,
-; r1    int p, //pitch
-; r2    const signed char *flimit,
-; r3    const signed char *limit,
-; stack(r4) const signed char *thresh,
-; //stack(r5)   int count --unused
-|vp8_mbloop_filter_vertical_edge_y_neon| PROC
-    sub         r0, r0, #4                  ; move src pointer down by 4 columns
-
-    vld1.u8     {d6}, [r0], r1              ; load first 8-line src data
-    ldr         r12, [sp, #0]               ; load thresh pointer
-    vld1.u8     {d8}, [r0], r1
-    sub         sp, sp, #32
-    vld1.u8     {d10}, [r0], r1
-    vld1.u8     {d12}, [r0], r1
-    vld1.u8     {d14}, [r0], r1
-    vld1.u8     {d16}, [r0], r1
-    vld1.u8     {d18}, [r0], r1
-    vld1.u8     {d20}, [r0], r1
-
-    vld1.u8     {d7}, [r0], r1              ; load second 8-line src data
-    vld1.u8     {d9}, [r0], r1
-    vld1.u8     {d11}, [r0], r1
-    vld1.u8     {d13}, [r0], r1
-    vld1.u8     {d15}, [r0], r1
-    vld1.u8     {d17}, [r0], r1
-    vld1.u8     {d19}, [r0], r1
-    vld1.u8     {d21}, [r0], r1
-
-    ;transpose to 8x16 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    vld1.s8     {d2[], d3[]}, [r3]          ; limit
-    vst1.u8     {q3}, [sp]!
-    vld1.s8     {d4[], d5[]}, [r12]         ; thresh
-    ldr         r12, _mbvlfy_coeff_
-    vst1.u8     {q10}, [sp]!
-
-    ; vp8_filter_mask
-    vabd.u8     q11, q3, q4                 ; abs(p3 - p2)
-    vabd.u8     q12, q4, q5                 ; abs(p2 - p1)
-    vabd.u8     q13, q5, q6                 ; abs(p1 - p0)
-    vabd.u8     q14, q8, q7                 ; abs(q1 - q0)
-    vabd.u8     q3, q9, q8                  ; abs(q2 - q1)
-    vabd.u8     q0, q10, q9                 ; abs(q3 - q2)
-
-    vmax.u8     q11, q11, q12
-    vmax.u8     q12, q13, q14
-    vmax.u8     q3, q3, q0
-    vmax.u8     q15, q11, q12
-
-    vabd.u8     q12, q6, q7                 ; abs(p0 - q0)
-
-    ; vp8_hevmask
-    vcgt.u8     q13, q13, q2                ; (abs(p1 - p0) > thresh)*-1
-    vcgt.u8     q14, q14, q2                ; (abs(q1 - q0) > thresh)*-1
-    vmax.u8     q15, q15, q3
-
-    vld1.s8     {d4[], d5[]}, [r2]          ; flimit
-
-    vld1.u8     {q0}, [r12]!
-
-    vadd.u8     q2, q2, q2                  ; flimit * 2
-    vadd.u8     q2, q2, q1                  ; flimit * 2 + limit
-    vcge.u8     q15, q1, q15
-
-    vabd.u8     q1, q5, q8                  ; abs(p1 - q1)
-    vqadd.u8    q12, q12, q12               ; abs(p0 - q0) * 2
-    vshr.u8     q1, q1, #1                  ; abs(p1 - q1) / 2
-    vqadd.u8    q12, q12, q1                ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2
-    vcge.u8     q12, q2, q12                ; (abs(p0 - q0)*2 + abs(p1 - q1)/2 > flimit*2 + limit)*-1
-
-    ;vp8_filter() function
-    veor        q7, q7, q0                  ; qs0: q0 offset to convert to a signed value
-    veor        q6, q6, q0                  ; ps0: p0 offset to convert to a signed value
-    veor        q5, q5, q0                  ; ps1: p1 offset to convert to a signed value
-    veor        q8, q8, q0                  ; qs1: q1 offset to convert to a signed value
-    veor        q4, q4, q0                  ; ps2: p2 offset to convert to a signed value
-    veor        q9, q9, q0                  ; qs2: q2 offset to convert to a signed value
-;;;;;;;;;;;;;
-    vorr        q14, q13, q14               ; q14: vp8_hevmask
-
-    vsubl.s8    q2, d14, d12                ; ( qs0 - ps0)
-    vsubl.s8    q13, d15, d13
-
-    vqsub.s8    q1, q5, q8                  ; vp8_filter = vp8_signed_char_clamp(ps1-qs1)
-
-    vadd.s16    q10, q2, q2                 ; 3 * ( qs0 - ps0)
-    vadd.s16    q11, q13, q13
-    vand        q15, q15, q12               ; vp8_filter_mask
-
-    vadd.s16    q2, q2, q10
-    vadd.s16    q13, q13, q11
-
-    vld1.u8     {q12}, [r12]!               ;#3
-
-    vaddw.s8    q2, q2, d2                  ; vp8_filter + 3 * ( qs0 - ps0)
-    vaddw.s8    q13, q13, d3
-
-    vld1.u8     {q11}, [r12]!               ;#4
-
-    vqmovn.s16  d2, q2                      ; vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * ( qs0 - ps0))
-    vqmovn.s16  d3, q13
-
-;;;;;;;;;;;;;;
-    vand        q1, q1, q15                 ; vp8_filter &= mask
-
-    vld1.u8     {q15}, [r12]!               ;#63
-    ;
-    vand        q13, q1, q14                ; Filter2: q13; Filter2 &= hev
-
-    vld1.u8     {d7}, [r12]!                ;#9
-    ;
-
-    vqadd.s8    q2, q13, q11                ; Filter1 = vp8_signed_char_clamp(Filter2+4)
-    vqadd.s8    q13, q13, q12               ; Filter2 = vp8_signed_char_clamp(Filter2+3)
-
-    vld1.u8     {d6}, [r12]!                ;#18
-    sub         r0, r0, r1, lsl #4
-    sub         sp, sp, #32
-
-    add         r2, r0, r1
-
-    vshr.s8     q2, q2, #3                  ; Filter1 >>= 3
-    vshr.s8     q13, q13, #3                ; Filter2 >>= 3
-
-    vmov        q10, q15
-    vmov        q12, q15
-
-    vqsub.s8    q7, q7, q2                  ; qs0 = vp8_signed_char_clamp(qs0 - Filter1)
-
-    vld1.u8     {d5}, [r12]!                ;#27
-    add         r3, r2, r1
-
-    vqadd.s8    q6, q6, q13                 ; ps0 = vp8_signed_char_clamp(ps0 + Filter2)
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-    vbic        q1, q1, q14                 ; Filter2: q1; vp8_filter &= ~hev; Filter2 = vp8_filter
-
-    ; roughly 1/7th difference across boundary
-    ; roughly 2/7th difference across boundary
-    ; roughly 3/7th difference across boundary
-    vmov        q11, q15
-    vmov        q13, q15
-    vmov        q14, q15
-
-    vmlal.s8    q10, d2, d7                 ; Filter2 * 9
-    vmlal.s8    q11, d3, d7
-    vmlal.s8    q12, d2, d6                 ; Filter2 * 18
-    vmlal.s8    q13, d3, d6
-    vmlal.s8    q14, d2, d5                 ; Filter2 * 27
-    vmlal.s8    q15, d3, d5
-    vqshrn.s16  d20, q10, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 9)>>7)
-    vqshrn.s16  d21, q11, #7
-    vqshrn.s16  d24, q12, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 18)>>7)
-    vqshrn.s16  d25, q13, #7
-    vqshrn.s16  d28, q14, #7                ; u = vp8_signed_char_clamp((63 + Filter2 * 27)>>7)
-    vqshrn.s16  d29, q15, #7
-
-    vqsub.s8    q11, q9, q10                ; s = vp8_signed_char_clamp(qs2 - u)
-    vqadd.s8    q10, q4, q10                ; s = vp8_signed_char_clamp(ps2 + u)
-    vqsub.s8    q13, q8, q12                ; s = vp8_signed_char_clamp(qs1 - u)
-    vqadd.s8    q12, q5, q12                ; s = vp8_signed_char_clamp(ps1 + u)
-    vqsub.s8    q15, q7, q14                ; s = vp8_signed_char_clamp(qs0 - u)
-    vqadd.s8    q14, q6, q14                ; s = vp8_signed_char_clamp(ps0 + u)
-    veor        q9, q11, q0                 ; *oq2 = s^0x80
-    veor        q4, q10, q0                 ; *op2 = s^0x80
-    veor        q8, q13, q0                 ; *oq1 = s^0x80
-    veor        q5, q12, q0                 ; *op2 = s^0x80
-    veor        q7, q15, q0                 ; *oq0 = s^0x80
-    vld1.u8     {q3}, [sp]!
-    veor        q6, q14, q0                 ; *op0 = s^0x80
-    vld1.u8     {q10}, [sp]!
-
-    ;transpose to 16x8 matrix
-    vtrn.32     q3, q7
-    vtrn.32     q4, q8
-    vtrn.32     q5, q9
-    vtrn.32     q6, q10
-    add         r12, r3, r1
-
-    vtrn.16     q3, q5
-    vtrn.16     q4, q6
-    vtrn.16     q7, q9
-    vtrn.16     q8, q10
-
-    vtrn.8      q3, q4
-    vtrn.8      q5, q6
-    vtrn.8      q7, q8
-    vtrn.8      q9, q10
-
-    ;store op2, op1, op0, oq0, oq1, oq2
-    vst1.8      {d6}, [r0]
-    vst1.8      {d8}, [r2]
-    vst1.8      {d10}, [r3]
-    vst1.8      {d12}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d14}, [r12]
-    vst1.8      {d16}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d18}, [r0]
-    vst1.8      {d20}, [r2], r1
-    add         r3, r2, r1
-    vst1.8      {d7}, [r2]
-    vst1.8      {d9}, [r3], r1
-    add         r12, r3, r1
-    vst1.8      {d11}, [r3]
-    vst1.8      {d13}, [r12], r1
-    add         r0, r12, r1
-    vst1.8      {d15}, [r12]
-    vst1.8      {d17}, [r0], r1
-    add         r2, r0, r1
-    vst1.8      {d19}, [r0]
-    vst1.8      {d21}, [r2]
-
-    bx          lr
-    ENDP        ; |vp8_mbloop_filter_vertical_edge_y_neon|
-
-;-----------------
-    AREA    mbvloopfiltery_dat, DATA, READWRITE         ;read/write by default
-;Data section with name data_area is specified. DCD reserves space in memory for 16 data.
-;One word each is reserved. Label filter_coeff can be used to access the data.
-;Data address: filter_coeff, filter_coeff+4, filter_coeff+8 ...
-_mbvlfy_coeff_
-    DCD     mbvlfy_coeff
-mbvlfy_coeff
-    DCD     0x80808080, 0x80808080, 0x80808080, 0x80808080
-    DCD     0x03030303, 0x03030303, 0x03030303, 0x03030303
-    DCD     0x04040404, 0x04040404, 0x04040404, 0x04040404
-    DCD     0x003f003f, 0x003f003f, 0x003f003f, 0x003f003f
-    DCD     0x09090909, 0x09090909, 0x12121212, 0x12121212
-    DCD     0x1b1b1b1b, 0x1b1b1b1b
-
-    END
--- a/vp8/common/arm/neon/recon_neon.c
+++ b/vp8/common/arm/neon/recon_neon.c
@@ -0,0 +1,29 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/config.h"
+#include "recon.h"
+#include "blockd.h"
+
+extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
+
+void vp8_recon_mb_neon(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x) /* rtcd is not referenced in this body */
+{
+    unsigned char *pred_ptr = &x->predictor[0]; /* first element of x->predictor */
+    short *diff_ptr = &x->diff[0];              /* first element of x->diff */
+    unsigned char *dst_ptr = x->dst.y_buffer;   /* Y destination plane */
+    unsigned char *udst_ptr = x->dst.u_buffer;  /* U destination plane */
+    unsigned char *vdst_ptr = x->dst.v_buffer;  /* V destination plane */
+    int ystride = x->dst.y_stride;
+    /* Only y_stride is forwarded; x->dst.uv_stride is not passed (presumably derived from ystride in the assembly -- confirm). */
+
+    vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr); /* whole-macroblock work is done by the external NEON routine */
+}
--- a/vp8/common/arm/recon_arm.c
+++ b/vp8/common/arm/recon_arm.c
@@ -1,109 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "recon.h"
-#include "blockd.h"
-
-extern void vp8_recon16x16mb_neon(unsigned char *pred_ptr, short *diff_ptr, unsigned char *dst_ptr, int ystride, unsigned char *udst_ptr, unsigned char *vdst_ptr);
-
-/*
-void vp8_recon16x16mby(MACROBLOCKD *x)
-{
-    int i;
-    for(i=0;i<16;i+=4)
-    {
-        //vp8_recon4b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-}
-*/
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    BLOCKD *b = &x->block[0];
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[4];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[8];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    //b = &x->block[12];
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-
-#if HAVE_ARMV7
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    unsigned char *pred_ptr = &x->predictor[0];
-    short *diff_ptr = &x->diff[0];
-    unsigned char *dst_ptr = x->dst.y_buffer;
-    unsigned char *udst_ptr = x->dst.u_buffer;
-    unsigned char *vdst_ptr = x->dst.v_buffer;
-    int ystride = x->dst.y_stride;
-    //int uv_stride = x->dst.uv_stride;
-
-    vp8_recon16x16mb_neon(pred_ptr, diff_ptr, dst_ptr, ystride, udst_ptr, vdst_ptr);
-}
-
-#else
-/*
-void vp8_recon16x16mb(MACROBLOCKD *x)
-{
-    int i;
-
-    for(i=0;i<16;i+=4)
-    {
-//      vp8_recon4b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon4b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-
-    }
-    for(i=16;i<24;i+=2)
-    {
-//      vp8_recon2b(&x->block[i]);
-        BLOCKD *b = &x->block[i];
-        vp8_recon2b (b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-}
-*/
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    BLOCKD *b = &x->block[0];
-
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b += 4;
-
-    //b = &x->block[16];
-
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    b++;
-    b++;
-    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-}
-#endif
--- a/vp8/common/arm/recon_arm.h
+++ b/vp8/common/arm/recon_arm.h
@@ -21,6 +21,7 @@ extern prototype_copy_block(vp8_copy_mem8x8_v6);
 extern prototype_copy_block(vp8_copy_mem8x4_v6);
 extern prototype_copy_block(vp8_copy_mem16x16_v6);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_armv6

@@ -39,6 +40,7 @@ extern prototype_copy_block(vp8_copy_mem16x16_v6);
 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_v6
 #endif
+#endif

 #if HAVE_ARMV7
 extern prototype_recon_block(vp8_recon_b_neon);
@@ -49,6 +51,9 @@ extern prototype_copy_block(vp8_copy_mem8x8_neon);
 extern prototype_copy_block(vp8_copy_mem8x4_neon);
 extern prototype_copy_block(vp8_copy_mem16x16_neon);

+extern prototype_recon_macroblock(vp8_recon_mb_neon);
+
+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_recon_recon
 #define vp8_recon_recon vp8_recon_b_neon

@@ -66,6 +71,10 @@ extern prototype_copy_block(vp8_copy_mem16x16_neon);

 #undef  vp8_recon_copy16x16
 #define vp8_recon_copy16x16 vp8_copy_mem16x16_neon
+
+#undef  vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_neon
+#endif
 #endif

 #endif
--- a/vp8/common/arm/reconintra4x4_arm.c
+++ b/vp8/common/arm/reconintra4x4_arm.c
@@ -1,409 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "recon.h"
-#include "vpx_mem/vpx_mem.h"
-#include "reconintra.h"
-
-void vp8_predict_intra4x4(BLOCKD *x,
-                          int b_mode,
-                          unsigned char *predictor)
-{
-    int i, r, c;
-
-    unsigned char *Above = *(x->base_dst) + x->dst - x->dst_stride;
-    unsigned char Left[4];
-    unsigned char top_left = Above[-1];
-
-    Left[0] = (*(x->base_dst))[x->dst - 1];
-    Left[1] = (*(x->base_dst))[x->dst - 1 + x->dst_stride];
-    Left[2] = (*(x->base_dst))[x->dst - 1 + 2 * x->dst_stride];
-    Left[3] = (*(x->base_dst))[x->dst - 1 + 3 * x->dst_stride];
-
-    switch (b_mode)
-    {
-    case B_DC_PRED:
-    {
-        int expected_dc = 0;
-
-        for (i = 0; i < 4; i++)
-        {
-            expected_dc += Above[i];
-            expected_dc += Left[i];
-        }
-
-        expected_dc = (expected_dc + 4) >> 3;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                predictor[c] = expected_dc;
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-    case B_TM_PRED:
-    {
-        // prediction similar to true_motion prediction
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                int pred = Above[c] - top_left + Left[r];
-
-                if (pred < 0)
-                    pred = 0;
-
-                if (pred > 255)
-                    pred = 255;
-
-                predictor[c] = pred;
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-
-    case B_VE_PRED:
-    {
-
-        unsigned int ap[4];
-        ap[0] = (top_left  + 2 * Above[0] + Above[1] + 2) >> 2;
-        ap[1] = (Above[0] + 2 * Above[1] + Above[2] + 2) >> 2;
-        ap[2] = (Above[1] + 2 * Above[2] + Above[3] + 2) >> 2;
-        ap[3] = (Above[2] + 2 * Above[3] + Above[4] + 2) >> 2;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-
-                predictor[c] = ap[c];
-            }
-
-            predictor += 16;
-        }
-
-    }
-    break;
-
-
-    case B_HE_PRED:
-    {
-
-        unsigned int lp[4];
-        lp[0] = (top_left + 2 * Left[0] + Left[1] + 2) >> 2;
-        lp[1] = (Left[0] + 2 * Left[1] + Left[2] + 2) >> 2;
-        lp[2] = (Left[1] + 2 * Left[2] + Left[3] + 2) >> 2;
-        lp[3] = (Left[2] + 2 * Left[3] + Left[3] + 2) >> 2;
-
-        for (r = 0; r < 4; r++)
-        {
-            for (c = 0; c < 4; c++)
-            {
-                predictor[c] = lp[r];
-            }
-
-            predictor += 16;
-        }
-    }
-    break;
-    case B_LD_PRED:
-    {
-        unsigned char *ptr = Above;
-        predictor[0 * 16 + 0] = (ptr[0] + ptr[1] * 2 + ptr[2] + 2) >> 2;
-        predictor[0 * 16 + 1] =
-            predictor[1 * 16 + 0] = (ptr[1] + ptr[2] * 2 + ptr[3] + 2) >> 2;
-        predictor[0 * 16 + 2] =
-            predictor[1 * 16 + 1] =
-                predictor[2 * 16 + 0] = (ptr[2] + ptr[3] * 2 + ptr[4] + 2) >> 2;
-        predictor[0 * 16 + 3] =
-            predictor[1 * 16 + 2] =
-                predictor[2 * 16 + 1] =
-                    predictor[3 * 16 + 0] = (ptr[3] + ptr[4] * 2 + ptr[5] + 2) >> 2;
-        predictor[1 * 16 + 3] =
-            predictor[2 * 16 + 2] =
-                predictor[3 * 16 + 1] = (ptr[4] + ptr[5] * 2 + ptr[6] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[3 * 16 + 2] = (ptr[5] + ptr[6] * 2 + ptr[7] + 2) >> 2;
-        predictor[3 * 16 + 3] = (ptr[6] + ptr[7] * 2 + ptr[7] + 2) >> 2;
-
-    }
-    break;
-    case B_RD_PRED:
-    {
-
-        unsigned char pp[9];
-
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-        predictor[3 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[3 * 16 + 1] =
-            predictor[2 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[3 * 16 + 2] =
-            predictor[2 * 16 + 1] =
-                predictor[1 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[3 * 16 + 3] =
-            predictor[2 * 16 + 2] =
-                predictor[1 * 16 + 1] =
-                    predictor[0 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[1 * 16 + 2] =
-                predictor[0 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[1 * 16 + 3] =
-            predictor[0 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-
-    }
-    break;
-    case B_VR_PRED:
-    {
-
-        unsigned char pp[9];
-
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-
-        predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 0] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[3 * 16 + 1] =
-            predictor[1 * 16 + 0] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 1] =
-            predictor[0 * 16 + 0] = (pp[4] + pp[5] + 1) >> 1;
-        predictor[3 * 16 + 2] =
-            predictor[1 * 16 + 1] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[0 * 16 + 1] = (pp[5] + pp[6] + 1) >> 1;
-        predictor[3 * 16 + 3] =
-            predictor[1 * 16 + 2] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-        predictor[2 * 16 + 3] =
-            predictor[0 * 16 + 2] = (pp[6] + pp[7] + 1) >> 1;
-        predictor[1 * 16 + 3] = (pp[6] + pp[7] * 2 + pp[8] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[7] + pp[8] + 1) >> 1;
-
-    }
-    break;
-    case B_VL_PRED:
-    {
-
-        unsigned char *pp = Above;
-
-        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[1 * 16 + 0] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[2 * 16 + 0] =
-            predictor[0 * 16 + 1] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[1 * 16 + 1] =
-            predictor[3 * 16 + 0] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 1] =
-            predictor[0 * 16 + 2] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[3 * 16 + 1] =
-            predictor[1 * 16 + 2] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[0 * 16 + 3] =
-            predictor[2 * 16 + 2] = (pp[3] + pp[4] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[3 * 16 + 2] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[2 * 16 + 3] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[3 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-    }
-    break;
-
-    case B_HD_PRED:
-    {
-        unsigned char pp[9];
-        pp[0] = Left[3];
-        pp[1] = Left[2];
-        pp[2] = Left[1];
-        pp[3] = Left[0];
-        pp[4] = top_left;
-        pp[5] = Above[0];
-        pp[6] = Above[1];
-        pp[7] = Above[2];
-        pp[8] = Above[3];
-
-
-        predictor[3 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[3 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[2 * 16 + 0] =
-            predictor[3 * 16 + 2] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[2 * 16 + 1] =
-            predictor[3 * 16 + 3] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[1 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[2 * 16 + 3] =
-            predictor[1 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[4] + 2) >> 2;
-        predictor[1 * 16 + 2] =
-            predictor[0 * 16 + 0] = (pp[3] + pp[4] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[0 * 16 + 1] = (pp[3] + pp[4] * 2 + pp[5] + 2) >> 2;
-        predictor[0 * 16 + 2] = (pp[4] + pp[5] * 2 + pp[6] + 2) >> 2;
-        predictor[0 * 16 + 3] = (pp[5] + pp[6] * 2 + pp[7] + 2) >> 2;
-    }
-    break;
-
-
-    case B_HU_PRED:
-    {
-        unsigned char *pp = Left;
-        predictor[0 * 16 + 0] = (pp[0] + pp[1] + 1) >> 1;
-        predictor[0 * 16 + 1] = (pp[0] + pp[1] * 2 + pp[2] + 2) >> 2;
-        predictor[0 * 16 + 2] =
-            predictor[1 * 16 + 0] = (pp[1] + pp[2] + 1) >> 1;
-        predictor[0 * 16 + 3] =
-            predictor[1 * 16 + 1] = (pp[1] + pp[2] * 2 + pp[3] + 2) >> 2;
-        predictor[1 * 16 + 2] =
-            predictor[2 * 16 + 0] = (pp[2] + pp[3] + 1) >> 1;
-        predictor[1 * 16 + 3] =
-            predictor[2 * 16 + 1] = (pp[2] + pp[3] * 2 + pp[3] + 2) >> 2;
-        predictor[2 * 16 + 2] =
-            predictor[2 * 16 + 3] =
-                predictor[3 * 16 + 0] =
-                    predictor[3 * 16 + 1] =
-                        predictor[3 * 16 + 2] =
-                            predictor[3 * 16 + 3] = pp[3];
-    }
-    break;
-
-
-    }
-}
-// copy 4 bytes from the above right down so that the 4x4 prediction modes using pixels above and
-// to the right prediction have filled in pixels to use.
-void vp8_intra_prediction_down_copy(MACROBLOCKD *x)
-{
-    unsigned char *above_right = *(x->block[0].base_dst) + x->block[0].dst - x->block[0].dst_stride + 16;
-
-    unsigned int *src_ptr = (unsigned int *)above_right;
-    unsigned int *dst_ptr0 = (unsigned int *)(above_right + 4 * x->block[0].dst_stride);
-    unsigned int *dst_ptr1 = (unsigned int *)(above_right + 8 * x->block[0].dst_stride);
-    unsigned int *dst_ptr2 = (unsigned int *)(above_right + 12 * x->block[0].dst_stride);
-
-    *dst_ptr0 = *src_ptr;
-    *dst_ptr1 = *src_ptr;
-    *dst_ptr2 = *src_ptr;
-}
-
-
-
-/*
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-
-    vp8_intra_prediction_down_copy(x);
-
-    for(i=0;i<16;i++)
-    {
-        BLOCKD *b = &x->block[i];
-
-        vp8_predict_intra4x4(b, x->block[i].bmi.mode,x->block[i].predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-
-    vp8_recon_intra_mbuv(x);
-
-}
-*/
-void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
-{
-    int i;
-    BLOCKD *b = &x->block[0];
-
-    vp8_intra_prediction_down_copy(x);
-
-    {
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-        b += 1;
-
-        vp8_predict_intra4x4(b, b->bmi.mode, b->predictor);
-        RECON_INVOKE(rtcd, recon)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
-    }
-
-    vp8_recon_intra_mbuv(rtcd, x);
-
-}
--- a/vp8/common/arm/subpixel_arm.h
+++ b/vp8/common/arm/subpixel_arm.h
@@ -22,6 +22,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_armv6);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_armv6

@@ -46,6 +47,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_armv6);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_armv6
 #endif
+#endif

 #if HAVE_ARMV7
 extern prototype_subpixel_predict(vp8_sixtap_predict16x16_neon);
@@ -57,6 +59,7 @@ extern prototype_subpixel_predict(vp8_bilinear_predict8x8_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict8x4_neon);
 extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);

+#if !CONFIG_RUNTIME_CPU_DETECT
 #undef  vp8_subpix_sixtap16x16
 #define vp8_subpix_sixtap16x16 vp8_sixtap_predict16x16_neon

@@ -81,5 +84,6 @@ extern prototype_subpixel_predict(vp8_bilinear_predict4x4_neon);
 #undef  vp8_subpix_bilinear4x4
 #define vp8_subpix_bilinear4x4 vp8_bilinear_predict4x4_neon
 #endif
+#endif

 #endif
--- a/vp8/common/arm/systemdependent.c
+++ b/vp8/common/arm/systemdependent.c
@@ -1,149 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include "vpx_ports/config.h"
-#include "g_common.h"
-#include "pragmas.h"
-#include "subpixel.h"
-#include "loopfilter.h"
-#include "recon.h"
-#include "idct.h"
-#include "onyxc_int.h"
-
-void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_neon(MACROBLOCKD *x);
-
-void (*vp8_build_intra_predictors_mby_s_ptr)(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x);
-extern void vp8_build_intra_predictors_mby_s_neon(MACROBLOCKD *x);
-
-void vp8_machine_specific_config(VP8_COMMON *ctx)
-{
-#if CONFIG_RUNTIME_CPU_DETECT
-    VP8_COMMON_RTCD *rtcd = &ctx->rtcd;
-
-#if HAVE_ARMV7
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_neon;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_neon;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_neon;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_neon;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_neon;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_neon;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_neon;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_neon;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_neon;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_neon;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_neon;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_neon;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_neon;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_neon;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_neon;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_neon;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_neon;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_neon;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_neon;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_neon;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_neon;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_neon;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_neon;
-    rtcd->recon.recon       = vp8_recon_b_neon;
-    rtcd->recon.recon2      = vp8_recon2b_neon;
-    rtcd->recon.recon4      = vp8_recon4b_neon;
-#elif HAVE_ARMV6
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_armv6;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_armv6;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_armv6;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_armv6;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_armv6;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_armv6;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_armv6;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_armv6;
-
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_v6;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_v6_dual;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_armv6;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_armv6;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_armv6;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_armv6;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_armv6;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_armv6;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_armv6;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_armv6;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_armv6;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_armv6;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_v6;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_v6;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_v6;
-    rtcd->recon.recon       = vp8_recon_b_armv6;
-    rtcd->recon.recon2      = vp8_recon2b_armv6;
-    rtcd->recon.recon4      = vp8_recon4b_armv6;
-#else
-//pure c
-    rtcd->idct.idct1        = vp8_short_idct4x4llm_1_c;
-    rtcd->idct.idct16       = vp8_short_idct4x4llm_c;
-    rtcd->idct.idct1_scalar = vp8_dc_only_idct_c;
-    rtcd->idct.iwalsh1      = vp8_short_inv_walsh4x4_1_c;
-    rtcd->idct.iwalsh16     = vp8_short_inv_walsh4x4_c;
-
-    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
-    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
-    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
-    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
-
-    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
-    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
-    rtcd->subpix.sixtap8x4     = vp8_sixtap_predict8x4_c;
-    rtcd->subpix.sixtap4x4     = vp8_sixtap_predict_c;
-    rtcd->subpix.bilinear16x16 = vp8_bilinear_predict16x16_c;
-    rtcd->subpix.bilinear8x8   = vp8_bilinear_predict8x8_c;
-    rtcd->subpix.bilinear8x4   = vp8_bilinear_predict8x4_c;
-    rtcd->subpix.bilinear4x4   = vp8_bilinear_predict4x4_c;
-
-    rtcd->loopfilter.normal_mb_v = vp8_loop_filter_mbv_c;
-    rtcd->loopfilter.normal_b_v  = vp8_loop_filter_bv_c;
-    rtcd->loopfilter.normal_mb_h = vp8_loop_filter_mbh_c;
-    rtcd->loopfilter.normal_b_h  = vp8_loop_filter_bh_c;
-    rtcd->loopfilter.simple_mb_v = vp8_loop_filter_mbvs_c;
-    rtcd->loopfilter.simple_b_v  = vp8_loop_filter_bvs_c;
-    rtcd->loopfilter.simple_mb_h = vp8_loop_filter_mbhs_c;
-    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;
-#endif
-
-#if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
-#endif
-#endif
-
-#if HAVE_ARMV7
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby_neon;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s_neon;
-#elif HAVE_ARMV6
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-#else
-    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
-    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;
-
-#endif
-
-}
--- a/vp8/common/blockd.h
+++ b/vp8/common/blockd.h
@@ -24,7 +24,7 @@ void vpx_log(const char *format, ...);
 #define TRUE    1
 #define FALSE   0

-//#define DCPRED 1
+/*#define DCPRED 1*/
 #define DCPREDSIMTHRESH 0
 #define DCPREDCNTTHRESH 3

@@ -39,7 +39,7 @@ void vpx_log(const char *format, ...);
 #define MAX_REF_LF_DELTAS       4
 #define MAX_MODE_LF_DELTAS      4

-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_DELTADATA   0
 #define SEGMENT_ABSDATA     1

@@ -75,11 +75,11 @@ typedef enum

 typedef enum
 {
-    DC_PRED,            // average of above and left pixels
-    V_PRED,             // vertical prediction
-    H_PRED,             // horizontal prediction
-    TM_PRED,            // Truemotion prediction
-    B_PRED,             // block based prediction, each block has its own prediction mode
+    DC_PRED,            /* average of above and left pixels */
+    V_PRED,             /* vertical prediction */
+    H_PRED,             /* horizontal prediction */
+    TM_PRED,            /* Truemotion prediction */
+    B_PRED,             /* block based prediction, each block has its own prediction mode */

    NEARESTMV,
    NEARMV,
@@ -90,16 +90,16 @@ typedef enum
    MB_MODE_COUNT
 } MB_PREDICTION_MODE;

-// Macroblock level features
+/* Macroblock level features */
 typedef enum
 {
-    MB_LVL_ALT_Q = 0,               // Use alternate Quantizer ....
-    MB_LVL_ALT_LF = 1,              // Use alternate loop filter value...
-    MB_LVL_MAX = 2,                 // Number of MB level features supported
+    MB_LVL_ALT_Q = 0,               /* Use alternate Quantizer .... */
+    MB_LVL_ALT_LF = 1,              /* Use alternate loop filter value... */
+    MB_LVL_MAX = 2                  /* Number of MB level features supported */

 } MB_LVL_FEATURES;

-// Segment Feature Masks
+/* Segment Feature Masks */
 #define SEGMENT_ALTQ    0x01
 #define SEGMENT_ALT_LF  0x02

@@ -110,11 +110,11 @@ typedef enum

 typedef enum
 {
-    B_DC_PRED,          // average of above and left pixels
+    B_DC_PRED,          /* average of above and left pixels */
    B_TM_PRED,

-    B_VE_PRED,           // vertical prediction
-    B_HE_PRED,           // horizontal prediction
+    B_VE_PRED,           /* vertical prediction */
+    B_HE_PRED,           /* horizontal prediction */

    B_LD_PRED,
    B_RD_PRED,
@@ -169,14 +169,14 @@ typedef struct
        MV  as_mv;
    } mv;

-    char partitioning;
-    unsigned char mb_skip_coeff;                                //does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens
+    unsigned char partitioning;
+    unsigned char mb_skip_coeff;                                /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */
    unsigned char dc_diff;
    unsigned char need_to_clamp_mvs;

-    unsigned char segment_id;                  // Which set of segmentation parameters should be used for this MB
+    unsigned char segment_id;                  /* Which set of segmentation parameters should be used for this MB */

-    unsigned char force_no_skip; //encoder only
+    unsigned char force_no_skip; /* encoder only */
 } MB_MODE_INFO;


@@ -195,9 +195,9 @@ typedef struct
    short *diff;
    short *reference;

-    short(*dequant)[4];
+    short *dequant;

-    // 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries
+    /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
    unsigned char **base_pre;
    int pre;
    int pre_stride;
@@ -214,17 +214,17 @@ typedef struct

 typedef struct
 {
-    DECLARE_ALIGNED(16, short, diff[400]);      // from idct diff
+    DECLARE_ALIGNED(16, short, diff[400]);      /* from idct diff */
    DECLARE_ALIGNED(16, unsigned char,  predictor[384]);
-//not used    DECLARE_ALIGNED(16, short, reference[384]);
+/* not used    DECLARE_ALIGNED(16, short, reference[384]); */
    DECLARE_ALIGNED(16, short, qcoeff[400]);
    DECLARE_ALIGNED(16, short, dqcoeff[400]);
    DECLARE_ALIGNED(16, char,  eobs[25]);

-    // 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries.
+    /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */
    BLOCKD block[25];

-    YV12_BUFFER_CONFIG pre; // Filtered copy of previous frame reconstruction
+    YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */
    YV12_BUFFER_CONFIG dst;

    MODE_INFO *mode_info_context;
@@ -235,39 +235,39 @@ typedef struct
    int up_available;
    int left_available;

-    // Y,U,V,Y2
+    /* Y,U,V,Y2 */
    ENTROPY_CONTEXT_PLANES *above_context;
    ENTROPY_CONTEXT_PLANES *left_context;

-    // 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active.
+    /* 0 indicates segmentation at MB level is not enabled. Otherwise the individual bits indicate which features are active. */
    unsigned char segmentation_enabled;

-    // 0 (do not update) 1 (update) the macroblock segmentation map.
+    /* 0 (do not update) 1 (update) the macroblock segmentation map. */
    unsigned char update_mb_segmentation_map;

-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
    unsigned char update_mb_segmentation_data;

-    // 0 (do not update) 1 (update) the macroblock segmentation feature data.
+    /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */
    unsigned char mb_segement_abs_delta;

-    // Per frame flags that define which MB level features (such as quantizer or loop filter level)
-    // are enabled and when enabled the proabilities used to decode the per MB flags in MB_MODE_INFO
-    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         // Probability Tree used to code Segment number
+    /* Per frame flags that define which MB level features (such as quantizer or loop filter level) */
+    /* are enabled and when enabled the probabilities used to decode the per MB flags in MB_MODE_INFO */
+    vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS];         /* Probability Tree used to code Segment number */

-    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            // Segment parameters
+    signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS];            /* Segment parameters */

-    // mode_based Loop filter adjustment
+    /* mode_based Loop filter adjustment */
    unsigned char mode_ref_lf_delta_enabled;
    unsigned char mode_ref_lf_delta_update;

-    // Delta values have the range +/- MAX_LOOP_FILTER
-    //char ref_lf_deltas[MAX_REF_LF_DELTAS];                      // 0 = Intra, Last, GF, ARF
-    //char mode_lf_deltas[MAX_MODE_LF_DELTAS];                            // 0 = BPRED, ZERO_MV, MV, SPLIT
-    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     // 0 = Intra, Last, GF, ARF
-    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           // 0 = BPRED, ZERO_MV, MV, SPLIT
+    /* Delta values have the range +/- MAX_LOOP_FILTER */
+    signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS];                /* 0 = Intra, Last, GF, ARF */
+    signed char ref_lf_deltas[MAX_REF_LF_DELTAS];                     /* 0 = Intra, Last, GF, ARF */
+    signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS];                      /* 0 = BPRED, ZERO_MV, MV, SPLIT */
+    signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];                           /* 0 = BPRED, ZERO_MV, MV, SPLIT */

-    // Distance of MB away from frame edges
+    /* Distance of MB away from frame edges */
    int mb_to_left_edge;
    int mb_to_right_edge;
    int mb_to_top_edge;
--- a/vp8/common/debugmodes.c
+++ b/vp8/common/debugmodes.c
@@ -21,7 +21,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
    int mb_index = 0;
    FILE *mvs = fopen("mvs.stt", "a");

-    // print out the macroblock Y modes
+    /* print out the macroblock Y modes */
    mb_index = 0;
    fprintf(mvs, "Mb Modes for Frame %d\n", frame);

@@ -60,7 +60,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f

    fprintf(mvs, "\n");

-    // print out the macroblock UV modes
+    /* print out the macroblock UV modes */
    mb_index = 0;
    fprintf(mvs, "UV Modes for Frame %d\n", frame);

@@ -80,7 +80,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f

    fprintf(mvs, "\n");

-    // print out the block modes
+    /* print out the block modes */
    mb_index = 0;
    fprintf(mvs, "Mbs for Frame %d\n", frame);
    {
@@ -108,7 +108,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
    }
    fprintf(mvs, "\n");

-    // print out the macroblock mvs
+    /* print out the macroblock mvs */
    mb_index = 0;
    fprintf(mvs, "MVs for Frame %d\n", frame);

@@ -128,7 +128,7 @@ void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int f
    fprintf(mvs, "\n");


-    // print out the block modes
+    /* print out the block mvs */
    mb_index = 0;
    fprintf(mvs, "MVs for Frame %d\n", frame);
    {
--- a/vp8/common/defaultcoefcounts.h
+++ b/vp8/common/defaultcoefcounts.h
@@ -15,204 +15,204 @@ static const unsigned int default_coef_counts [BLOCK_TYPES] [COEF_BANDS] [PREV_C
 {

    {
-        // Block Type ( 0 )
+        /* Block Type ( 0 ) */
        {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
        },
        {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
            {30190, 26544, 225,  24,   4,   0,   0,   0,   0,   0,   0, 4171593,},
            {26846, 25157, 1241, 130,  26,   6,   1,   0,   0,   0,   0, 149987,},
            {10484, 9538, 1006, 160,  36,  18,   0,   0,   0,   0,   0, 15104,},
        },
        {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
            {25842, 40456, 1126,  83,  11,   2,   0,   0,   0,   0,   0,   0,},
            {9338, 8010, 512,  73,   7,   3,   2,   0,   0,   0,   0, 43294,},
            {1047, 751, 149,  31,  13,   6,   1,   0,   0,   0,   0, 879,},
        },
        {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
            {26136, 9826, 252,  13,   0,   0,   0,   0,   0,   0,   0,   0,},
            {8134, 5574, 191,  14,   2,   0,   0,   0,   0,   0,   0, 35302,},
            { 605, 677, 116,   9,   1,   0,   0,   0,   0,   0,   0, 611,},
        },
        {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
            {10263, 15463, 283,  17,   0,   0,   0,   0,   0,   0,   0,   0,},
            {2773, 2191, 128,   9,   2,   2,   0,   0,   0,   0,   0, 10073,},
            { 134, 125,  32,   4,   0,   2,   0,   0,   0,   0,   0,  50,},
        },
        {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
            {10483, 2663,  23,   1,   0,   0,   0,   0,   0,   0,   0,   0,},
            {2137, 1251,  27,   1,   1,   0,   0,   0,   0,   0,   0, 14362,},
            { 116, 156,  14,   2,   1,   0,   0,   0,   0,   0,   0, 190,},
        },
        {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
            {40977, 27614, 412,  28,   0,   0,   0,   0,   0,   0,   0,   0,},
            {6113, 5213, 261,  22,   3,   0,   0,   0,   0,   0,   0, 26164,},
            { 382, 312,  50,  14,   2,   0,   0,   0,   0,   0,   0, 345,},
        },
        {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
            {   0,  26,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,  13,   0,   0,   0,   0,   0,   0,   0,   0,   0, 319,},
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   8,},
        },
    },
    {
-        // Block Type ( 1 )
+        /* Block Type ( 1 ) */
        {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
            {3268, 19382, 1043, 250,  93,  82,  49,  26,  17,   8,  25, 82289,},
            {8758, 32110, 5436, 1832, 827, 668, 420, 153,  24,   0,   3, 52914,},
            {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399,  59,   0,   0, 18620,},
        },
        {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
            {12419, 8420, 452,  62,   9,   1,   0,   0,   0,   0,   0,   0,},
            {11715, 8705, 693,  92,  15,   7,   2,   0,   0,   0,   0, 53988,},
            {7603, 8585, 2306, 778, 270, 145,  39,   5,   0,   0,   0, 9136,},
        },
        {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
            {15938, 14335, 1207, 184,  55,  13,   4,   1,   0,   0,   0,   0,},
            {7415, 6829, 1138, 244,  71,  26,   7,   0,   0,   0,   0, 9980,},
            {1580, 1824, 655, 241,  89,  46,  10,   2,   0,   0,   0, 429,},
        },
        {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
            {19453, 5260, 201,  19,   0,   0,   0,   0,   0,   0,   0,   0,},
            {9173, 3758, 213,  22,   1,   1,   0,   0,   0,   0,   0, 9820,},
            {1689, 1277, 276,  51,  17,   4,   0,   0,   0,   0,   0, 679,},
        },
        {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
            {12076, 10667, 620,  85,  19,   9,   5,   0,   0,   0,   0,   0,},
            {4665, 3625, 423,  55,  19,   9,   0,   0,   0,   0,   0, 5127,},
            { 415, 440, 143,  34,  20,   7,   2,   0,   0,   0,   0, 101,},
        },
        {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
            {12183, 4846, 115,  11,   1,   0,   0,   0,   0,   0,   0,   0,},
            {4226, 3149, 177,  21,   2,   0,   0,   0,   0,   0,   0, 7157,},
            { 375, 621, 189,  51,  11,   4,   1,   0,   0,   0,   0, 198,},
        },
        {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
            {61658, 37743, 1203,  94,  10,   3,   0,   0,   0,   0,   0,   0,},
            {15514, 11563, 903, 111,  14,   5,   0,   0,   0,   0,   0, 25195,},
            { 929, 1077, 291,  78,  14,   7,   1,   0,   0,   0,   0, 507,},
        },
        {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
            {   0, 990,  15,   3,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0, 412,  13,   0,   0,   0,   0,   0,   0,   0,   0, 1641,},
            {   0,  18,   7,   1,   0,   0,   0,   0,   0,   0,   0,  30,},
        },
    },
    {
-        // Block Type ( 2 )
+        /* Block Type ( 2 ) */
        {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
            { 953, 24519, 628, 120,  28,  12,   4,   0,   0,   0,   0, 2248798,},
            {1525, 25654, 2647, 617, 239, 143,  42,   5,   0,   0,   0, 66837,},
            {1180, 11011, 3001, 1237, 532, 448, 239,  54,   5,   0,   0, 7122,},
        },
        {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
            {1356, 2220,  67,  10,   4,   1,   0,   0,   0,   0,   0,   0,},
            {1450, 2544, 102,  18,   4,   3,   0,   0,   0,   0,   0, 57063,},
            {1182, 2110, 470, 130,  41,  21,   0,   0,   0,   0,   0, 6047,},
        },
        {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
            { 370, 3378, 200,  30,   5,   4,   1,   0,   0,   0,   0,   0,},
            { 293, 1006, 131,  29,  11,   0,   0,   0,   0,   0,   0, 5404,},
            { 114, 387,  98,  23,   4,   8,   1,   0,   0,   0,   0, 236,},
        },
        {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
            { 579, 194,   4,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            { 395, 213,   5,   1,   0,   0,   0,   0,   0,   0,   0, 4157,},
            { 119, 122,   4,   0,   0,   0,   0,   0,   0,   0,   0, 300,},
        },
        {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
            {  38, 557,  19,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {  21, 114,  12,   1,   0,   0,   0,   0,   0,   0,   0, 427,},
            {   0,   5,   0,   0,   0,   0,   0,   0,   0,   0,   0,   7,},
        },
        {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
            {  52,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {  18,   6,   0,   0,   0,   0,   0,   0,   0,   0,   0, 652,},
            {   1,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  30,},
        },
        {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
            { 640, 569,  10,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {  25,  77,   2,   0,   0,   0,   0,   0,   0,   0,   0, 517,},
            {   4,   7,   0,   0,   0,   0,   0,   0,   0,   0,   0,   3,},
        },
        {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
        },
    },
    {
-        // Block Type ( 3 )
+        /* Block Type ( 3 ) */
        {
-            // Coeff Band ( 0 )
+            /* Coeff Band ( 0 ) */
            {2506, 20161, 2707, 767, 261, 178, 107,  30,  14,   3,   0, 100694,},
            {8806, 36478, 8817, 3268, 1280, 850, 401, 114,  42,   0,   0, 58572,},
            {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175,  32,   0,   0, 19284,},
        },
        {
-            // Coeff Band ( 1 )
+            /* Coeff Band ( 1 ) */
            {9738, 11313, 959, 205,  70,  18,  11,   1,   0,   0,   0,   0,},
            {12628, 15085, 1507, 273,  52,  19,   9,   0,   0,   0,   0, 54280,},
            {10701, 15846, 5561, 1926, 813, 570, 249,  36,   0,   0,   0, 6460,},
        },
        {
-            // Coeff Band ( 2 )
+            /* Coeff Band ( 2 ) */
            {6781, 22539, 2784, 634, 182, 123,  20,   4,   0,   0,   0,   0,},
            {6263, 11544, 2649, 790, 259, 168,  27,   5,   0,   0,   0, 20539,},
            {3109, 4075, 2031, 896, 457, 386, 158,  29,   0,   0,   0, 1138,},
        },
        {
-            // Coeff Band ( 3 )
+            /* Coeff Band ( 3 ) */
            {11515, 4079, 465,  73,   5,  14,   2,   0,   0,   0,   0,   0,},
            {9361, 5834, 650,  96,  24,   8,   4,   0,   0,   0,   0, 22181,},
            {4343, 3974, 1360, 415, 132,  96,  14,   1,   0,   0,   0, 1267,},
        },
        {
-            // Coeff Band ( 4 )
+            /* Coeff Band ( 4 ) */
            {4787, 9297, 823, 168,  44,  12,   4,   0,   0,   0,   0,   0,},
            {3619, 4472, 719, 198,  60,  31,   3,   0,   0,   0,   0, 8401,},
            {1157, 1175, 483, 182,  88,  31,   8,   0,   0,   0,   0, 268,},
        },
        {
-            // Coeff Band ( 5 )
+            /* Coeff Band ( 5 ) */
            {8299, 1226,  32,   5,   1,   0,   0,   0,   0,   0,   0,   0,},
            {3502, 1568,  57,   4,   1,   1,   0,   0,   0,   0,   0, 9811,},
            {1055, 1070, 166,  29,   6,   1,   0,   0,   0,   0,   0, 527,},
        },
        {
-            // Coeff Band ( 6 )
+            /* Coeff Band ( 6 ) */
            {27414, 27927, 1989, 347,  69,  26,   0,   0,   0,   0,   0,   0,},
            {5876, 10074, 1574, 341,  91,  24,   4,   0,   0,   0,   0, 21954,},
            {1571, 2171, 778, 324, 124,  65,  16,   0,   0,   0,   0, 979,},
        },
        {
-            // Coeff Band ( 7 )
+            /* Coeff Band ( 7 ) */
            {   0,  29,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,},
            {   0,  23,   0,   0,   0,   0,   0,   0,   0,   0,   0, 459,},
            {   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,   0,  13,},
--- a/vp8/common/entropy.h
+++ b/vp8/common/entropy.h
@@ -17,18 +17,18 @@

 /* Coefficient token alphabet */

-#define ZERO_TOKEN              0       //0         Extra Bits 0+0
-#define ONE_TOKEN               1       //1         Extra Bits 0+1
-#define TWO_TOKEN               2       //2         Extra Bits 0+1
-#define THREE_TOKEN             3       //3         Extra Bits 0+1
-#define FOUR_TOKEN              4       //4         Extra Bits 0+1
-#define DCT_VAL_CATEGORY1       5       //5-6       Extra Bits 1+1
-#define DCT_VAL_CATEGORY2       6       //7-10      Extra Bits 2+1
-#define DCT_VAL_CATEGORY3       7       //11-26     Extra Bits 4+1
-#define DCT_VAL_CATEGORY4       8       //11-26     Extra Bits 5+1
-#define DCT_VAL_CATEGORY5       9       //27-58     Extra Bits 5+1
-#define DCT_VAL_CATEGORY6       10      //59+       Extra Bits 11+1
-#define DCT_EOB_TOKEN           11      //EOB       Extra Bits 0+0
+#define ZERO_TOKEN              0       /* 0         Extra Bits 0+0 */
+#define ONE_TOKEN               1       /* 1         Extra Bits 0+1 */
+#define TWO_TOKEN               2       /* 2         Extra Bits 0+1 */
+#define THREE_TOKEN             3       /* 3         Extra Bits 0+1 */
+#define FOUR_TOKEN              4       /* 4         Extra Bits 0+1 */
+#define DCT_VAL_CATEGORY1       5       /* 5-6       Extra Bits 1+1 */
+#define DCT_VAL_CATEGORY2       6       /* 7-10      Extra Bits 2+1 */
+#define DCT_VAL_CATEGORY3       7       /* 11-18     Extra Bits 3+1 */
+#define DCT_VAL_CATEGORY4       8       /* 19-34     Extra Bits 4+1 */
+#define DCT_VAL_CATEGORY5       9       /* 35-66     Extra Bits 5+1 */
+#define DCT_VAL_CATEGORY6       10      /* 67+       Extra Bits 11+1 */
+#define DCT_EOB_TOKEN           11      /* EOB       Extra Bits 0+0 */

 #define vp8_coef_tokens 12
 #define MAX_ENTROPY_TOKENS vp8_coef_tokens
@@ -83,7 +83,7 @@ extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]);
   coefficient band (and since zigzag positions 0, 1, and 2 are in
   distinct bands). */

-/*# define DC_TOKEN_CONTEXTS        3 // 00, 0!0, !0!0 */
+/*# define DC_TOKEN_CONTEXTS        3*/ /* 00, 0!0, !0!0 */
 #   define PREV_COEF_CONTEXTS       3

 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[vp8_coef_tokens]);
--- a/vp8/common/entropymv.c
+++ b/vp8/common/entropymv.c
@@ -29,21 +29,21 @@ const MV_CONTEXT vp8_mv_update_probs[2] =
 const MV_CONTEXT vp8_default_mv_context[2] =
 {
    {{
-        // row
-        162,                                        // is short
-        128,                                        // sign
-        225, 146, 172, 147, 214,  39, 156,          // short tree
-        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 // long bits
+        /* row */
+        162,                                        /* is short */
+        128,                                        /* sign */
+        225, 146, 172, 147, 214,  39, 156,          /* short tree */
+        128, 129, 132,  75, 145, 178, 206, 239, 254, 254 /* long bits */
    }},



    {{
-        // same for column
-        164,                                        // is short
+        /* same for column */
+        164,                                        /* is short */
        128,
        204, 170, 119, 235, 140, 230, 228,
-        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 // long bits
+        128, 130, 130,  74, 148, 180, 203, 236, 254, 254 /* long bits */

    }}
 };
--- a/vp8/common/extend.c
+++ b/vp8/common/extend.c
@@ -15,14 +15,14 @@

 static void extend_plane_borders
 (
-    unsigned char *s, // source
-    int sp,           // pitch
-    int h,            // height
-    int w,            // width
-    int et,           // extend top border
-    int el,           // extend left border
-    int eb,           // extend bottom border
-    int er            // extend right border
+    unsigned char *s, /* source */
+    int sp,           /* pitch */
+    int h,            /* height */
+    int w,            /* width */
+    int et,           /* extend top border */
+    int el,           /* extend left border */
+    int eb,           /* extend bottom border */
+    int er            /* extend right border */
 )
 {

@@ -31,7 +31,7 @@ static void extend_plane_borders
    unsigned char *dest_ptr1, *dest_ptr2;
    int linesize;

-    // copy the left and right most columns out
+    /* copy the left and right most columns out */
    src_ptr1 = s;
    src_ptr2 = s + w - 1;
    dest_ptr1 = s - el;
@@ -39,8 +39,9 @@ static void extend_plane_borders

    for (i = 0; i < h - 0 + 1; i++)
    {
-        // Some linkers will complain if we call vpx_memset with el set to a
-        // constant 0.
+        /* Some linkers will complain if we call vpx_memset with el set to a
+         * constant 0.
+         */
        if (el)
            vpx_memset(dest_ptr1, src_ptr1[0], el);
        vpx_memset(dest_ptr2, src_ptr2[0], er);
@@ -50,7 +51,7 @@ static void extend_plane_borders
        dest_ptr2 += sp;
    }

-    // Now copy the top and bottom source lines into each line of the respective borders
+    /* Now copy the top and bottom source lines into each line of the respective borders */
    src_ptr1 = s - el;
    src_ptr2 = s + sp * (h - 1) - el;
    dest_ptr1 = s + sp * (-et) - el;
@@ -76,12 +77,12 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
    int er = 0xf & (16 - (width & 0xf));
    int eb = 0xf & (16 - (height & 0xf));

-    // check for non multiples of 16
+    /* check for non multiples of 16 */
    if (er != 0 || eb != 0)
    {
        extend_plane_borders(ybf->y_buffer, ybf->y_stride, height, width, 0, 0, eb, er);

-        //adjust for uv
+        /* adjust for uv */
        height = (height + 1) >> 1;
        width  = (width  + 1) >> 1;
        er = 0x7 & (8 - (width  & 0x7));
@@ -95,7 +96,7 @@ void vp8_extend_to_multiple_of16(YV12_BUFFER_CONFIG *ybf, int width, int height)
    }
 }

-// note the extension is only for the last row, for intra prediction purpose
+/* note the extension is only for the last row, for intra prediction purpose */
 void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr)
 {
    int i;
--- a/vp8/common/filter_c.c
+++ b/vp8/common/filter_c.c
@@ -32,13 +32,13 @@ static const int bilinear_filters[8][2] =
 static const short sub_pel_filters[8][6] =
 {

-    { 0,  0,  128,    0,   0,  0 },         // note that 1/8 pel positions are just as per alpha -0.5 bicubic
+    { 0,  0,  128,    0,   0,  0 },         /* note that 1/8 pel positions are just as per alpha -0.5 bicubic */
    { 0, -6,  123,   12,  -1,  0 },
-    { 2, -11, 108,   36,  -8,  1 },         // New 1/4 pel 6 tap filter
+    { 2, -11, 108,   36,  -8,  1 },         /* New 1/4 pel 6 tap filter */
    { 0, -9,   93,   50,  -6,  0 },
-    { 3, -16,  77,   77, -16,  3 },         // New 1/2 pel 6 tap filter
+    { 3, -16,  77,   77, -16,  3 },         /* New 1/2 pel 6 tap filter */
    { 0, -6,   50,   93,  -9,  0 },
-    { 1, -8,   36,  108, -11,  2 },         // New 1/4 pel 6 tap filter
+    { 1, -8,   36,  108, -11,  2 },         /* New 1/4 pel 6 tap filter */
    { 0, -1,   12,  123,  -6,  0 },


@@ -69,9 +69,9 @@ void vp8_filter_block2d_first_pass
                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);      // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);      /* Rounding */

-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
            Temp = Temp >> VP8_FILTER_SHIFT;

            if (Temp < 0)
@@ -83,7 +83,7 @@ void vp8_filter_block2d_first_pass
            src_ptr++;
        }

-        // Next row...
+        /* Next row... */
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_width;
    }
@@ -108,16 +108,16 @@ void vp8_filter_block2d_second_pass
    {
        for (j = 0; j < output_width; j++)
        {
-            // Apply filter
+            /* Apply filter */
            Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
                   ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
                   ((int)src_ptr[0]                 * vp8_filter[2]) +
                   ((int)src_ptr[pixel_step]         * vp8_filter[3]) +
                   ((int)src_ptr[2*pixel_step]       * vp8_filter[4]) +
                   ((int)src_ptr[3*pixel_step]       * vp8_filter[5]) +
-                   (VP8_FILTER_WEIGHT >> 1);   // Rounding
+                   (VP8_FILTER_WEIGHT >> 1);   /* Rounding */

-            // Normalize back to 0-255
+            /* Normalize back to 0-255 */
            Temp = Temp >> VP8_FILTER_SHIFT;

            if (Temp < 0)
@@ -129,7 +129,7 @@ void vp8_filter_block2d_second_pass
            src_ptr++;
        }

-        // Start next row
+        /* Start next row */
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_pitch;
    }
@@ -146,12 +146,12 @@ void vp8_filter_block2d
    const short  *VFilter
 )
 {
-    int FData[9*4]; // Temp data bufffer used in filtering
+    int FData[9*4]; /* Temp data buffer used in filtering */

-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 4, HFilter);

-    // then filter verticaly...
+    /* then filter vertically... */
    vp8_filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4, VFilter);
 }

@@ -195,8 +195,8 @@ void vp8_sixtap_predict_c
    const short  *HFilter;
    const short  *VFilter;

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */

    vp8_filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter);
 }
@@ -212,16 +212,16 @@ void vp8_sixtap_predict8x8_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data buffer used in filtering */

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */

-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 13, 8, HFilter);


-    // then filter verticaly...
+    /* then filter vertically... */
    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8, VFilter);

 }
@@ -238,16 +238,16 @@ void vp8_sixtap_predict8x4_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[13*16];   // Temp data bufffer used in filtering
+    int FData[13*16];   /* Temp data buffer used in filtering */

-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */

-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 9, 8, HFilter);


-    // then filter verticaly...
+    /* then filter vertically... */
    vp8_filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8, VFilter);

 }
@@ -264,16 +264,16 @@ void vp8_sixtap_predict16x16_c
 {
    const short  *HFilter;
    const short  *VFilter;
-    int FData[21*24];   // Temp data bufffer used in filtering
+    int FData[21*24];   /* Temp data buffer used in filtering */


-    HFilter = sub_pel_filters[xoffset];   // 6 tap
-    VFilter = sub_pel_filters[yoffset];   // 6 tap
+    HFilter = sub_pel_filters[xoffset];   /* 6 tap */
+    VFilter = sub_pel_filters[yoffset];   /* 6 tap */

-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
    vp8_filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 1, 21, 16, HFilter);

-    // then filter verticaly...
+    /* then filter vertically... */
    vp8_filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16, VFilter);

 }
@@ -324,14 +324,14 @@ void vp8_filter_block2d_bil_first_pass
    {
        for (j = 0; j < output_width; j++)
        {
-            // Apply bilinear filter
+            /* Apply bilinear filter */
            output_ptr[j] = (((int)src_ptr[0]          * vp8_filter[0]) +
                             ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                             (VP8_FILTER_WEIGHT / 2)) >> VP8_FILTER_SHIFT;
            src_ptr++;
        }

-        // Next row...
+        /* Next row... */
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_width;
    }
@@ -384,7 +384,7 @@ void vp8_filter_block2d_bil_second_pass
    {
        for (j = 0; j < output_width; j++)
        {
-            // Apply filter
+            /* Apply filter */
            Temp = ((int)src_ptr[0]         * vp8_filter[0]) +
                   ((int)src_ptr[pixel_step] * vp8_filter[1]) +
                   (VP8_FILTER_WEIGHT / 2);
@@ -392,7 +392,7 @@ void vp8_filter_block2d_bil_second_pass
            src_ptr++;
        }

-        // Next row...
+        /* Next row... */
        src_ptr    += src_pixels_per_line - output_width;
        output_ptr += output_pitch;
    }
@@ -432,12 +432,12 @@ void vp8_filter_block2d_bil
 )
 {

-    unsigned short FData[17*16];    // Temp data bufffer used in filtering
+    unsigned short FData[17*16];    /* Temp data buffer used in filtering */

-    // First filter 1-D horizontally...
+    /* First filter 1-D horizontally... */
    vp8_filter_block2d_bil_first_pass(src_ptr, FData, src_pixels_per_line, 1, Height + 1, Width, HFilter);

-    // then 1-D vertically...
+    /* then 1-D vertically... */
    vp8_filter_block2d_bil_second_pass(FData, output_ptr, dst_pitch, Width, Width, Height, Width, VFilter);
 }

--- a/vp8/common/findnearmv.c
+++ b/vp8/common/findnearmv.c
@@ -168,7 +168,7 @@ void vp8_find_near_mvs

    vp8_clamp_mv(nearest, xd);
    vp8_clamp_mv(nearby, xd);
-    vp8_clamp_mv(best_mv, xd); //TODO: move this up before the copy
+    vp8_clamp_mv(best_mv, xd); /*TODO: move this up before the copy*/
 }

 vp8_prob *vp8_mv_ref_probs(
@@ -179,7 +179,7 @@ vp8_prob *vp8_mv_ref_probs(
    p[1] = vp8_mode_contexts [near_mv_ref_ct[1]] [1];
    p[2] = vp8_mode_contexts [near_mv_ref_ct[2]] [2];
    p[3] = vp8_mode_contexts [near_mv_ref_ct[3]] [3];
-    //p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];
+    /*p[3] = vp8_mode_contexts [near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]] [3];*/
    return p;
 }

--- a/vp8/common/generic/systemdependent.c
+++ b/vp8/common/generic/systemdependent.c
@@ -18,6 +18,7 @@
 #include "onyxc_int.h"

 extern void vp8_arch_x86_common_init(VP8_COMMON *ctx);
+extern void vp8_arch_arm_common_init(VP8_COMMON *ctx);

 void (*vp8_build_intra_predictors_mby_ptr)(MACROBLOCKD *x);
 extern void vp8_build_intra_predictors_mby(MACROBLOCKD *x);
@@ -39,9 +40,11 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
    rtcd->recon.copy16x16   = vp8_copy_mem16x16_c;
    rtcd->recon.copy8x8     = vp8_copy_mem8x8_c;
    rtcd->recon.copy8x4     = vp8_copy_mem8x4_c;
-    rtcd->recon.recon      = vp8_recon_b_c;
+    rtcd->recon.recon       = vp8_recon_b_c;
    rtcd->recon.recon2      = vp8_recon2b_c;
-    rtcd->recon.recon4     = vp8_recon4b_c;
+    rtcd->recon.recon4      = vp8_recon4b_c;
+    rtcd->recon.recon_mb    = vp8_recon_mb_c;
+    rtcd->recon.recon_mby   = vp8_recon_mby_c;

    rtcd->subpix.sixtap16x16   = vp8_sixtap_predict16x16_c;
    rtcd->subpix.sixtap8x8     = vp8_sixtap_predict8x8_c;
@@ -62,14 +65,17 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
    rtcd->loopfilter.simple_b_h  = vp8_loop_filter_bhs_c;

 #if CONFIG_POSTPROC || (CONFIG_VP8_ENCODER && CONFIG_PSNR)
-    rtcd->postproc.down        = vp8_mbpost_proc_down_c;
-    rtcd->postproc.across      = vp8_mbpost_proc_across_ip_c;
-    rtcd->postproc.downacross  = vp8_post_proc_down_and_across_c;
-    rtcd->postproc.addnoise    = vp8_plane_add_noise_c;
+    rtcd->postproc.down             = vp8_mbpost_proc_down_c;
+    rtcd->postproc.across           = vp8_mbpost_proc_across_ip_c;
+    rtcd->postproc.downacross       = vp8_post_proc_down_and_across_c;
+    rtcd->postproc.addnoise         = vp8_plane_add_noise_c;
+    rtcd->postproc.blend_mb_inner   = vp8_blend_mb_inner_c;
+    rtcd->postproc.blend_mb_outer   = vp8_blend_mb_outer_c;
+    rtcd->postproc.blend_b          = vp8_blend_b_c;
 #endif

 #endif
-    // Pure C:
+    /* Pure C: */
    vp8_build_intra_predictors_mby_ptr = vp8_build_intra_predictors_mby;
    vp8_build_intra_predictors_mby_s_ptr = vp8_build_intra_predictors_mby_s;

@@ -77,4 +83,8 @@ void vp8_machine_specific_config(VP8_COMMON *ctx)
    vp8_arch_x86_common_init(ctx);
 #endif

+#if ARCH_ARM
+    vp8_arch_arm_common_init(ctx);
+#endif
+
 }
--- a/vp8/common/invtrans.c
+++ b/vp8/common/invtrans.c
@@ -38,7 +38,7 @@ void vp8_inverse_transform_mby(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *
 {
    int i;

-    // do 2nd order transform on the dc block
+    /* do 2nd order transform on the dc block */
    IDCT_INVOKE(rtcd, iwalsh16)(x->block[24].dqcoeff, x->block[24].diff);

    recon_dcblock(x);
@@ -68,7 +68,7 @@ void vp8_inverse_transform_mb(const vp8_idct_rtcd_vtable_t *rtcd, MACROBLOCKD *x
    if (x->mode_info_context->mbmi.mode != B_PRED &&
        x->mode_info_context->mbmi.mode != SPLITMV)
    {
-        // do 2nd order transform on the dc block
+        /* do 2nd order transform on the dc block */

        IDCT_INVOKE(rtcd, iwalsh16)(&x->block[24].dqcoeff[0], x->block[24].diff);
        recon_dcblock(x);
--- a/vp8/common/loopfilter.c
+++ b/vp8/common/loopfilter.c
@@ -23,7 +23,7 @@ prototype_loopfilter(vp8_mbloop_filter_vertical_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_horizontal_edge_c);
 prototype_loopfilter(vp8_loop_filter_simple_vertical_edge_c);

-// Horizontal MB filtering
+/* Horizontal MB filtering */
 void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -47,7 +47,7 @@ void vp8_loop_filter_mbhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
    vp8_loop_filter_simple_horizontal_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Vertical MB Filtering
+/* Vertical MB Filtering */
 void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                           int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -71,7 +71,7 @@ void vp8_loop_filter_mbvs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
    vp8_loop_filter_simple_vertical_edge_c(y_ptr, y_stride, lfi->mbflim, lfi->lim, lfi->mbthr, 2);
 }

-// Horizontal B Filtering
+/* Horizontal B Filtering */
 void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                          int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -99,7 +99,7 @@ void vp8_loop_filter_bhs_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned
    vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->flim, lfi->lim, lfi->thr, 2);
 }

-// Vertical B Filtering
+/* Vertical B Filtering */
 void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr,
                          int y_stride, int uv_stride, loop_filter_info *lfi, int simpler_lpf)
 {
@@ -140,7 +140,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
    const int yhedge_boost  = 2;
    const int uvhedge_boost = 2;

-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
    for (i = 0; i <= MAX_LOOP_FILTER; i++)
    {
        int filt_lvl = i;
@@ -166,7 +166,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
                HEVThresh = 0;
        }

-        // Set loop filter paramaeters that control sharpness.
+        /* Set loop filter parameters that control sharpness. */
        block_inside_limit = filt_lvl >> (sharpness_lvl > 0);
        block_inside_limit = block_inside_limit >> (sharpness_lvl > 4);

@@ -195,7 +195,7 @@ void vp8_init_loop_filter(VP8_COMMON *cm)

    }

-    // Set up the function pointers depending on the type of loop filtering selected
+    /* Set up the function pointers depending on the type of loop filtering selected */
    if (lft == NORMAL_LOOPFILTER)
    {
        cm->lf_mbv = LF_INVOKE(&cm->rtcd.loopfilter, normal_mb_v);
@@ -212,14 +212,15 @@ void vp8_init_loop_filter(VP8_COMMON *cm)
    }
 }

-// Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
-// each frame. Check last_frame_type to skip the function most of times.
+/* Put vp8_init_loop_filter() in vp8dx_create_decompressor(). Only call vp8_frame_init_loop_filter() while decoding
+ * each frame. Check last_frame_type to skip the function most of the time.
+ */
 void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)
 {
    int HEVThresh;
    int i, j;

-    // For each possible value for the loop filter fill out a "loop_filter_info" entry.
+    /* For each possible value for the loop filter fill out a "loop_filter_info" entry. */
    for (i = 0; i <= MAX_LOOP_FILTER; i++)
    {
        int filt_lvl = i;
@@ -247,15 +248,15 @@ void vp8_frame_init_loop_filter(loop_filter_info *lfi, int frame_type)

        for (j = 0; j < 16; j++)
        {
-            //lfi[i].lim[j] = block_inside_limit;
-            //lfi[i].mbflim[j] = filt_lvl+yhedge_boost;
+            /*lfi[i].lim[j] = block_inside_limit;
+            lfi[i].mbflim[j] = filt_lvl+yhedge_boost;*/
            lfi[i].mbthr[j] = HEVThresh;
-            //lfi[i].flim[j] = filt_lvl;
+            /*lfi[i].flim[j] = filt_lvl;*/
            lfi[i].thr[j] = HEVThresh;
-            //lfi[i].uvlim[j] = block_inside_limit;
-            //lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;
+            /*lfi[i].uvlim[j] = block_inside_limit;
+            lfi[i].uvmbflim[j] = filt_lvl+uvhedge_boost;*/
            lfi[i].uvmbthr[j] = HEVThresh;
-            //lfi[i].uvflim[j] = filt_lvl;
+            /*lfi[i].uvflim[j] = filt_lvl;*/
            lfi[i].uvthr[j] = HEVThresh;
        }
    }
@@ -268,32 +269,32 @@ void vp8_adjust_mb_lf_value(MACROBLOCKD *mbd, int *filter_level)

    if (mbd->mode_ref_lf_delta_enabled)
    {
-        // Aplly delta for reference frame
+        /* Apply delta for reference frame */
        *filter_level += mbd->ref_lf_deltas[mbmi->ref_frame];

-        // Apply delta for mode
+        /* Apply delta for mode */
        if (mbmi->ref_frame == INTRA_FRAME)
        {
-            // Only the split mode BPRED has a further special case
+            /* Only the split mode BPRED has a further special case */
            if (mbmi->mode == B_PRED)
                *filter_level +=  mbd->mode_lf_deltas[0];
        }
        else
        {
-            // Zero motion mode
+            /* Zero motion mode */
            if (mbmi->mode == ZEROMV)
                *filter_level +=  mbd->mode_lf_deltas[1];

-            // Split MB motion mode
+            /* Split MB motion mode */
            else if (mbmi->mode == SPLITMV)
                *filter_level +=  mbd->mode_lf_deltas[3];

-            // All other inter motion modes (Nearest, Near, New)
+            /* All other inter motion modes (Nearest, Near, New) */
            else
                *filter_level +=  mbd->mode_lf_deltas[2];
        }

-        // Range check
+        /* Range check */
        if (*filter_level > MAX_LOOP_FILTER)
            *filter_level = MAX_LOOP_FILTER;
        else if (*filter_level < 0)
@@ -311,7 +312,7 @@ void vp8_loop_filter_frame
 {
    YV12_BUFFER_CONFIG *post = cm->frame_to_show;
    loop_filter_info *lfi = cm->lf_info;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;

    int mb_row;
    int mb_col;
@@ -324,21 +325,21 @@ void vp8_loop_filter_frame
    int i;
    unsigned char *y_ptr, *u_ptr, *v_ptr;

-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */

-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
-            // Abs value
+            /* Abs value */
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
            else
            {
                baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
            }
        }
    }
@@ -348,18 +349,18 @@ void vp8_loop_filter_frame
            baseline_filter_level[i] = default_filt_lvl;
    }

-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);

-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
    y_ptr = post->y_buffer;
    u_ptr = post->u_buffer;
    v_ptr = post->v_buffer;

-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
    {
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -368,9 +369,10 @@ void vp8_loop_filter_frame

            filter_level = baseline_filter_level[Segment];

-            // Distance of Mb to the various image edges.
-            // These specified to 8th pel as they are always compared to values that are in 1/8th pel units
-            // Apply any context driven MB level adjustment
+            /* Distance of Mb to the various image edges.
+             * These are specified to 1/8th pel as they are always compared to values that are in 1/8th pel units
+             * Apply any context driven MB level adjustment
+             */
            vp8_adjust_mb_lf_value(mbd, &filter_level);

            if (filter_level)
@@ -381,7 +383,7 @@ void vp8_loop_filter_frame
                if (mbd->mode_info_context->mbmi.dc_diff > 0)
                    cm->lf_bv(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

-                // don't apply across umv border
+                /* don't apply across umv border */
                if (mb_row > 0)
                    cm->lf_mbh(y_ptr, u_ptr, v_ptr, post->y_stride, post->uv_stride, &lfi[filter_level], cm->simpler_lpf);

@@ -393,14 +395,14 @@ void vp8_loop_filter_frame
            u_ptr += 8;
            v_ptr += 8;

-            mbd->mode_info_context++;     // step to next MB
+            mbd->mode_info_context++;     /* step to next MB */
        }

        y_ptr += post->y_stride  * 16 - post->y_width;
        u_ptr += post->uv_stride *  8 - post->uv_width;
        v_ptr += post->uv_stride *  8 - post->uv_width;

-        mbd->mode_info_context++;         // Skip border mb
+        mbd->mode_info_context++;         /* Skip border mb */
    }
 }

@@ -424,26 +426,26 @@ void vp8_loop_filter_frame_yonly
    int baseline_filter_level[MAX_MB_SEGMENTS];
    int filter_level;
    int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;

    (void) sharpness_lvl;

-    //MODE_INFO * this_mb_mode_info = cm->mi;  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi;          // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi;*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi;          /* Point at base of Mb MODE_INFO list */

-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
-            // Abs value
+            /* Abs value */
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
            else
            {
                baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
            }
        }
    }
@@ -453,16 +455,16 @@ void vp8_loop_filter_frame_yonly
            baseline_filter_level[i] = default_filt_lvl;
    }

-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);

-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
    y_ptr = post->y_buffer;

-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
    for (mb_row = 0; mb_row < cm->mb_rows; mb_row++)
    {
        for (mb_col = 0; mb_col < cm->mb_cols; mb_col++)
@@ -470,7 +472,7 @@ void vp8_loop_filter_frame_yonly
            int Segment = (alt_flt_enabled) ? mbd->mode_info_context->mbmi.segment_id : 0;
            filter_level = baseline_filter_level[Segment];

-            // Apply any context driven MB level adjustment
+            /* Apply any context driven MB level adjustment */
            vp8_adjust_mb_lf_value(mbd, &filter_level);

            if (filter_level)
@@ -481,7 +483,7 @@ void vp8_loop_filter_frame_yonly
                if (mbd->mode_info_context->mbmi.dc_diff > 0)
                    cm->lf_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);

-                // don't apply across umv border
+                /* don't apply across umv border */
                if (mb_row > 0)
                    cm->lf_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi[filter_level], 0);

@@ -490,12 +492,12 @@ void vp8_loop_filter_frame_yonly
            }

            y_ptr += 16;
-            mbd->mode_info_context ++;        // step to next MB
+            mbd->mode_info_context ++;        /* step to next MB */

        }

        y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context ++;            // Skip border mb
+        mbd->mode_info_context ++;            /* Skip border mb */
    }

 }
@@ -516,7 +518,7 @@ void vp8_loop_filter_partial_frame
    unsigned char *y_ptr;
    int mb_row;
    int mb_col;
-    //int mb_rows = post->y_height >> 4;
+    /*int mb_rows = post->y_height >> 4;*/
    int mb_cols = post->y_width  >> 4;

    int linestocopy;
@@ -525,12 +527,12 @@ void vp8_loop_filter_partial_frame
    int baseline_filter_level[MAX_MB_SEGMENTS];
    int filter_level;
    int alt_flt_enabled = mbd->segmentation_enabled;
-    int frame_type = cm->frame_type;
+    FRAME_TYPE frame_type = cm->frame_type;

    (void) sharpness_lvl;

-    //MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);  // Point at base of Mb MODE_INFO list
-    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        // Point at base of Mb MODE_INFO list
+    /*MODE_INFO * this_mb_mode_info = cm->mi + (post->y_height>>5) * (mb_cols + 1);*/ /* Point at base of Mb MODE_INFO list */
+    mbd->mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1);        /* Point at base of Mb MODE_INFO list */

    linestocopy = (post->y_height >> (4 + Fraction));

@@ -539,19 +541,19 @@ void vp8_loop_filter_partial_frame

    linestocopy <<= 4;

-    // Note the baseline filter values for each segment
+    /* Note the baseline filter values for each segment */
    if (alt_flt_enabled)
    {
        for (i = 0; i < MAX_MB_SEGMENTS; i++)
        {
-            // Abs value
+            /* Abs value */
            if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA)
                baseline_filter_level[i] = mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-            // Delta Value
+            /* Delta Value */
            else
            {
                baseline_filter_level[i] = default_filt_lvl + mbd->segment_feature_data[MB_LVL_ALT_LF][i];
-                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  // Clamp to valid range
+                baseline_filter_level[i] = (baseline_filter_level[i] >= 0) ? ((baseline_filter_level[i] <= MAX_LOOP_FILTER) ? baseline_filter_level[i] : MAX_LOOP_FILTER) : 0;  /* Clamp to valid range */
            }
        }
    }
@@ -561,16 +563,16 @@ void vp8_loop_filter_partial_frame
            baseline_filter_level[i] = default_filt_lvl;
    }

-    // Initialize the loop filter for this frame.
+    /* Initialize the loop filter for this frame. */
    if ((cm->last_filter_type != cm->filter_type) || (cm->last_sharpness_level != cm->sharpness_level))
        vp8_init_loop_filter(cm);
    else if (frame_type != cm->last_frame_type)
        vp8_frame_init_loop_filter(lfi, frame_type);

-    // Set up the buffer pointers
+    /* Set up the buffer pointers */
    y_ptr = post->y_buffer + (post->y_height >> 5) * 16 * post->y_stride;

-    // vp8_filter each macro block
+    /* vp8_filter each macro block */
    for (mb_row = 0; mb_row<(linestocopy >> 4); mb_row++)
    {
        for (mb_col = 0; mb_col < mb_cols; mb_col++)
@@ -593,10 +595,10 @@ void vp8_loop_filter_partial_frame
            }

            y_ptr += 16;
-            mbd->mode_info_context += 1;      // step to next MB
+            mbd->mode_info_context += 1;      /* step to next MB */
        }

        y_ptr += post->y_stride  * 16 - post->y_width;
-        mbd->mode_info_context += 1;          // Skip border mb
+        mbd->mode_info_context += 1;          /* Skip border mb */
    }
 }
--- a/vp8/common/loopfilter.h
+++ b/vp8/common/loopfilter.h
@@ -22,10 +22,10 @@ typedef enum
    SIMPLE_LOOPFILTER = 1
 } LOOPFILTERTYPE;

-// FRK
-// Need to align this structure so when it is declared and
-// passed it can be loaded into vector registers.
-// FRK
+/* FRK
+ * Need to align this structure so when it is declared and
+ * passed it can be loaded into vector registers.
+ */
 typedef struct
 {
    DECLARE_ALIGNED(16, signed char, lim[16]);
@@ -119,8 +119,8 @@ typedef struct

 typedef void loop_filter_uvfunction
 (
-    unsigned char *u,   // source pointer
-    int p,              // pitch
+    unsigned char *u,   /* source pointer */
+    int p,              /* pitch */
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
--- a/vp8/common/loopfilter_filters.c
+++ b/vp8/common/loopfilter_filters.c
@@ -23,7 +23,7 @@ static __inline signed char vp8_signed_char_clamp(int t)
 }


-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_filter_mask(signed char limit, signed char flimit,
                                     uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3)
 {
@@ -39,7 +39,7 @@ static __inline signed char vp8_filter_mask(signed char limit, signed char flimi
    return mask;
 }

-// is there high variance internal edge ( 11111111 yes, 00000000 no)
+/* is there high variance internal edge ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_hevmask(signed char thresh, uc p1, uc p0, uc q0, uc q1)
 {
    signed char hev = 0;
@@ -61,17 +61,18 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
    qs0 = (signed char) * oq0 ^ 0x80;
    qs1 = (signed char) * oq1 ^ 0x80;

-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
    vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
    vp8_filter &= hev;

-    // inner taps
+    /* inner taps */
    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
    vp8_filter &= mask;

-    // save bottom 3 bits so that we round one side +4 and the other +3
-    // if it equals 4 we'll set to adjust by -1 to account for the fact
-    // we'd round 3 the other way
+    /* save bottom 3 bits so that we round one side +4 and the other +3
+     * if it equals 4 we'll set to adjust by -1 to account for the fact
+     * we'd round 3 the other way
+     */
    Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
    Filter2 = vp8_signed_char_clamp(vp8_filter + 3);
    Filter1 >>= 3;
@@ -82,7 +83,7 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
    *op0 = u ^ 0x80;
    vp8_filter = Filter1;

-    // outer tap adjustments
+    /* outer tap adjustments */
    vp8_filter += 1;
    vp8_filter >>= 1;
    vp8_filter &= ~hev;
@@ -96,19 +97,20 @@ static __inline void vp8_filter(signed char mask, signed char hev, uc *op1, uc *
 void vp8_loop_filter_horizontal_edge_c
 (
    unsigned char *s,
-    int p, //pitch
+    int p, /* pitch */
    const signed char *flimit,
    const signed char *limit,
    const signed char *thresh,
    int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
@@ -134,12 +136,13 @@ void vp8_loop_filter_vertical_edge_c
    int count
 )
 {
-    int  hev = 0; // high edge variance
+    int  hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
    do
    {
        mask = vp8_filter_mask(limit[i], flimit[i],
@@ -166,7 +169,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
    signed char qs1 = (signed char) * oq1 ^ 0x80;
    signed char qs2 = (signed char) * oq2 ^ 0x80;

-    // add outer taps if we have high edge variance
+    /* add outer taps if we have high edge variance */
    vp8_filter = vp8_signed_char_clamp(ps1 - qs1);
    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0));
    vp8_filter &= mask;
@@ -174,7 +177,7 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
    Filter2 = vp8_filter;
    Filter2 &= hev;

-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
    Filter1 = vp8_signed_char_clamp(Filter2 + 4);
    Filter2 = vp8_signed_char_clamp(Filter2 + 3);
    Filter1 >>= 3;
@@ -183,25 +186,25 @@ static __inline void vp8_mbfilter(signed char mask, signed char hev,
    ps0 = vp8_signed_char_clamp(ps0 + Filter2);


-    // only apply wider filter if not high edge variance
+    /* only apply wider filter if not high edge variance */
    vp8_filter &= ~hev;
    Filter2 = vp8_filter;

-    // roughly 3/7th difference across boundary
+    /* roughly 3/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7);
    s = vp8_signed_char_clamp(qs0 - u);
    *oq0 = s ^ 0x80;
    s = vp8_signed_char_clamp(ps0 + u);
    *op0 = s ^ 0x80;

-    // roughly 2/7th difference across boundary
+    /* roughly 2/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7);
    s = vp8_signed_char_clamp(qs1 - u);
    *oq1 = s ^ 0x80;
    s = vp8_signed_char_clamp(ps1 + u);
    *op1 = s ^ 0x80;

-    // roughly 1/7th difference across boundary
+    /* roughly 1/7th difference across boundary */
    u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7);
    s = vp8_signed_char_clamp(qs2 - u);
    *oq2 = s ^ 0x80;
@@ -219,12 +222,13 @@ void vp8_mbloop_filter_horizontal_edge_c
    int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

-    // loop filter designed to work using chars so that we can make maximum use
-    // of 8 bit simd instructions.
+    /* loop filter designed to work using chars so that we can make maximum use
+     * of 8 bit simd instructions.
+     */
    do
    {

@@ -253,7 +257,7 @@ void vp8_mbloop_filter_vertical_edge_c
    int count
 )
 {
-    signed char hev = 0; // high edge variance
+    signed char hev = 0; /* high edge variance */
    signed char mask = 0;
    int i = 0;

@@ -273,12 +277,13 @@ void vp8_mbloop_filter_vertical_edge_c

 }

-// should we apply any filter at all ( 11111111 yes, 00000000 no)
+/* should we apply any filter at all ( 11111111 yes, 00000000 no) */
 static __inline signed char vp8_simple_filter_mask(signed char limit, signed char flimit, uc p1, uc p0, uc q0, uc q1)
 {
-// Why does this cause problems for win32?
-// error C2143: syntax error : missing ';' before 'type'
-//  (void) limit;
+/* Why does this cause problems for win32?
+ * error C2143: syntax error : missing ';' before 'type'
+ *  (void) limit;
+ */
    signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2  <= flimit * 2 + limit) * -1;
    return mask;
 }
@@ -296,7 +301,7 @@ static __inline void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *o
    vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (q0 - p0));
    vp8_filter &= mask;

-    // save bottom 3 bits so that we round one side +4 and the other +3
+    /* save bottom 3 bits so that we round one side +4 and the other +3 */
    Filter1 = vp8_signed_char_clamp(vp8_filter + 4);
    Filter1 >>= 3;
    u = vp8_signed_char_clamp(q0 - Filter1);
@@ -324,7 +329,7 @@ void vp8_loop_filter_simple_horizontal_edge_c

    do
    {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1*p],s[0*p]);*/
        mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2*p], s[-1*p], s[0*p], s[1*p]);
        vp8_simple_filter(mask, s - 2 * p, s - 1 * p, s, s + 1 * p);
        ++s;
@@ -348,7 +353,7 @@ void vp8_loop_filter_simple_vertical_edge_c

    do
    {
-        //mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);
+        /*mask = vp8_simple_filter_mask( limit[i], flimit[i],s[-1],s[0]);*/
        mask = vp8_simple_filter_mask(limit[i], flimit[i], s[-2], s[-1], s[0], s[1]);
        vp8_simple_filter(mask, s - 2, s - 1, s, s + 1);
        s += p;
--- a/vp8/common/mbpitch.c
+++ b/vp8/common/mbpitch.c
@@ -14,7 +14,7 @@
 typedef enum
 {
    PRED = 0,
-    DEST = 1,
+    DEST = 1
 } BLOCKSET;

 void vp8_setup_block
@@ -62,13 +62,13 @@ void vp8_setup_macroblock(MACROBLOCKD *x, BLOCKSET bs)
        v = &x->pre.v_buffer;
    }

-    for (block = 0; block < 16; block++) // y blocks
+    for (block = 0; block < 16; block++) /* y blocks */
    {
        vp8_setup_block(&x->block[block], x->dst.y_stride, y, x->dst.y_stride,
                        (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4, bs);
    }

-    for (block = 16; block < 20; block++) // U and V blocks
+    for (block = 16; block < 20; block++) /* U and V blocks */
    {
        vp8_setup_block(&x->block[block], x->dst.uv_stride, u, x->dst.uv_stride,
                        ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4, bs);
@@ -123,7 +123,7 @@ void vp8_setup_block_dptrs(MACROBLOCKD *x)
 void vp8_build_block_doffsets(MACROBLOCKD *x)
 {

-    // handle the destination pitch features
+    /* handle the destination pitch features */
    vp8_setup_macroblock(x, DEST);
    vp8_setup_macroblock(x, PRED);
 }
--- a/vp8/common/modecont.c
+++ b/vp8/common/modecont.c
@@ -14,27 +14,27 @@
 const int vp8_mode_contexts[6][4] =
 {
    {
-        // 0
+        /* 0 */
        7,     1,     1,   143,
    },
    {
-        // 1
+        /* 1 */
        14,    18,    14,   107,
    },
    {
-        // 2
+        /* 2 */
        135,    64,    57,    68,
    },
    {
-        // 3
+        /* 3 */
        60,    56,   128,    65,
    },
    {
-        // 4
+        /* 4 */
        159,   134,   128,    34,
    },
    {
-        // 5
+        /* 5 */
        234,   188,   128,    28,
    },
 };
--- a/vp8/common/modecontext.c
+++ b/vp8/common/modecontext.c
@@ -14,133 +14,133 @@
 const unsigned int vp8_kf_default_bmode_counts [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES] =
 {
    {
-        //Above Mode :  0
-        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, // left_mode 0
-        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, // left_mode 1
-        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, // left_mode 2
-        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, // left_mode 3
-        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, // left_mode 4
-        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, // left_mode 5
-        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, // left_mode 6
-        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, // left_mode 7
-        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, // left_mode 8
-        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, // left_mode 9
+        /*Above Mode :  0*/
+        { 43438,   2195,    470,    316,    615,    171,    217,    412,    124,    160, }, /* left_mode 0 */
+        {  5722,   2751,    296,    291,     81,     68,     80,    101,    100,    170, }, /* left_mode 1 */
+        {  1629,    201,    307,     25,     47,     16,     34,     72,     19,     28, }, /* left_mode 2 */
+        {   332,    266,     36,    500,     20,     65,     23,     14,    154,    106, }, /* left_mode 3 */
+        {   450,     97,     10,     24,    117,     10,      2,     12,      8,     71, }, /* left_mode 4 */
+        {   384,     49,     29,     44,     12,    162,     51,      5,     87,     42, }, /* left_mode 5 */
+        {   495,     53,    157,     27,     14,     57,    180,     17,     17,     34, }, /* left_mode 6 */
+        {   695,     64,     62,      9,     27,      5,      3,    147,     10,     26, }, /* left_mode 7 */
+        {   230,     54,     20,    124,     16,    125,     29,     12,    283,     37, }, /* left_mode 8 */
+        {   260,     87,     21,    120,     32,     16,     33,     16,     33,    203, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  1
-        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, // left_mode 0
-        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, // left_mode 1
-        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, // left_mode 2
-        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, // left_mode 3
-        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, // left_mode 4
-        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, // left_mode 5
-        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, // left_mode 6
-        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, // left_mode 7
-        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, // left_mode 8
-        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, // left_mode 9
+        /*Above Mode :  1*/
+        {  3934,   2573,    355,    137,    128,     87,    133,    117,     37,     27, }, /* left_mode 0 */
+        {  1036,   1929,    278,    135,     27,     37,     48,     55,     41,     91, }, /* left_mode 1 */
+        {   223,    256,    253,     15,     13,      9,     28,     64,      3,      3, }, /* left_mode 2 */
+        {   120,    129,     17,    316,     15,     11,      9,      4,     53,     74, }, /* left_mode 3 */
+        {   129,     58,      6,     11,     38,      2,      0,      5,      2,     67, }, /* left_mode 4 */
+        {    53,     22,     11,     16,      8,     26,     14,      3,     19,     12, }, /* left_mode 5 */
+        {    59,     26,     61,     11,      4,      9,     35,     13,      8,      8, }, /* left_mode 6 */
+        {   101,     52,     40,      8,      5,      2,      8,     59,      2,     20, }, /* left_mode 7 */
+        {    48,     34,     10,     52,      8,     15,      6,      6,     63,     20, }, /* left_mode 8 */
+        {    96,     48,     22,     63,     11,     14,      5,      8,      9,     96, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  2
-        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, // left_mode 0
-        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, // left_mode 1
-        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, // left_mode 2
-        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, // left_mode 3
-        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, // left_mode 4
-        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, // left_mode 5
-        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, // left_mode 6
-        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, // left_mode 7
-        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, // left_mode 8
-        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, // left_mode 9
+        /*Above Mode :  2*/
+        {   709,    461,    506,     36,     27,     33,    151,     98,     24,      6, }, /* left_mode 0 */
+        {   201,    375,    442,     27,     13,      8,     46,     58,      6,     19, }, /* left_mode 1 */
+        {   122,    140,    417,      4,     13,      3,     33,     59,      4,      2, }, /* left_mode 2 */
+        {    36,     17,     22,     16,      6,      8,     12,     17,      9,     21, }, /* left_mode 3 */
+        {    51,     15,      7,      1,     14,      0,      4,      5,      3,     22, }, /* left_mode 4 */
+        {    18,     11,     30,      9,      7,     20,     11,      5,      2,      6, }, /* left_mode 5 */
+        {    38,     21,    103,      9,      4,     12,     79,     13,      2,      5, }, /* left_mode 6 */
+        {    64,     17,     66,      2,     12,      4,      2,     65,      4,      5, }, /* left_mode 7 */
+        {    14,      7,      7,     16,      3,     11,      4,     13,     15,     16, }, /* left_mode 8 */
+        {    36,      8,     32,      9,      9,      4,     14,      7,      6,     24, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  3
-        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, // left_mode 0
-        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, // left_mode 1
-        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, // left_mode 2
-        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, // left_mode 3
-        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, // left_mode 4
-        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, // left_mode 5
-        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, // left_mode 6
-        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, // left_mode 7
-        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, // left_mode 8
-        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, // left_mode 9
+        /*Above Mode :  3*/
+        {  1340,    173,     36,    119,     30,     10,     13,     10,     20,     26, }, /* left_mode 0 */
+        {   156,    293,     26,    108,      5,     16,      2,      4,     23,     30, }, /* left_mode 1 */
+        {    60,     34,     13,      7,      3,      3,      0,      8,      4,      5, }, /* left_mode 2 */
+        {    72,     64,      1,    235,      3,      9,      2,      7,     28,     38, }, /* left_mode 3 */
+        {    29,     14,      1,      3,      5,      0,      2,      2,      5,     13, }, /* left_mode 4 */
+        {    22,      7,      4,     11,      2,      5,      1,      2,      6,      4, }, /* left_mode 5 */
+        {    18,     14,      5,      6,      4,      3,     14,      0,      9,      2, }, /* left_mode 6 */
+        {    41,     10,      7,      1,      2,      0,      0,     10,      2,      1, }, /* left_mode 7 */
+        {    23,     19,      2,     33,      1,      5,      2,      0,     51,      8, }, /* left_mode 8 */
+        {    33,     26,      7,     53,      3,      9,      3,      3,      9,     19, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  4
-        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, // left_mode 0
-        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, // left_mode 1
-        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, // left_mode 2
-        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, // left_mode 3
-        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, // left_mode 4
-        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, // left_mode 5
-        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, // left_mode 6
-        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, // left_mode 7
-        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, // left_mode 8
-        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, // left_mode 9
+        /*Above Mode :  4*/
+        {   410,    165,     43,     31,     66,     15,     30,     54,      8,     17, }, /* left_mode 0 */
+        {   115,     64,     27,     18,     30,      7,     11,     15,      4,     19, }, /* left_mode 1 */
+        {    31,     23,     25,      1,      7,      2,      2,     10,      0,      5, }, /* left_mode 2 */
+        {    17,      4,      1,      6,      8,      2,      7,      5,      5,     21, }, /* left_mode 3 */
+        {   120,     12,      1,      2,     83,      3,      0,      4,      1,     40, }, /* left_mode 4 */
+        {     4,      3,      1,      2,      1,      2,      5,      0,      3,      6, }, /* left_mode 5 */
+        {    10,      2,     13,      6,      6,      6,      8,      2,      4,      5, }, /* left_mode 6 */
+        {    58,     10,      5,      1,     28,      1,      1,     33,      1,      9, }, /* left_mode 7 */
+        {     8,      2,      1,      4,      2,      5,      1,      1,      2,     10, }, /* left_mode 8 */
+        {    76,      7,      5,      7,     18,      2,      2,      0,      5,     45, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  5
-        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, // left_mode 0
-        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, // left_mode 1
-        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, // left_mode 2
-        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, // left_mode 3
-        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, // left_mode 4
-        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, // left_mode 5
-        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, // left_mode 6
-        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, // left_mode 7
-        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, // left_mode 8
-        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, // left_mode 9
+        /*Above Mode :  5*/
+        {   444,     46,     47,     20,     14,    110,     60,     14,     60,      7, }, /* left_mode 0 */
+        {    59,     57,     25,     18,      3,     17,     21,      6,     14,      6, }, /* left_mode 1 */
+        {    24,     17,     20,      6,      4,     13,      7,      2,      3,      2, }, /* left_mode 2 */
+        {    13,     11,      5,     14,      4,      9,      2,      4,     15,      7, }, /* left_mode 3 */
+        {     8,      5,      2,      1,      4,      0,      1,      1,      2,     12, }, /* left_mode 4 */
+        {    19,      5,      5,      7,      4,     40,      6,      3,     10,      4, }, /* left_mode 5 */
+        {    16,      5,      9,      1,      1,     16,     26,      2,     10,      4, }, /* left_mode 6 */
+        {    11,      4,      8,      1,      1,      4,      4,      5,      4,      1, }, /* left_mode 7 */
+        {    15,      1,      3,      7,      3,     21,      7,      1,     34,      5, }, /* left_mode 8 */
+        {    18,      5,      1,      3,      4,      3,      7,      1,      2,      9, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  6
-        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, // left_mode 0
-        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, // left_mode 1
-        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, // left_mode 2
-        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, // left_mode 3
-        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, // left_mode 4
-        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, // left_mode 5
-        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, // left_mode 6
-        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, // left_mode 7
-        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, // left_mode 8
-        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, // left_mode 9
+        /*Above Mode :  6*/
+        {   476,    149,     94,     13,     14,     77,    291,     27,     23,      3, }, /* left_mode 0 */
+        {    79,     83,     42,     14,      2,     12,     63,      2,      4,     14, }, /* left_mode 1 */
+        {    43,     36,     55,      1,      3,      8,     42,     11,      5,      1, }, /* left_mode 2 */
+        {     9,      9,      6,     16,      1,      5,      6,      3,     11,     10, }, /* left_mode 3 */
+        {    10,      3,      1,      3,     10,      1,      0,      1,      1,      4, }, /* left_mode 4 */
+        {    14,      6,     15,      5,      1,     20,     25,      2,      5,      0, }, /* left_mode 5 */
+        {    28,      7,     51,      1,      0,      8,    127,      6,      2,      5, }, /* left_mode 6 */
+        {    13,      3,      3,      2,      3,      1,      2,      8,      1,      2, }, /* left_mode 7 */
+        {    10,      3,      3,      3,      3,      8,      2,      2,      9,      3, }, /* left_mode 8 */
+        {    13,      7,     11,      4,      0,      4,      6,      2,      5,      8, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  7
-        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, // left_mode 0
-        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, // left_mode 1
-        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, // left_mode 2
-        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, // left_mode 3
-        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, // left_mode 4
-        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, // left_mode 5
-        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, // left_mode 6
-        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, // left_mode 7
-        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, // left_mode 8
-        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, // left_mode 9
+        /*Above Mode :  7*/
+        {   376,    135,    119,      6,     32,      8,     31,    224,      9,      3, }, /* left_mode 0 */
+        {    93,     60,     54,      6,     13,      7,      8,     92,      2,     12, }, /* left_mode 1 */
+        {    74,     36,     84,      0,      3,      2,      9,     67,      2,      1, }, /* left_mode 2 */
+        {    19,      4,      4,      8,      8,      2,      4,      7,      6,     16, }, /* left_mode 3 */
+        {    51,      7,      4,      1,     77,      3,      0,     14,      1,     15, }, /* left_mode 4 */
+        {     7,      7,      5,      7,      4,      7,      4,      5,      0,      3, }, /* left_mode 5 */
+        {    18,      2,     19,      2,      2,      4,     12,     11,      1,      2, }, /* left_mode 6 */
+        {   129,      6,     27,      1,     21,      3,      0,    189,      0,      6, }, /* left_mode 7 */
+        {     9,      1,      2,      8,      3,      7,      0,      5,      3,      3, }, /* left_mode 8 */
+        {    20,      4,      5,     10,      4,      2,      7,     17,      3,     16, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  8
-        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, // left_mode 0
-        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, // left_mode 1
-        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, // left_mode 2
-        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, // left_mode 3
-        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, // left_mode 4
-        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, // left_mode 5
-        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, // left_mode 6
-        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, // left_mode 7
-        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, // left_mode 8
-        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, // left_mode 9
+        /*Above Mode :  8*/
+        {   617,     68,     34,     79,     11,     27,     25,     14,     75,     13, }, /* left_mode 0 */
+        {    51,     82,     21,     26,      6,     12,     13,      1,     26,     16, }, /* left_mode 1 */
+        {    29,      9,     12,     11,      3,      7,      1,     10,      2,      2, }, /* left_mode 2 */
+        {    17,     19,     11,     74,      4,      3,      2,      0,     58,     13, }, /* left_mode 3 */
+        {    10,      1,      1,      3,      4,      1,      0,      2,      1,      8, }, /* left_mode 4 */
+        {    14,      4,      5,      5,      1,     13,      2,      0,     27,      8, }, /* left_mode 5 */
+        {    10,      3,      5,      4,      1,      7,      6,      4,      5,      1, }, /* left_mode 6 */
+        {    10,      2,      6,      2,      1,      1,      1,      4,      2,      1, }, /* left_mode 7 */
+        {    14,      8,      5,     23,      2,     12,      6,      2,    117,      5, }, /* left_mode 8 */
+        {     9,      6,      2,     19,      1,      6,      3,      2,      9,      9, }, /* left_mode 9 */
    },
    {
-        //Above Mode :  9
-        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, // left_mode 0
-        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, // left_mode 1
-        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, // left_mode 2
-        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, // left_mode 3
-        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, // left_mode 4
-        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, // left_mode 5
-        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, // left_mode 6
-        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, // left_mode 7
-        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, // left_mode 8
-        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, // left_mode 9
+        /*Above Mode :  9*/
+        {   680,     73,     22,     38,     42,      5,     11,      9,      6,     28, }, /* left_mode 0 */
+        {   113,    112,     21,     22,     10,      2,      8,      4,      6,     42, }, /* left_mode 1 */
+        {    44,     20,     24,      6,      5,      4,      3,      3,      1,      2, }, /* left_mode 2 */
+        {    40,     23,      7,     71,      5,      2,      4,      1,      7,     22, }, /* left_mode 3 */
+        {    85,      9,      4,      4,     17,      2,      0,      3,      2,     23, }, /* left_mode 4 */
+        {    13,      4,      2,      6,      1,      7,      0,      1,      7,      6, }, /* left_mode 5 */
+        {    26,      6,      8,      3,      2,      3,      8,      1,      5,      4, }, /* left_mode 6 */
+        {    54,      8,      9,      6,      7,      0,      1,     11,      1,      3, }, /* left_mode 7 */
+        {     9,     10,      4,     13,      2,      5,      4,      2,     14,      8, }, /* left_mode 8 */
+        {    92,      9,      5,     19,     15,      3,      3,      1,      6,     58, }, /* left_mode 9 */
    },
 };
--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -21,9 +21,9 @@
 #include "recon.h"
 #include "postproc.h"

-//#ifdef PACKET_TESTING
+/*#ifdef PACKET_TESTING*/
 #include "header.h"
-//#endif
+/*#endif*/

 /* Create/destroy static data structures. */

@@ -43,7 +43,7 @@ typedef struct frame_contexts
    vp8_prob sub_mv_ref_prob [VP8_SUBMVREFS-1];
    vp8_prob coef_probs [BLOCK_TYPES] [COEF_BANDS] [PREV_COEF_CONTEXTS] [vp8_coef_tokens-1];
    MV_CONTEXT mvc[2];
-    MV_CONTEXT pre_mvc[2];  //not to caculate the mvcost for the frame if mvc doesn't change.
+    MV_CONTEXT pre_mvc[2];  /* not to calculate the mvcost for the frame if mvc doesn't change. */
 } FRAME_CONTEXT;

 typedef enum
@@ -74,6 +74,7 @@ typedef struct VP8_COMMON_RTCD
    vp8_subpix_rtcd_vtable_t      subpix;
    vp8_loopfilter_rtcd_vtable_t  loopfilter;
    vp8_postproc_rtcd_vtable_t    postproc;
+    int                           flags;
 #else
    int unused;
 #endif
@@ -83,9 +84,9 @@ typedef struct VP8Common
 {
    struct vpx_internal_error_info  error;

-    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][4][4]);
-    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][4][4]);
+    DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][16]);
+    DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][16]);

    int Width;
    int Height;
@@ -104,7 +105,7 @@ typedef struct VP8Common
    YV12_BUFFER_CONFIG post_proc_buffer;
    YV12_BUFFER_CONFIG temp_scale_frame;

-    FRAME_TYPE last_frame_type;  //Add to check if vp8_frame_init_loop_filter() can be skipped.
+    FRAME_TYPE last_frame_type;  /* Add to check if vp8_frame_init_loop_filter() can be skipped. */
    FRAME_TYPE frame_type;

    int show_frame;
@@ -115,7 +116,7 @@ typedef struct VP8Common
    int mb_cols;
    int mode_info_stride;

-    // prfile settings
+    /* profile settings */
    int experimental;
    int mb_no_coeff_skip;
    int no_lpf;
@@ -124,7 +125,7 @@ typedef struct VP8Common
    int full_pixel;

    int base_qindex;
-    int last_kf_gf_q;  // Q used on the last GF or KF
+    int last_kf_gf_q;  /* Q used on the last GF or KF */

    int y1dc_delta_q;
    int y2dc_delta_q;
@@ -154,31 +155,31 @@ typedef struct VP8Common
    int last_sharpness_level;
    int sharpness_level;

-    int refresh_last_frame;       // Two state 0 = NO, 1 = YES
-    int refresh_golden_frame;     // Two state 0 = NO, 1 = YES
-    int refresh_alt_ref_frame;     // Two state 0 = NO, 1 = YES
+    int refresh_last_frame;       /* Two state 0 = NO, 1 = YES */
+    int refresh_golden_frame;     /* Two state 0 = NO, 1 = YES */
+    int refresh_alt_ref_frame;     /* Two state 0 = NO, 1 = YES */

-    int copy_buffer_to_gf;         // 0 none, 1 Last to GF, 2 ARF to GF
-    int copy_buffer_to_arf;        // 0 none, 1 Last to ARF, 2 GF to ARF
+    int copy_buffer_to_gf;         /* 0 none, 1 Last to GF, 2 ARF to GF */
+    int copy_buffer_to_arf;        /* 0 none, 1 Last to ARF, 2 GF to ARF */

-    int refresh_entropy_probs;    // Two state 0 = NO, 1 = YES
+    int refresh_entropy_probs;    /* Two state 0 = NO, 1 = YES */

-    int ref_frame_sign_bias[MAX_REF_FRAMES];    // Two state 0, 1
+    int ref_frame_sign_bias[MAX_REF_FRAMES];    /* Two state 0, 1 */

-    // Y,U,V,Y2
-    ENTROPY_CONTEXT_PLANES *above_context;   // row of context for each plane
-    ENTROPY_CONTEXT_PLANES left_context;  // (up to) 4 contexts ""
+    /* Y,U,V,Y2 */
+    ENTROPY_CONTEXT_PLANES *above_context;   /* row of context for each plane */
+    ENTROPY_CONTEXT_PLANES left_context;  /* (up to) 4 contexts "" */


-    // keyframe block modes are predicted by their above, left neighbors
+    /* keyframe block modes are predicted by their above, left neighbors */

    vp8_prob kf_bmode_prob [VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES-1];
    vp8_prob kf_ymode_prob [VP8_YMODES-1];  /* keyframe "" */
    vp8_prob kf_uv_mode_prob [VP8_UV_MODES-1];


-    FRAME_CONTEXT lfc; // last frame entropy
-    FRAME_CONTEXT fc;  // this frame entropy
+    FRAME_CONTEXT lfc; /* last frame entropy */
+    FRAME_CONTEXT fc;  /* this frame entropy */

    unsigned int current_video_frame;

--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -19,7 +19,53 @@
 #include <math.h>
 #include <stdlib.h>
 #include <stdio.h>
-// global constants
+
+#define RGB_TO_YUV(t)                                                                       \
+    ( (0.257*(float)(t>>16)) + (0.504*(float)(t>>8&0xff)) + (0.098*(float)(t&0xff)) + 16),  \
+    (-(0.148*(float)(t>>16)) - (0.291*(float)(t>>8&0xff)) + (0.439*(float)(t&0xff)) + 128), \
+    ( (0.439*(float)(t>>16)) - (0.368*(float)(t>>8&0xff)) - (0.071*(float)(t&0xff)) + 128)
+
+/* global constants */
+
+static const unsigned char MB_PREDICTION_MODE_colors[MB_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x98FB98) },   /* PaleGreen */
+    { RGB_TO_YUV(0x00FF00) },   /* Green */
+    { RGB_TO_YUV(0xADFF2F) },   /* GreenYellow */
+    { RGB_TO_YUV(0x228B22) },   /* ForestGreen */
+    { RGB_TO_YUV(0x006400) },   /* DarkGreen */
+    { RGB_TO_YUV(0x98F5FF) },   /* Cadet Blue */
+    { RGB_TO_YUV(0x6CA6CD) },   /* Sky Blue */
+    { RGB_TO_YUV(0x00008B) },   /* Dark blue */
+    { RGB_TO_YUV(0x551A8B) },   /* Purple */
+    { RGB_TO_YUV(0xFF0000) }    /* Red */
+};
+
+static const unsigned char B_PREDICTION_MODE_colors[B_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x6633ff) },   /* Purple */
+    { RGB_TO_YUV(0xcc33ff) },   /* Magenta */
+    { RGB_TO_YUV(0xff33cc) },   /* Pink */
+    { RGB_TO_YUV(0xff3366) },   /* Coral */
+    { RGB_TO_YUV(0x3366ff) },   /* Blue */
+    { RGB_TO_YUV(0xed00f5) },   /* Dark Blue (NOTE: 0xed00f5 is a bright magenta - confirm value) */
+    { RGB_TO_YUV(0x2e00b8) },   /* Dark Purple */
+    { RGB_TO_YUV(0xff6633) },   /* Orange */
+    { RGB_TO_YUV(0x33ccff) },   /* Light Blue */
+    { RGB_TO_YUV(0x8ab800) },   /* Green */
+    { RGB_TO_YUV(0xffcc33) },   /* Light Orange */
+    { RGB_TO_YUV(0x33ffcc) },   /* Aqua */
+    { RGB_TO_YUV(0x66ff33) },   /* Light Green */
+    { RGB_TO_YUV(0xccff33) },   /* Yellow */
+};
+
+static const unsigned char MV_REFERENCE_FRAME_colors[MB_MODE_COUNT][3] =
+{
+    { RGB_TO_YUV(0x00ff00) },   /* Green */
+    { RGB_TO_YUV(0x0000ff) },   /* Blue */
+    { RGB_TO_YUV(0xffff00) },   /* Yellow */
+    { RGB_TO_YUV(0xff0000) },   /* Red */
+};

 static const short kernel5[] =
 {
@@ -76,7 +122,7 @@ const short vp8_rv[] =


 extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch);
-
+extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch);
 /***********************************************************************************************************
 */
 void vp8_post_proc_down_and_across_c
@@ -101,7 +147,7 @@ void vp8_post_proc_down_and_across_c

    for (row = 0; row < rows; row++)
    {
-        // post_proc_down for one row
+        /* post_proc_down for one row */
        p_src = src_ptr;
        p_dst = dst_ptr;

@@ -124,7 +170,7 @@ void vp8_post_proc_down_and_across_c
            p_dst[col] = v;
        }

-        // now post_proc_across
+        /* now post_proc_across */
        p_src = dst_ptr;
        p_dst = dst_ptr;

@@ -153,12 +199,12 @@ void vp8_post_proc_down_and_across_c
                p_dst[col-2] = d[(col-2)&7];
        }

-        //handle the last two pixels
+        /* handle the last two pixels */
        p_dst[col-2] = d[(col-2)&7];
        p_dst[col-1] = d[(col-1)&7];


-        //next row
+        /* next row */
        src_ptr += pitch;
        dst_ptr += pitch;
    }
@@ -351,9 +397,9 @@ static void fillrd(struct postproc_state *state, int q, int a)

    sigma = ai + .5 + .6 * (63 - qi) / 63.0;

-    // set up a lookup table of 256 entries that matches
-    // a gaussian distribution with sigma determined by q.
-    //
+    /* set up a lookup table of 256 entries that matches
+     * a gaussian distribution with sigma determined by q.
+     */
    {
        double i;
        int next, j;
@@ -444,6 +490,187 @@ void vp8_plane_add_noise_c(unsigned char *Start, char *noise,
    }
 }

+/* Blend the interior of the macro block with the solid color (y1, u1, v1).
+ * The edges are left unblended to give distinction to macro blocks in areas
+ * filled with the same color block.  alpha is the weight, in 1/65536 units,
+ * kept from the existing pixel; stride is the luma stride (halved for u, v). */
+void vp8_blend_mb_inner_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);   /* color term, pre-scaled by (65536 - alpha) */
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    y += 2*stride + 2;   /* skip the top two rows and the left two columns */
+    for (i = 0; i < 12; i++)   /* 12x12 luma interior */
+    {
+        for (j = 0; j < 12; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    stride >>= 1;   /* chroma rows are stepped with half the luma stride */
+
+    u += stride + 1;   /* skip one chroma row and one chroma column */
+    v += stride + 1;
+
+    for (i = 0; i < 6; i++)   /* 6x6 chroma interior, u and v together */
+    {
+        for (j = 0; j < 6; j++)
+        {
+            u[j] = (u[j]*alpha + u1_const)>>16;
+            v[j] = (v[j]*alpha + v1_const)>>16;
+        }
+        u += stride;
+        v += stride;
+    }
+}
+
+/* Blend only the edge of the macro block with the color (y1, u1, v1): a two pixel
+ * luma border and a one pixel chroma border.  The center is left unblended to allow
+ * other visualizations to be layered.  alpha (1/65536 units) weights the old pixel. */
+void vp8_blend_mb_outer_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);   /* color term, pre-scaled by (65536 - alpha) */
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    for (i = 0; i < 2; i++)   /* top two luma rows, all 16 pixels */
+    {
+        for (j = 0; j < 16; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    for (i = 0; i < 12; i++)   /* twelve middle rows: two leftmost and two rightmost pixels only */
+    {
+        y[0]  = (y[0]*alpha  + y1_const)>>16;
+        y[1]  = (y[1]*alpha  + y1_const)>>16;
+        y[14] = (y[14]*alpha + y1_const)>>16;
+        y[15] = (y[15]*alpha + y1_const)>>16;
+        y += stride;
+    }
+
+    for (i = 0; i < 2; i++)   /* bottom two luma rows, all 16 pixels */
+    {
+        for (j = 0; j < 16; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    stride >>= 1;   /* chroma rows are stepped with half the luma stride */
+
+    for (j = 0; j < 8; j++)   /* top chroma row, all 8 pixels */
+    {
+        u[j] = (u[j]*alpha + u1_const)>>16;
+        v[j] = (v[j]*alpha + v1_const)>>16;
+    }
+    u += stride;
+    v += stride;
+
+    for (i = 0; i < 6; i++)   /* six middle chroma rows: first and last pixel only */
+    {
+        u[0] = (u[0]*alpha + u1_const)>>16;
+        v[0] = (v[0]*alpha + v1_const)>>16;
+
+        u[7] = (u[7]*alpha + u1_const)>>16;
+        v[7] = (v[7]*alpha + v1_const)>>16;
+
+        u += stride;
+        v += stride;
+    }
+
+    for (j = 0; j < 8; j++)   /* bottom chroma row, all 8 pixels */
+    {
+        u[j] = (u[j]*alpha + u1_const)>>16;
+        v[j] = (v[j]*alpha + v1_const)>>16;
+    }
+}
+
+void vp8_blend_b_c (unsigned char *y, unsigned char *u, unsigned char *v,
+                        int y1, int u1, int v1, int alpha, int stride)
+{   /* Blend a 4x4 luma area and the 2x2 u and v areas with the color (y1, u1, v1); alpha (1/65536 units) weights the old pixel. */
+    int i, j;
+    int y1_const = y1*((1<<16)-alpha);   /* color term, pre-scaled by (65536 - alpha) */
+    int u1_const = u1*((1<<16)-alpha);
+    int v1_const = v1*((1<<16)-alpha);
+
+    for (i = 0; i < 4; i++)   /* 4x4 luma pixels, rows stepped by stride */
+    {
+        for (j = 0; j < 4; j++)
+        {
+            y[j] = (y[j]*alpha + y1_const)>>16;
+        }
+        y += stride;
+    }
+
+    stride >>= 1;   /* chroma rows are stepped with half the luma stride */
+
+    for (i = 0; i < 2; i++)   /* 2x2 chroma pixels, u and v together */
+    {
+        for (j = 0; j < 2; j++)
+        {
+            u[j] = (u[j]*alpha + u1_const)>>16;
+            v[j] = (v[j]*alpha + v1_const)>>16;
+        }
+        u += stride;
+        v += stride;
+    }
+}
+
+static void constrain_line (int x0, int *x1, int y0, int *y1, int width, int height)
+{   /* Pull the end point (*x1, *y1) of the line starting at (x0, y0) back into 0..width by 0..height; the start point is not modified. */
+    int dx;
+    int dy;
+
+    if (*x1 > width)   /* NOTE(review): the limit is width itself, not width-1 - confirm the line drawer accepts it */
+    {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
+        *x1 = width;
+        if (dx)   /* avoid dividing by zero */
+            *y1 = ((width-x0)*dy)/dx + y0;   /* y where the line crosses x == width (integer division) */
+    }
+    if (*x1 < 0)   /* tested after, and using the result of, the clamp above */
+    {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
+        *x1 = 0;
+        if (dx)
+            *y1 = ((0-x0)*dy)/dx + y0;   /* y where the line crosses x == 0 */
+    }
+    if (*y1 > height)   /* same treatment for the vertical limits; this may move *x1 again */
+    {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
+        *y1 = height;
+        if (dy)   /* avoid dividing by zero */
+            *x1 = ((height-y0)*dx)/dy + x0;   /* x where the line crosses y == height */
+    }
+    if (*y1 < 0)
+    {
+        dx = *x1 - x0;
+        dy = *y1 - y0;
+
+        *y1 = 0;
+        if (dy)
+            *x1 = ((0-y0)*dx)/dy + x0;   /* x where the line crosses y == 0 */
+    }
+}
+
+
 #if CONFIG_RUNTIME_CPU_DETECT
 #define RTCD_VTABLE(oci) (&(oci)->rtcd.postproc)
 #else
@@ -465,7 +692,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
    {
        *dest = *oci->frame_to_show;

-        // handle problem with extending borders
+        /* handle problem with extending borders */
        dest->y_width = oci->Width;
        dest->y_height = oci->Height;
        dest->uv_height = dest->y_height / 2;
@@ -521,7 +748,8 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                oci->mb_cols, oci->mb_rows);
        vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
    }
-    else if (flags & VP8D_DEBUG_LEVEL2)
+
+    if (flags & VP8D_DEBUG_LEVEL2)
    {
        int i, j;
        unsigned char *y_ptr;
@@ -533,7 +761,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l

        y_ptr = post->y_buffer + 4 * post->y_stride + 4;

-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
        for (i = 0; i < mb_rows; i++)
        {
            for (j = 0; j < mb_cols; j++)
@@ -547,12 +775,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                y_ptr += 16;
            }

-            mb_index ++; //border
+            mb_index ++; /* border */
            y_ptr += post->y_stride  * 16 - post->y_width;

        }
    }
-    else if (flags & VP8D_DEBUG_LEVEL3)
+
+    if (flags & VP8D_DEBUG_LEVEL3)
    {
        int i, j;
        unsigned char *y_ptr;
@@ -564,7 +793,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l

        y_ptr = post->y_buffer + 4 * post->y_stride + 4;

-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
        for (i = 0; i < mb_rows; i++)
        {
            for (j = 0; j < mb_cols; j++)
@@ -581,12 +810,13 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                y_ptr += 16;
            }

-            mb_index ++; //border
+            mb_index ++; /* border */
            y_ptr += post->y_stride  * 16 - post->y_width;

        }
    }
-    else if (flags & VP8D_DEBUG_LEVEL4)
+
+    if (flags & VP8D_DEBUG_LEVEL4)
    {
        sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
        vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
@@ -601,7 +831,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l

        y_ptr = post->y_buffer + 4 * post->y_stride + 4;

-        // vp8_filter each macro block
+        /* vp8_filter each macro block */
        for (i = 0; i < mb_rows; i++)
        {
            for (j = 0; j < mb_cols; j++)
@@ -614,7 +844,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l
                y_ptr += 16;
            }

-            mb_index ++; //border
+            mb_index ++; /* border */
            y_ptr += post->y_stride  * 16 - post->y_width;

        }
@@ -623,11 +853,261 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, int deblock_l

    }

+    /* Draw motion vectors */
+    if (flags & VP8D_DEBUG_DRAW_MV)
+    {
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        int mb_cols = width  >> 4;
+        unsigned char *y_buffer = oci->post_proc_buffer.y_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+        int x0, y0;

+        for (y0 = 0; y0 < height; y0 += 16)
+        {
+            for (x0 = 0; x0 < width; x0 += 16)
+            {
+                int x1, y1;
+
+                if (mi->mbmi.mode == SPLITMV)
+                {
+                    switch (mi->mbmi.partitioning)
+                    {
+                        case 0 :    /* mv_top_bottom */
+                        {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
+
+                            x1 = x0 + 8 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
+
+                            constrain_line (x0+8, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+8,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[8];
+
+                            x1 = x0 + 8 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+8, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+8,  x1, y0+12,  y1, y_buffer, y_stride);
+
+                            break;
+                        }
+                        case 1 :    /* mv_left_right */
+                        {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 + 8 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+8, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+8,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[2];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 + 8 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+8, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+8,  y1, y_buffer, y_stride);
+
+                            break;
+                        }
+                        case 2 :    /* mv_quarters   */
+                        {
+                            B_MODE_INFO *bmi = &mi->bmi[0];
+                            MV *mv = &bmi->mv.as_mv;
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[2];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 + 4 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+4, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+4,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[8];
+
+                            x1 = x0 + 4 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+4, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+4,  x1, y0+12,  y1, y_buffer, y_stride);
+
+                            bmi = &mi->bmi[10];
+
+                            x1 = x0 +12 + (mv->col >> 3);
+                            y1 = y0 +12 + (mv->row >> 3);
+
+                            constrain_line (x0+12, &x1, y0+12, &y1, width, height);
+                            vp8_blit_line  (x0+12,  x1, y0+12,  y1, y_buffer, y_stride);
+                            break;
+                        }
+                        default :
+                        {
+                            B_MODE_INFO *bmi = mi->bmi;
+                            int bx0, by0;
+
+                            for (by0 = y0; by0 < (y0+16); by0 += 4)
+                            {
+                                for (bx0 = x0; bx0 < (x0+16); bx0 += 4)
+                                {
+                                    MV *mv = &bmi->mv.as_mv;
+
+                                    x1 = bx0 + 2 + (mv->col >> 3);
+                                    y1 = by0 + 2 + (mv->row >> 3);
+
+                                    constrain_line (bx0+2, &x1, by0+2, &y1, width, height);
+                                    vp8_blit_line  (bx0+2,  x1, by0+2,  y1, y_buffer, y_stride);
+
+                                    bmi++;
+                                }
+                            }
+                        }
+                    }
+                }
+                else if (mi->mbmi.mode >= NEARESTMV)
+                {
+                    MV *mv = &mi->mbmi.mv.as_mv;
+                    const int lx0 = x0 + 8;
+                    const int ly0 = y0 + 8;
+
+                    x1 = lx0 + (mv->col >> 3);
+                    y1 = ly0 + (mv->row >> 3);
+
+                    if (x1 != lx0 && y1 != ly0)
+                    {
+                        constrain_line (lx0, &x1, ly0-1, &y1, width, height);
+                        vp8_blit_line  (lx0,  x1, ly0-1,  y1, y_buffer, y_stride);
+
+                        constrain_line (lx0, &x1, ly0+1, &y1, width, height);
+                        vp8_blit_line  (lx0,  x1, ly0+1,  y1, y_buffer, y_stride);
+                    }
+                    else
+                        vp8_blit_line  (lx0,  x1, ly0,  y1, y_buffer, y_stride);
+                }
+                mi++;
+            }
+            mi++;
+        }
+    }
+
+    /* Color in block modes */
+    if (flags & VP8D_DEBUG_CLR_BLK_MODES)
+    {
+        int y, x;
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+        unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+        unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+
+        for (y = 0; y < height; y += 16)
+        {
+            for (x = 0; x < width; x += 16)
+            {
+                int Y = 0, U = 0, V = 0;
+
+                if (mi->mbmi.mode == B_PRED)
+                {
+                    int by, bx;
+                    unsigned char *yl, *ul, *vl;
+                    B_MODE_INFO *bmi = mi->bmi;
+
+                    yl = y_ptr + x;
+                    ul = u_ptr + (x>>1);
+                    vl = v_ptr + (x>>1);
+
+                    for (by = 0; by < 16; by += 4)
+                    {
+                        for (bx = 0; bx < 16; bx += 4)
+                        {
+                            Y = B_PREDICTION_MODE_colors[bmi->mode][0];
+                            U = B_PREDICTION_MODE_colors[bmi->mode][1];
+                            V = B_PREDICTION_MODE_colors[bmi->mode][2];
+
+                            POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_b)
+                                (yl+bx, ul+(bx>>1), vl+(bx>>1), Y, U, V, 0xc000, y_stride);
+
+                            bmi++;
+                        }
+
+                        yl += y_stride*4;
+                        ul += y_stride*1;
+                        vl += y_stride*1;
+                    }
+                }
+                else
+                {
+                    Y = MB_PREDICTION_MODE_colors[mi->mbmi.mode][0];
+                    U = MB_PREDICTION_MODE_colors[mi->mbmi.mode][1];
+                    V = MB_PREDICTION_MODE_colors[mi->mbmi.mode][2];
+
+                    POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_inner)
+                        (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+                }
+                mi++;
+            }
+            y_ptr += y_stride*16;
+            u_ptr += y_stride*4;
+            v_ptr += y_stride*4;
+
+            mi++;
+        }
+    }
+
+    /* Color in frame reference blocks */
+    if (flags & VP8D_DEBUG_CLR_FRM_REF_BLKS)
+    {
+        int y, x;
+        YV12_BUFFER_CONFIG *post = &oci->post_proc_buffer;
+        int width  = post->y_width;
+        int height = post->y_height;
+        unsigned char *y_ptr = oci->post_proc_buffer.y_buffer;
+        unsigned char *u_ptr = oci->post_proc_buffer.u_buffer;
+        unsigned char *v_ptr = oci->post_proc_buffer.v_buffer;
+        int y_stride = oci->post_proc_buffer.y_stride;
+        MODE_INFO *mi = oci->mi;
+
+        for (y = 0; y < height; y += 16)
+        {
+            for (x = 0; x < width; x +=16)
+            {
+                int Y = 0, U = 0, V = 0;
+
+                Y = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][0];
+                U = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][1];
+                V = MV_REFERENCE_FRAME_colors[mi->mbmi.ref_frame][2];
+
+                POSTPROC_INVOKE(RTCD_VTABLE(oci), blend_mb_outer)
+                    (y_ptr+x, u_ptr+(x>>1), v_ptr+(x>>1), Y, U, V, 0xc000, y_stride);
+
+                mi++;
+            }
+            y_ptr += y_stride*16;
+            u_ptr += y_stride*4;
+            v_ptr += y_stride*4;
+
+            mi++;
+        }
+    }

    *dest = oci->post_proc_buffer;

-    // handle problem with extending borders
+    /* handle problem with extending borders */
    dest->y_width = oci->Width;
    dest->y_height = oci->Height;
    dest->uv_height = dest->y_height / 2;
--- a/vp8/common/postproc.h
+++ b/vp8/common/postproc.h
@@ -24,6 +24,18 @@
              char whiteclamp[16], char bothclamp[16],\
              unsigned int w, unsigned int h, int pitch)

+#define prototype_postproc_blend_mb_inner(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)   /* prototype shape for blend_mb_inner; sym is the function (or pointer) name */
+
+#define prototype_postproc_blend_mb_outer(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)   /* prototype shape for blend_mb_outer; sym is the function (or pointer) name */
+
+#define prototype_postproc_blend_b(sym)\
+    void sym (unsigned char *y, unsigned char *u, unsigned char *v,\
+              int y1, int u1, int v1, int alpha, int stride)   /* prototype shape for blend_b; sym is the function (or pointer) name */
+
 #if ARCH_X86 || ARCH_X86_64
 #include "x86/postproc_x86.h"
 #endif
@@ -48,16 +60,36 @@ extern prototype_postproc(vp8_postproc_downacross);
 #endif
 extern prototype_postproc_addnoise(vp8_postproc_addnoise);

+#ifndef vp8_postproc_blend_mb_inner
+#define vp8_postproc_blend_mb_inner vp8_blend_mb_inner_c
+#endif
+extern prototype_postproc_blend_mb_inner(vp8_postproc_blend_mb_inner);
+
+#ifndef vp8_postproc_blend_mb_outer
+#define vp8_postproc_blend_mb_outer vp8_blend_mb_outer_c
+#endif
+extern prototype_postproc_blend_mb_outer(vp8_postproc_blend_mb_outer);
+
+#ifndef vp8_postproc_blend_b
+#define vp8_postproc_blend_b vp8_blend_b_c
+#endif
+extern prototype_postproc_blend_b(vp8_postproc_blend_b);

 typedef prototype_postproc((*vp8_postproc_fn_t));
 typedef prototype_postproc_inplace((*vp8_postproc_inplace_fn_t));
 typedef prototype_postproc_addnoise((*vp8_postproc_addnoise_fn_t));
+typedef prototype_postproc_blend_mb_inner((*vp8_postproc_blend_mb_inner_fn_t));
+typedef prototype_postproc_blend_mb_outer((*vp8_postproc_blend_mb_outer_fn_t));
+typedef prototype_postproc_blend_b((*vp8_postproc_blend_b_fn_t));
 typedef struct
 {
-    vp8_postproc_inplace_fn_t   down;
-    vp8_postproc_inplace_fn_t   across;
-    vp8_postproc_fn_t           downacross;
-    vp8_postproc_addnoise_fn_t  addnoise;
+    vp8_postproc_inplace_fn_t           down;
+    vp8_postproc_inplace_fn_t           across;
+    vp8_postproc_fn_t                   downacross;
+    vp8_postproc_addnoise_fn_t          addnoise;
+    vp8_postproc_blend_mb_inner_fn_t    blend_mb_inner;
+    vp8_postproc_blend_mb_outer_fn_t    blend_mb_outer;
+    vp8_postproc_blend_b_fn_t           blend_b;
 } vp8_postproc_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT
--- a/vp8/common/ppflags.h
+++ b/vp8/common/ppflags.h
@@ -13,14 +13,17 @@
 #define __INC_PPFLAGS_H
 enum
 {
-    VP8D_NOFILTERING    = 0,
-    VP8D_DEBLOCK        = 1,
-    VP8D_DEMACROBLOCK   = 2,
-    VP8D_ADDNOISE       = 4,
-    VP8D_DEBUG_LEVEL1   = 8,
-    VP8D_DEBUG_LEVEL2   = 16,
-    VP8D_DEBUG_LEVEL3   = 32,
-    VP8D_DEBUG_LEVEL4   = 64,
+    VP8D_NOFILTERING            = 0,
+    VP8D_DEBLOCK                = 1<<0,
+    VP8D_DEMACROBLOCK           = 1<<1,
+    VP8D_ADDNOISE               = 1<<2,
+    VP8D_DEBUG_LEVEL1           = 1<<3,
+    VP8D_DEBUG_LEVEL2           = 1<<4,
+    VP8D_DEBUG_LEVEL3           = 1<<5,
+    VP8D_DEBUG_LEVEL4           = 1<<6,
+    VP8D_DEBUG_DRAW_MV          = 1<<7,
+    VP8D_DEBUG_CLR_BLK_MODES    = 1<<8,
+    VP8D_DEBUG_CLR_FRM_REF_BLKS = 1<<9
 };

 #endif
--- a/vp8/common/recon.c
+++ b/vp8/common/recon.c
@@ -106,8 +106,24 @@ void vp8_recon2b_c
    }
 }

-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mby_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
+#if ARCH_ARM
+    BLOCKD *b = &x->block[0];
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    /*b = &x->block[4];*/
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    /*b = &x->block[8];*/
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+
+    /*b = &x->block[12];*/
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
    int i;

    for (i = 0; i < 16; i += 4)
@@ -116,10 +132,36 @@ void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)

        RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
    }
+#endif
 }

-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
+void vp8_recon_mb_c(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
+#if ARCH_ARM
+    BLOCKD *b = &x->block[0];
+
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+    RECON_INVOKE(rtcd, recon4)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b += 4;
+
+    /*b = &x->block[16];*/
+
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+    b++;
+    b++;
+    RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
+#else
    int i;

    for (i = 0; i < 16; i += 4)
@@ -135,4 +177,5 @@ void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)

        RECON_INVOKE(rtcd, recon2)(b->predictor, b->diff, *(b->base_dst) + b->dst, b->dst_stride);
    }
+#endif
 }
--- a/vp8/common/recon.h
+++ b/vp8/common/recon.h
@@ -12,11 +12,18 @@
 #ifndef __INC_RECON_H
 #define __INC_RECON_H

+#include "blockd.h"
+
 #define prototype_copy_block(sym) \
    void sym(unsigned char *src, int src_pitch, unsigned char *dst, int dst_pitch)

 #define prototype_recon_block(sym) \
-    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch);
+    void sym(unsigned char *pred, short *diff, unsigned char *dst, int pitch)
+
+#define prototype_recon_macroblock(sym) \
+    void sym(const struct vp8_recon_rtcd_vtable *rtcd, MACROBLOCKD *x)
+
+struct vp8_recon_rtcd_vtable;

 #if ARCH_X86 || ARCH_X86_64
 #include "x86/recon_x86.h"
@@ -56,9 +63,20 @@ extern prototype_recon_block(vp8_recon_recon2);
 #endif
 extern prototype_recon_block(vp8_recon_recon4);

+#ifndef vp8_recon_recon_mb
+#define vp8_recon_recon_mb vp8_recon_mb_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mb);
+
+#ifndef vp8_recon_recon_mby
+#define vp8_recon_recon_mby vp8_recon_mby_c
+#endif
+extern prototype_recon_macroblock(vp8_recon_recon_mby);
+
 typedef prototype_copy_block((*vp8_copy_block_fn_t));
 typedef prototype_recon_block((*vp8_recon_fn_t));
-typedef struct
+typedef prototype_recon_macroblock((*vp8_recon_mb_fn_t));
+typedef struct vp8_recon_rtcd_vtable
 {
    vp8_copy_block_fn_t  copy16x16;
    vp8_copy_block_fn_t  copy8x8;
@@ -66,6 +84,8 @@ typedef struct
    vp8_recon_fn_t       recon;
    vp8_recon_fn_t       recon2;
    vp8_recon_fn_t       recon4;
+    vp8_recon_mb_fn_t    recon_mb;
+    vp8_recon_mb_fn_t    recon_mby;
 } vp8_recon_rtcd_vtable_t;

 #if CONFIG_RUNTIME_CPU_DETECT
@@ -74,9 +94,6 @@ typedef struct
 #define RECON_INVOKE(ctx,fn) vp8_recon_##fn
 #endif

-#include "blockd.h"
-void vp8_recon16x16mby(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
-void vp8_recon16x16mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra4x4mb(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x);
 #endif
--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -18,9 +18,10 @@
 #include "onyxc_int.h"
 #endif

-// use this define on systems where unaligned int reads and writes are
-// not allowed, i.e. ARM architectures
-//#define MUST_BE_ALIGNED
+/* use this define on systems where unaligned int reads and writes are
+ * not allowed, i.e. ARM architectures
+ */
+/*#define MUST_BE_ALIGNED*/


 static const int bbb[4] = {0, 2, 8, 10};
@@ -255,7 +256,7 @@ void vp8_build_inter_predictors_mbuv(MACROBLOCKD *x)
    }
 }

-//encoder only
+/*encoder only*/
 void vp8_build_inter_predictors_mby(MACROBLOCKD *x)
 {

@@ -491,15 +492,16 @@ void vp8_build_uvmvs(MACROBLOCKD *x, int fullpixel)
 }


-// The following functions are wriiten for skip_recon_mb() to call. Since there is no recon in this
-// situation, we can write the result directly to dst buffer instead of writing it to predictor
-// buffer and then copying it to dst buffer.
+/* The following functions are written for skip_recon_mb() to call. Since there is no recon in this
+ * situation, we can write the result directly to dst buffer instead of writing it to predictor
+ * buffer and then copying it to dst buffer.
+ */
 static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp8_subpix_fn_t sppf)
 {
    int r;
    unsigned char *ptr_base;
    unsigned char *ptr;
-    //unsigned char *pred_ptr = d->predictor;
+    /*unsigned char *pred_ptr = d->predictor;*/
    int dst_stride = d->dst_stride;
    int pre_stride = d->pre_stride;

@@ -535,8 +537,8 @@ static void vp8_build_inter_predictors_b_s(BLOCKD *d, unsigned char *dst_ptr, vp

 void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
 {
-    //unsigned char *pred_ptr = x->block[0].predictor;
-    //unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;
+    /*unsigned char *pred_ptr = x->block[0].predictor;
+    unsigned char *dst_ptr = *(x->block[0].base_dst) + x->block[0].dst;*/
    unsigned char *pred_ptr = x->predictor;
    unsigned char *dst_ptr = x->dst.y_buffer;

@@ -546,26 +548,26 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
        unsigned char *ptr_base;
        unsigned char *ptr;
        unsigned char *uptr, *vptr;
-        //unsigned char *pred_ptr = x->predictor;
-        //unsigned char *upred_ptr = &x->predictor[256];
-        //unsigned char *vpred_ptr = &x->predictor[320];
+        /*unsigned char *pred_ptr = x->predictor;
+        unsigned char *upred_ptr = &x->predictor[256];
+        unsigned char *vpred_ptr = &x->predictor[320];*/
        unsigned char *udst_ptr = x->dst.u_buffer;
        unsigned char *vdst_ptr = x->dst.v_buffer;

        int mv_row = x->mode_info_context->mbmi.mv.as_mv.row;
        int mv_col = x->mode_info_context->mbmi.mv.as_mv.col;
-        int pre_stride = x->dst.y_stride; //x->block[0].pre_stride;
+        int pre_stride = x->dst.y_stride; /*x->block[0].pre_stride;*/

        ptr_base = x->pre.y_buffer;
        ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3);

        if ((mv_row | mv_col) & 7)
        {
-            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
        }
        else
        {
-            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+            RECON_INVOKE(&x->rtcd->recon, copy16x16)(ptr, pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
        }

        mv_row = x->block[16].bmi.mv.as_mv.row;
@@ -588,8 +590,9 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
    }
    else
    {
-        //note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
-        //if sth is wrong, go back to what it is in build_inter_predictors_mb.
+        /* note: this whole ELSE part is not executed at all. So, no way to test the correctness of my modification. Later,
+         * if something is wrong, go back to what it is in build_inter_predictors_mb.
+         */
        int i;

        if (x->mode_info_context->mbmi.partitioning < 3)
@@ -597,7 +600,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)
            for (i = 0; i < 4; i++)
            {
                BLOCKD *d = &x->block[bbb[i]];
-                //vp8_build_inter_predictors4b(x, d, 16);
+                /*vp8_build_inter_predictors4b(x, d, 16);*/

                {
                    unsigned char *ptr_base;
@@ -609,11 +612,11 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

                    if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7)
                    {
-                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        x->subpixel_predict8x8(ptr, d->pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                    }
                    else
                    {
-                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); //x->block[0].dst_stride);
+                        RECON_INVOKE(&x->rtcd->recon, copy8x8)(ptr, d->pre_stride, dst_ptr, x->dst.y_stride); /*x->block[0].dst_stride);*/
                    }
                }
            }
@@ -627,7 +630,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

                if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
                {
-                    //vp8_build_inter_predictors2b(x, d0, 16);
+                    /*vp8_build_inter_predictors2b(x, d0, 16);*/
                    unsigned char *ptr_base;
                    unsigned char *ptr;
                    unsigned char *pred_ptr = d0->predictor;
@@ -659,7 +662,7 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

            if (d0->bmi.mv.as_int == d1->bmi.mv.as_int)
            {
-                //vp8_build_inter_predictors2b(x, d0, 8);
+                /*vp8_build_inter_predictors2b(x, d0, 8);*/
                unsigned char *ptr_base;
                unsigned char *ptr;
                unsigned char *pred_ptr = d0->predictor;
@@ -669,11 +672,15 @@ void vp8_build_inter_predictors_mb_s(MACROBLOCKD *x)

                if (d0->bmi.mv.as_mv.row & 7 || d0->bmi.mv.as_mv.col & 7)
                {
-                    x->subpixel_predict8x4(ptr, d0->pre_stride, d0->bmi.mv.as_mv.col & 7, d0->bmi.mv.as_mv.row & 7, dst_ptr, x->dst.y_stride);
+                    x->subpixel_predict8x4(ptr, d0->pre_stride,
+                        d0->bmi.mv.as_mv.col & 7,
+                        d0->bmi.mv.as_mv.row & 7,
+                        dst_ptr, x->dst.uv_stride);
                }
                else
                {
-                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr, d0->pre_stride, dst_ptr, x->dst.y_stride);
+                    RECON_INVOKE(&x->rtcd->recon, copy8x4)(ptr,
+                        d0->pre_stride, dst_ptr, x->dst.uv_stride);
                }
            }
            else
--- a/vp8/common/reconintra.c
+++ b/vp8/common/reconintra.c
@@ -14,9 +14,9 @@
 #include "reconintra.h"
 #include "vpx_mem/vpx_mem.h"

-// For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
-// vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
-
+/* For skip_recon_mb(), add vp8_build_intra_predictors_mby_s(MACROBLOCKD *x) and
+ * vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x).
+ */
 void vp8_recon_intra_mbuv(const vp8_recon_rtcd_vtable_t *rtcd, MACROBLOCKD *x)
 {
    int i;
@@ -42,7 +42,7 @@ void vp8_build_intra_predictors_mby(MACROBLOCKD *x)
        yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
    }

-    // for Y
+    /* for Y */
    switch (x->mode_info_context->mbmi.mode)
    {
    case DC_PRED:
@@ -156,14 +156,14 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
    int r, c, i;

    int y_stride = x->dst.y_stride;
-    ypred_ptr = x->dst.y_buffer; //x->predictor;
+    ypred_ptr = x->dst.y_buffer; /*x->predictor;*/

    for (i = 0; i < 16; i++)
    {
        yleft_col[i] = x->dst.y_buffer [i* x->dst.y_stride -1];
    }

-    // for Y
+    /* for Y */
    switch (x->mode_info_context->mbmi.mode)
    {
    case DC_PRED:
@@ -204,11 +204,11 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
            expected_dc = 128;
        }

-        //vpx_memset(ypred_ptr, expected_dc, 256);
+        /*vpx_memset(ypred_ptr, expected_dc, 256);*/
        for (r = 0; r < 16; r++)
        {
            vpx_memset(ypred_ptr, expected_dc, 16);
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
        }
    }
    break;
@@ -222,7 +222,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
            ((int *)ypred_ptr)[1] = ((int *)yabove_row)[1];
            ((int *)ypred_ptr)[2] = ((int *)yabove_row)[2];
            ((int *)ypred_ptr)[3] = ((int *)yabove_row)[3];
-            ypred_ptr += y_stride; //16;
+            ypred_ptr += y_stride; /*16;*/
        }
    }
    break;
@@ -233,7 +233,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
        {

            vpx_memset(ypred_ptr, yleft_col[r], 16);
-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
        }

    }
@@ -256,7 +256,7 @@ void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x)
                ypred_ptr[c] = pred;
            }

-            ypred_ptr += y_stride;  //16;
+            ypred_ptr += y_stride;  /*16;*/
        }

    }
@@ -418,8 +418,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
    unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride;
    unsigned char vleft_col[20];
    unsigned char vtop_left = vabove_row[-1];
-    unsigned char *upred_ptr = x->dst.u_buffer; //&x->predictor[256];
-    unsigned char *vpred_ptr = x->dst.v_buffer; //&x->predictor[320];
+    unsigned char *upred_ptr = x->dst.u_buffer; /*&x->predictor[256];*/
+    unsigned char *vpred_ptr = x->dst.v_buffer; /*&x->predictor[320];*/
    int uv_stride = x->dst.uv_stride;

    int i, j;
@@ -472,14 +472,14 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
        }


-        //vpx_memset(upred_ptr,expected_udc,64);
-        //vpx_memset(vpred_ptr,expected_vdc,64);
+        /*vpx_memset(upred_ptr,expected_udc,64);*/
+        /*vpx_memset(vpred_ptr,expected_vdc,64);*/
        for (i = 0; i < 8; i++)
        {
            vpx_memset(upred_ptr, expected_udc, 8);
            vpx_memset(vpred_ptr, expected_vdc, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
        }
    }
    break;
@@ -491,8 +491,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
        {
            vpx_memcpy(upred_ptr, uabove_row, 8);
            vpx_memcpy(vpred_ptr, vabove_row, 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
        }

    }
@@ -505,8 +505,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
        {
            vpx_memset(upred_ptr, uleft_col[i], 8);
            vpx_memset(vpred_ptr, vleft_col[i], 8);
-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
        }
    }

@@ -538,8 +538,8 @@ void vp8_build_intra_predictors_mbuv_s(MACROBLOCKD *x)
                vpred_ptr[j] = predv;
            }

-            upred_ptr += uv_stride; //8;
-            vpred_ptr += uv_stride; //8;
+            upred_ptr += uv_stride; /*8;*/
+            vpred_ptr += uv_stride; /*8;*/
        }

    }
--- a/Show More
+++ b/Show More