libvpx: enable building for iOS devices (armv7)

Allow output of gas syntax assembly directly from obj_int_extract
Change-Id: I33a747e87ef1c590a8766dea17f8cb2497e54591
Replace generated quant tables with static lookup tables.
2013-07-19 14:05:59 -07:00 · 2013-07-16 14:04:41 -07:00 · 2013-07-16 14:04:41 -07:00 · 2013-07-16 14:04:39 -07:00 · 2013-07-16 12:41:10 -07:00 · 2013-07-16 12:40:48 -07:00
286 changed files with 21645 additions and 33034 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,8 +1,6 @@
 *.a
 *.asm.s
 *.d
-*.gcno
-*.gcda
 *.o
 *~
 /*.ivf
@@ -16,7 +14,7 @@
 /.install-*
 /.libs
 /Makefile
-/config.log
+/config.err
 /config.mk
 /decode_to_md5
 /decode_to_md5.c
--- a/README
+++ b/README
@@ -1,7 +1,7 @@
 vpx Multi-Format Codec SDK
-README - 1 August 2013
+README - 21 June 2012

-Welcome to the WebM VP8/VP9 Codec SDK!
+Welcome to the WebM VP8 Codec SDK!

 COMPILING THE APPLICATIONS/LIBRARIES:
  The build system used is similar to autotools. Building generally consists of
@@ -53,63 +53,33 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv5te-android-gcc
    armv5te-linux-rvct
    armv5te-linux-gcc
-    armv5te-none-rvct
    armv6-darwin-gcc
    armv6-linux-rvct
    armv6-linux-gcc
-    armv6-none-rvct
    armv7-android-gcc
-    armv7-darwin-gcc
    armv7-linux-rvct
    armv7-linux-gcc
-    armv7-none-rvct
-    armv7-win32-vs11
    mips32-linux-gcc
    ppc32-darwin8-gcc
    ppc32-darwin9-gcc
-    ppc32-linux-gcc
    ppc64-darwin8-gcc
    ppc64-darwin9-gcc
    ppc64-linux-gcc
-    sparc-solaris-gcc
-    x86-android-gcc
    x86-darwin8-gcc
    x86-darwin8-icc
    x86-darwin9-gcc
    x86-darwin9-icc
-    x86-darwin10-gcc
-    x86-darwin11-gcc
-    x86-darwin12-gcc
-    x86-darwin13-gcc
    x86-linux-gcc
    x86-linux-icc
-    x86-os2-gcc
    x86-solaris-gcc
-    x86-win32-gcc
    x86-win32-vs7
    x86-win32-vs8
-    x86-win32-vs9
-    x86-win32-vs10
-    x86-win32-vs11
    x86_64-darwin9-gcc
-    x86_64-darwin10-gcc
-    x86_64-darwin11-gcc
-    x86_64-darwin12-gcc
-    x86_64-darwin13-gcc
    x86_64-linux-gcc
-    x86_64-linux-icc
    x86_64-solaris-gcc
-    x86_64-win64-gcc
    x86_64-win64-vs8
-    x86_64-win64-vs9
-    x86_64-win64-vs10
-    x86_64-win64-vs11
    universal-darwin8-gcc
    universal-darwin9-gcc
-    universal-darwin10-gcc
-    universal-darwin11-gcc
-    universal-darwin12-gcc
-    universal-darwin13-gcc
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
@@ -127,7 +97,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:

  5. Configuration errors
  If the configuration step fails, the first step is to look in the error log.
-  This defaults to config.log. This should give a good indication of what went
+  This defaults to config.err. This should give a good indication of what went
  wrong. If not, contact us for support.

 SUPPORT
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -7,7 +7,18 @@ REM   in the file PATENTS.  All contributing project authors may
 REM   be found in the AUTHORS file in the root of the source tree.
 echo on

+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/common/vp9_asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/decoder/vp9_asm_dec_offsets.c"
+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp9/encoder/vp9_asm_enc_offsets.c"
+obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
+
+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/common/vp8_asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/decoder/vp8_asm_dec_offsets.c"
 cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vp8/encoder/vp8_asm_enc_offsets.c"
+obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
+obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
 obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

 cl /I "./" /I "%1" /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%1/vpx_scale/vpx_scale_asm_offsets.c"
--- a/build/make/armlink_adapter.sh
+++ b/build/make/armlink_adapter.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 ##
 ##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
@@ -13,20 +13,20 @@
 verbose=0
 set -- $*
 for i; do
-    if [ "$i" = "-o" ]; then
+    if [ "$i" == "-o" ]; then
        on_of=1
-    elif [ "$i" = "-v" ]; then
+    elif [ "$i" == "-v" ]; then
        verbose=1
-    elif [ "$i" = "-g" ]; then
+    elif [ "$i" == "-g" ]; then
        args="${args} --debug"
-    elif [ "$on_of" = "1" ]; then
+    elif [ "$on_of" == "1" ]; then
        outfile=$i
        on_of=0
    elif [ -f "$i" ]; then
        infiles="$infiles $i"
-    elif [ "${i#-l}" != "$i" ]; then
+    elif [ "${i:0:2}" == "-l" ]; then
        libs="$libs ${i#-l}"
-    elif [ "${i#-L}" != "$i" ]; then
+    elif [ "${i:0:2}" == "-L" ]; then
        libpaths="${libpaths} ${i#-L}"
    else
        args="${args} ${i}"
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 ##
 ##  configure.sh
 ##
@@ -75,7 +75,7 @@ Options:

 Build options:
  --help                      print this message
-  --log=yes|no|FILE           file configure log is written to [config.log]
+  --log=yes|no|FILE           file configure log is written to [config.err]
  --target=TARGET             target platform tuple [generic-gnu]
  --cpu=CPU                   optimize for a specific cpu rather than a family
  --extra-cflags=ECFLAGS      add ECFLAGS to CFLAGS [$CFLAGS]
@@ -198,11 +198,11 @@ add_extralibs() {
 #
 # Boolean Manipulation Functions
 #
-enable_feature(){
+enable(){
    set_all yes $*
 }

-disable_feature(){
+disable(){
    set_all no $*
 }

@@ -219,7 +219,7 @@ soft_enable() {
    for var in $*; do
        if ! disabled $var; then
            log_echo "  enabling $var"
-            enable_feature $var
+            enable $var
        fi
    done
 }
@@ -228,7 +228,7 @@ soft_disable() {
    for var in $*; do
        if ! enabled $var; then
            log_echo "  disabling $var"
-            disable_feature $var
+            disable $var
        fi
    done
 }
@@ -251,10 +251,10 @@ tolower(){
 # Temporary File Functions
 #
 source_path=${0%/*}
-enable_feature source_path_used
+enable source_path_used
 if test -z "$source_path" -o "$source_path" = "." ; then
    source_path="`pwd`"
-    disable_feature source_path_used
+    disable source_path_used
 fi

 if test ! -z "$TMPDIR" ; then
@@ -264,13 +264,12 @@ elif test ! -z "$TEMPDIR" ; then
 else
    TMPDIRx="/tmp"
 fi
-RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
-TMP_H="${TMPDIRx}/vpx-conf-$$-${RAND}.h"
-TMP_C="${TMPDIRx}/vpx-conf-$$-${RAND}.c"
-TMP_CC="${TMPDIRx}/vpx-conf-$$-${RAND}.cc"
-TMP_O="${TMPDIRx}/vpx-conf-$$-${RAND}.o"
-TMP_X="${TMPDIRx}/vpx-conf-$$-${RAND}.x"
-TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RAND}.asm"
+TMP_H="${TMPDIRx}/vpx-conf-$$-${RANDOM}.h"
+TMP_C="${TMPDIRx}/vpx-conf-$$-${RANDOM}.c"
+TMP_CC="${TMPDIRx}/vpx-conf-$$-${RANDOM}.cc"
+TMP_O="${TMPDIRx}/vpx-conf-$$-${RANDOM}.o"
+TMP_X="${TMPDIRx}/vpx-conf-$$-${RANDOM}.x"
+TMP_ASM="${TMPDIRx}/vpx-conf-$$-${RANDOM}.asm"

 clean_temp_files() {
    rm -f ${TMP_C} ${TMP_CC} ${TMP_H} ${TMP_O} ${TMP_X} ${TMP_ASM}
@@ -317,8 +316,8 @@ check_header(){
    header=$1
    shift
    var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
-    disable_feature $var
-    check_cpp "$@" <<EOF && enable_feature $var
+    disable $var
+    check_cpp "$@" <<EOF && enable $var
 #include "$header"
 int x;
 EOF
@@ -480,7 +479,7 @@ process_common_cmdline() {
    for opt in "$@"; do
        optval="${opt#*=}"
        case "$opt" in
-        --child) enable_feature child
+        --child) enable child
        ;;
        --log*)
        logging="$optval"
@@ -492,7 +491,7 @@ process_common_cmdline() {
        ;;
        --target=*) toolchain="${toolchain:-${optval}}"
        ;;
-        --force-target=*) toolchain="${toolchain:-${optval}}"; enable_feature force_toolchain
+        --force-target=*) toolchain="${toolchain:-${optval}}"; enable force_toolchain
        ;;
        --cpu)
        ;;
@@ -512,7 +511,7 @@ process_common_cmdline() {
          echo "${CMDLINE_SELECT}" | grep "^ *$option\$" >/dev/null ||
            die_unknown $opt
        fi
-        ${action}_feature $option
+        $action $option
        ;;
        --require-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
@@ -524,11 +523,11 @@ process_common_cmdline() {
        ;;
        --force-enable-?*|--force-disable-?*)
        eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'`
-        ${action}_feature $option
+        $action $option
        ;;
        --libc=*)
        [ -d "${optval}" ] || die "Not a directory: ${optval}"
-        disable_feature builtin_libc
+        disable builtin_libc
        alt_libc="${optval}"
        ;;
        --as=*)
@@ -654,10 +653,6 @@ process_common_toolchain() {
                tgt_isa=x86_64
                tgt_os=darwin12
                ;;
-            *darwin13*)
-                tgt_isa=x86_64
-                tgt_os=darwin13
-                ;;
            x86_64*mingw32*)
                tgt_os=win64
                ;;
@@ -697,13 +692,13 @@ process_common_toolchain() {

    # Mark the specific ISA requested as enabled
    soft_enable ${tgt_isa}
-    enable_feature ${tgt_os}
-    enable_feature ${tgt_cc}
+    enable ${tgt_os}
+    enable ${tgt_cc}

    # Enable the architecture family
    case ${tgt_isa} in
-        arm*) enable_feature arm;;
-        mips*) enable_feature mips;;
+        arm*) enable arm;;
+        mips*) enable mips;;
    esac

    # PIC is probably what we want when building shared libs
@@ -756,17 +751,13 @@ process_common_toolchain() {
            add_cflags  "-mmacosx-version-min=10.8"
            add_ldflags "-mmacosx-version-min=10.8"
            ;;
-        *-darwin13-*)
-            add_cflags  "-mmacosx-version-min=10.9"
-            add_ldflags "-mmacosx-version-min=10.9"
-            ;;
    esac

    # Handle Solaris variants. Solaris 10 needs -lposix4
    case ${toolchain} in
        sparc-solaris-*)
            add_extralibs -lposix4
-            disable_feature fast_unaligned
+            disable fast_unaligned
            ;;
        *-solaris-*)
            add_extralibs -lposix4
@@ -791,7 +782,7 @@ process_common_toolchain() {
            ;;
        armv5te)
            soft_enable edsp
-            disable_feature fast_unaligned
+            disable fast_unaligned
            ;;
        esac

@@ -806,7 +797,7 @@ process_common_toolchain() {
            arch_int=${arch_int%%te}
            check_add_asflags --defsym ARCHITECTURE=${arch_int}
            tune_cflags="-mtune="
-            if [ ${tgt_isa} = "armv7" ]; then
+            if [ ${tgt_isa} == "armv7" ]; then
                if [ -z "${float_abi}" ]; then
                    check_cpp <<EOF && float_abi=hard || float_abi=softfp
 #ifndef __ARM_PCS_VFP
@@ -843,8 +834,8 @@ EOF
            asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
            AS_SFX=.s
            msvs_arch_dir=arm-msvs
-            disable_feature multithread
-            disable_feature unit_tests
+            disable multithread
+            disable unit_tests
            ;;
        rvct)
            CC=armcc
@@ -856,7 +847,7 @@ EOF
            tune_cflags="--cpu="
            tune_asflags="--cpu="
            if [ -z "${tune_cpu}" ]; then
-                if [ ${tgt_isa} = "armv7" ]; then
+                if [ ${tgt_isa} == "armv7" ]; then
                    if enabled neon
                    then
                        check_add_cflags --fpu=softvfp+vfpv3
@@ -881,8 +872,8 @@ EOF

        case ${tgt_os} in
        none*)
-            disable_feature multithread
-            disable_feature os_support
+            disable multithread
+            disable os_support
            ;;

        android*)
@@ -914,9 +905,9 @@ EOF
            # Cortex-A8 implementations (NDK Dev Guide)
            add_ldflags "-Wl,--fix-cortex-a8"

-            enable_feature pic
+            enable pic
            soft_enable realtime_only
-            if [ ${tgt_isa} = "armv7" ]; then
+            if [ ${tgt_isa} == "armv7" ]; then
                soft_enable runtime_cpu_detect
            fi
            if enabled runtime_cpu_detect; then
@@ -970,7 +961,7 @@ EOF
         ;;

        linux*)
-            enable_feature linux
+            enable linux
            if enabled rvct; then
                # Check if we have CodeSourcery GCC in PATH. Needed for
                # libraries
@@ -1001,14 +992,14 @@ EOF
        tune_cflags="-mtune="
        if enabled dspr2; then
            check_add_cflags -mips32r2 -mdspr2
-            disable_feature fast_unaligned
+            disable fast_unaligned
        fi
        check_add_cflags -march=${tgt_isa}
        check_add_asflags -march=${tgt_isa}
        check_add_asflags -KPIC
    ;;
    ppc*)
-        enable_feature ppc
+        enable ppc
        bits=${tgt_isa##ppc}
        link_with_cc=gcc
        setup_gnu_toolchain
@@ -1062,7 +1053,7 @@ EOF
                setup_gnu_toolchain
                add_cflags -use-msasm -use-asm
                add_ldflags -i-static
-                enabled x86_64 && add_cflags -ipo -static -O3
+                enabled x86_64 && add_cflags -ipo -no-prec-div -static -xSSE2 -axSSE2
                enabled x86_64 && AR=xiar
                case ${tune_cpu} in
                    atom*)
@@ -1156,7 +1147,7 @@ EOF
    ;;
    universal*|*-gcc|generic-gnu)
        link_with_cc=gcc
-        enable_feature gcc
+        enable gcc
    setup_gnu_toolchain
    ;;
    esac
@@ -1190,12 +1181,6 @@ EOF
        fi
    fi

-    # default use_x86inc to yes if pic is no or 64bit or we are not on darwin
-    echo "  checking here for x86inc \"${tgt_isa}\" \"$pic\" "
-    if [ ${tgt_isa} = x86_64 -o ! "$pic" = "yes" -o "${tgt_os#darwin}" = "${tgt_os}"  ]; then
-      soft_enable use_x86inc
-    fi
-
    # Position Independent Code (PIC) support, for building relocatable
    # shared objects
    enabled gcc && enabled pic && check_add_cflags -fPIC
@@ -1205,14 +1190,14 @@ EOF
    enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0

    # Check for strip utility variant
-    ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip
+    ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable gnu_strip

    # Try to determine target endianness
    check_cc <<EOF
    unsigned int e = 'O'<<24 | '2'<<16 | 'B'<<8 | 'E';
 EOF
    [ -f "${TMP_O}" ] && od -A n -t x1 "${TMP_O}" | tr -d '\n' |
-        grep '4f *32 *42 *45' >/dev/null 2>&1 && enable_feature big_endian
+        grep '4f *32 *42 *45' >/dev/null 2>&1 && enable big_endian

    # Try to find which inline keywords are supported
    check_cc <<EOF && INLINE="inline"
@@ -1237,7 +1222,7 @@ EOF
            if enabled dspr2; then
                if enabled big_endian; then
                    echo "dspr2 optimizations are available only for little endian platforms"
-                    disable_feature dspr2
+                    disable dspr2
                fi
            fi
        ;;
@@ -1288,8 +1273,8 @@ print_config_h() {

 print_webm_license() {
    local destination=$1
-    local prefix="$2"
-    local suffix="$3"
+    local prefix=$2
+    local suffix=$3
    shift 3
    cat <<EOF > ${destination}
 ${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix}
@@ -1310,8 +1295,8 @@ process_detect() {
    true;
 }

-enable_feature logging
-logfile="config.log"
+enable logging
+logfile="config.err"
 self=$0
 process() {
    cmdline_args="$@"
--- a/build/make/gen_asm_deps.sh
+++ b/build/make/gen_asm_deps.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 ##
 ##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -381,7 +381,7 @@ generate_vcproj() {
                            RuntimeLibrary="$debug_runtime" \
                            UsePrecompiledHeader="0" \
                            WarningLevel="3" \
-                            DebugInformationFormat="2" \
+                            DebugInformationFormat="1" \
                            $warn_64bit \

                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs" Debug="true"
@@ -395,7 +395,7 @@ generate_vcproj() {
                            RuntimeLibrary="$debug_runtime" \
                            UsePrecompiledHeader="0" \
                            WarningLevel="3" \
-                            DebugInformationFormat="2" \
+                            DebugInformationFormat="1" \
                            $warn_64bit \

                        $uses_asm && tag Tool Name="YASM"  IncludePaths="$incs" Debug="true"
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -72,21 +72,10 @@ parse_project() {
    eval "${var}_name=$name"
    eval "${var}_guid=$guid"

-    if [ "$sfx" = "vcproj" ]; then
-        cur_config_list=`grep -A1 '<Configuration' $file |
-            grep Name | cut -d\" -f2`
-    else
-        cur_config_list=`grep -B1 'Label="Configuration"' $file |
-            grep Condition | cut -d\' -f4`
-    fi
-    new_config_list=$(for i in $config_list $cur_config_list; do
-        echo $i
-    done | sort | uniq)
-    if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then
-        mixed_platforms=1
-    fi
-    config_list="$new_config_list"
-    eval "${var}_config_list=\"$cur_config_list\""
+    # assume that all projects have the same list of possible configurations,
+    # so overwriting old config_lists is not a problem
+    config_list=`grep -A1 '<Configuration' $file |
+        grep Name | cut -d\" -f2`
    proj_list="${proj_list} ${var}"
 }

@@ -136,11 +125,6 @@ process_global() {
    indent_push
    IFS_bak=${IFS}
    IFS=$'\r'$'\n'
-    if [ "$mixed_platforms" != "" ]; then
-        config_list="
-Release|Mixed Platforms
-Debug|Mixed Platforms"
-    fi
    for config in ${config_list}; do
        echo "${indent}$config = $config"
    done
@@ -155,17 +139,10 @@ Debug|Mixed Platforms"
    indent_push
    for proj in ${proj_list}; do
        eval "local proj_guid=\${${proj}_guid}"
-        eval "local proj_config_list=\${${proj}_config_list}"
        IFS=$'\r'$'\n'
-        for config in ${proj_config_list}; do
-            if [ "$mixed_platforms" != "" ]; then
-                local c=${config%%|*}
-                echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}"
-                echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}"
-            else
-                echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
-                echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"
-            fi
+        for config in ${config_list}; do
+            echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}"
+            echo "${indent}${proj_guid}.${config}.Build.0 = ${config}"

        done
        IFS=${IFS_bak}
@@ -191,14 +168,9 @@ process_makefile() {
    IFS=$'\r'$'\n'
    local TAB=$'\t'
    cat <<EOF
-ifeq (\$(CONFIG_VS_VERSION),7)
-MSBUILD_TOOL := devenv.com
-else
-MSBUILD_TOOL := msbuild.exe
-endif
-found_devenv := \$(shell which \$(MSBUILD_TOOL) >/dev/null 2>&1 && echo yes)
+found_devenv := \$(shell which devenv.com >/dev/null 2>&1 && echo yes)
 .nodevenv.once:
-${TAB}@echo "  * \$(MSBUILD_TOOL) not found in path."
+${TAB}@echo "  * devenv.com not found in path."
 ${TAB}@echo "  * "
 ${TAB}@echo "  * You will have to build all configurations manually using the"
 ${TAB}@echo "  * Visual Studio IDE. To allow make to build them automatically,"
@@ -223,17 +195,16 @@ ${TAB}rm -rf "$platform"/"$config"
 ifneq (\$(found_devenv),)
  ifeq (\$(CONFIG_VS_VERSION),7)
 $nows_sln_config: $outfile
-${TAB}\$(MSBUILD_TOOL) $outfile -build "$config"
+${TAB}devenv.com $outfile -build "$config"

  else
 $nows_sln_config: $outfile
-${TAB}\$(MSBUILD_TOOL) $outfile -m -t:Build \\
-${TAB}${TAB}-p:Configuration="$config" -p:Platform="$platform"
+${TAB}devenv.com $outfile -build "$sln_config"

  endif
 else
 $nows_sln_config: $outfile .nodevenv.once
-${TAB}@echo "  * Skipping build of $sln_config (\$(MSBUILD_TOOL) not in path)."
+${TAB}@echo "  * Skipping build of $sln_config (devenv.com not in path)."
 ${TAB}@echo "  * "
 endif

--- a/build/make/rtcd.sh
+++ b/build/make/rtcd.sh
@@ -290,11 +290,9 @@ static void setup_rtcd_internal(void)
 {
 $(set_function_pointers c $ALL_ARCHS)
 #if HAVE_DSPR2
-#if CONFIG_VP8
 void dsputil_static_init();
 dsputil_static_init();
 #endif
-#endif
 }
 #endif
 $(common_bottom)
--- a/build/make/thumb.pm
+++ b/build/make/thumb.pm
@@ -47,7 +47,7 @@ sub FixThumbInstructions($$)
    # this is used, it's used for two subsequent load instructions,
    # where a hand-written version of it could merge two subsequent
    # add and sub instructions.
-    s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g;
+    s/^(\s*)((ldr|str)(ne)?)(\s+)(r\d+),\s*\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6, [$7]\n$1add$4$5$7, $7, $8/g;

    # Convert register post indexing to a separate add instruction.
    # This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]",
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 ##
 ##  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 ##
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -7,6 +7,17 @@ REM   in the file PATENTS.  All contributing project authors may
 REM   be found in the AUTHORS file in the root of the source tree.
 echo on

+cl /I "./" /I "%1" /nologo /c "%1/vp9/common/vp9_asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp9/decoder/vp9_asm_dec_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp9/encoder/vp9_asm_enc_offsets.c"
+obj_int_extract.exe rvds "vp9_asm_com_offsets.obj" > "vp9_asm_com_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_dec_offsets.obj" > "vp9_asm_dec_offsets.asm"
+obj_int_extract.exe rvds "vp9_asm_enc_offsets.obj" > "vp9_asm_enc_offsets.asm"
+
+cl /I "./" /I "%1" /nologo /c "%1/vp8/common/vp8_asm_com_offsets.c"
+cl /I "./" /I "%1" /nologo /c "%1/vp8/decoder/vp8_asm_dec_offsets.c"
 cl /I "./" /I "%1" /nologo /c "%1/vp8/encoder/vp8_asm_enc_offsets.c"
+obj_int_extract.exe rvds "vp8_asm_com_offsets.obj" > "vp8_asm_com_offsets.asm"
+obj_int_extract.exe rvds "vp8_asm_dec_offsets.obj" > "vp8_asm_dec_offsets.asm"
 obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"

--- a/configure
+++ b/configure
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 ##
 ##  configure
 ##
@@ -38,7 +38,6 @@ Advanced options:
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
  ${toggle_mem_tracker}           track memory usage
  ${toggle_postproc}              postprocessing
-  ${toggle_vp9_postproc}          vp9 specific postprocessing
  ${toggle_multithread}           multithreaded encoding and decoding
  ${toggle_spatial_resampling}    spatial sampling (scaling) support
  ${toggle_realtime_only}         enable this option while building for real-time encoding
@@ -116,7 +115,6 @@ all_platforms="${all_platforms} x86-darwin9-icc"
 all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
-all_platforms="${all_platforms} x86-darwin13-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
 all_platforms="${all_platforms} x86-os2-gcc"
@@ -131,7 +129,6 @@ all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
-all_platforms="${all_platforms} x86_64-darwin13-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
 all_platforms="${all_platforms} x86_64-solaris-gcc"
@@ -145,7 +142,6 @@ all_platforms="${all_platforms} universal-darwin9-gcc"
 all_platforms="${all_platforms} universal-darwin10-gcc"
 all_platforms="${all_platforms} universal-darwin11-gcc"
 all_platforms="${all_platforms} universal-darwin12-gcc"
-all_platforms="${all_platforms} universal-darwin13-gcc"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -154,7 +150,7 @@ all_targets="libs examples docs"

 # all targets available are enabled, by default.
 for t in ${all_targets}; do
-    [ -f ${source_path}/${t}.mk ] && enable_feature ${t}
+    [ -f ${source_path}/${t}.mk ] && enable ${t}
 done

 # check installed doxygen version
@@ -165,30 +161,30 @@ if [ ${doxy_major:-0} -ge 1 ]; then
    doxy_minor=${doxy_version%%.*}
    doxy_patch=${doxy_version##*.}

-    [ $doxy_major -gt 1 ] && enable_feature doxygen
-    [ $doxy_minor -gt 5 ] && enable_feature doxygen
-    [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
+    [ $doxy_major -gt 1 ] && enable doxygen
+    [ $doxy_minor -gt 5 ] && enable doxygen
+    [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable doxygen
 fi

 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
 # case.
-enabled doxygen && php -v >/dev/null 2>&1 && enable_feature install_docs
-enable_feature install_bins
-enable_feature install_libs
+enabled doxygen && php -v >/dev/null 2>&1 && enable install_docs
+enable install_bins
+enable install_libs

-enable_feature static
-enable_feature optimizations
-enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
-enable_feature md5
-enable_feature spatial_resampling
-enable_feature multithread
-enable_feature os_support
-enable_feature temporal_denoising
+enable static
+enable optimizations
+enable fast_unaligned #allow unaligned accesses, if supported by hw
+enable md5
+enable spatial_resampling
+enable multithread
+enable os_support
+enable temporal_denoising

-[ -d ${source_path}/../include ] && enable_feature alt_tree_layout
+[ -d ${source_path}/../include ] && enable alt_tree_layout
 for d in vp8 vp9; do
-    [ -d ${source_path}/${d} ] && disable_feature alt_tree_layout;
+    [ -d ${source_path}/${d} ] && disable alt_tree_layout;
 done

 if ! enabled alt_tree_layout; then
@@ -201,10 +197,10 @@ else
 [ -f ${source_path}/../include/vpx/vp8dx.h ] && CODECS="${CODECS} vp8_decoder"
 [ -f ${source_path}/../include/vpx/vp9cx.h ] && CODECS="${CODECS} vp9_encoder"
 [ -f ${source_path}/../include/vpx/vp9dx.h ] && CODECS="${CODECS} vp9_decoder"
-[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable_feature vp8_encoder
-[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable_feature vp8_decoder
-[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable_feature vp9_encoder
-[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable_feature vp9_decoder
+[ -f ${source_path}/../include/vpx/vp8cx.h ] || disable vp8_encoder
+[ -f ${source_path}/../include/vpx/vp8dx.h ] || disable vp8_decoder
+[ -f ${source_path}/../include/vpx/vp9cx.h ] || disable vp9_encoder
+[ -f ${source_path}/../include/vpx/vp9dx.h ] || disable vp9_decoder

 [ -f ${source_path}/../lib/*/*mt.lib ] && soft_enable static_msvcrt
 fi
@@ -251,6 +247,7 @@ EXPERIMENT_LIST="
    multiple_arf
    non420
    alpha
+    balanced_coeftree
 "
 CONFIG_LIST="
    external_build
@@ -258,7 +255,6 @@ CONFIG_LIST="
    install_bins
    install_libs
    install_srcs
-    use_x86inc
    debug
    gprof
    gcov
@@ -280,7 +276,6 @@ CONFIG_LIST="
    dc_recon
    runtime_cpu_detect
    postproc
-    vp9_postproc
    multithread
    internal_stats
    ${CODECS}
@@ -316,7 +311,6 @@ CMDLINE_SELECT="
    gprof
    gcov
    pic
-    use_x86inc
    optimizations
    ccache
    runtime_cpu_detect
@@ -335,7 +329,6 @@ CMDLINE_SELECT="
    dequant_tokens
    dc_recon
    postproc
-    vp9_postproc
    multithread
    internal_stats
    ${CODECS}
@@ -361,12 +354,12 @@ process_cmdline() {
    for opt do
        optval="${opt#*=}"
        case "$opt" in
-        --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
+        --disable-codecs) for c in ${CODECS}; do disable $c; done ;;
        --enable-?*|--disable-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
        if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
            if enabled experimental; then
-                ${action}_feature $option
+                $action $option
            else
                log_echo "Ignoring $opt -- not in experimental mode."
            fi
@@ -387,8 +380,8 @@ post_process_cmdline() {
    # If the codec family is enabled, enable all components of that family.
    log_echo "Configuring selected codecs"
    for c in ${CODECS}; do
-        disabled ${c%%_*} && disable_feature ${c}
-        enabled ${c%%_*} && enable_feature ${c}
+        disabled ${c%%_*} && disable ${c}
+        enabled ${c%%_*} && enable ${c}
    done

    # Enable all detected codecs, if they haven't been disabled
@@ -396,12 +389,12 @@ post_process_cmdline() {

    # Enable the codec family if any component of that family is enabled
    for c in ${CODECS}; do
-        enabled $c && enable_feature ${c%_*}
+        enabled $c && enable ${c%_*}
    done

    # Set the {en,de}coders variable if any algorithm in that class is enabled
    for c in ${CODECS}; do
-        enabled ${c} && enable_feature ${c##*_}s
+        enabled ${c} && enable ${c##*_}s
    done
 }

@@ -441,7 +434,7 @@ process_targets() {
    done
    enabled debug_libs && DIST_DIR="${DIST_DIR}-debug"
    enabled codec_srcs && DIST_DIR="${DIST_DIR}-src"
-    ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost"
+    ! enabled postproc && DIST_DIR="${DIST_DIR}-nopost"
    ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt"
    ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs"
    DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}"
@@ -511,13 +504,13 @@ process_detect() {
    fi
    if [ -z "$CC" ] || enabled external_build; then
        echo "Bypassing toolchain for environment detection."
-        enable_feature external_build
+        enable external_build
        check_header() {
            log fake_check_header "$@"
            header=$1
            shift
            var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'`
-            disable_feature $var
+            disable $var
            # Headers common to all environments
            case $header in
                stdio.h)
@@ -529,7 +522,7 @@ process_detect() {
                        [ -f "${d##-I}/$header" ] && result=true && break
                    done
                    ${result:-true}
-            esac && enable_feature $var
+            esac && enable $var

            # Specialize windows and POSIX environments.
            case $toolchain in
@@ -537,7 +530,7 @@ process_detect() {
                    case $header-$toolchain in
                        stdint*-gcc) true;;
                        *) false;;
-                    esac && enable_feature $var
+                    esac && enable $var
                    ;;
                *)
                    case $header in
@@ -546,7 +539,7 @@ process_detect() {
                        sys/mman.h) true;;
                        unistd.h) true;;
                        *) false;;
-                    esac && enable_feature $var
+                    esac && enable $var
            esac
            enabled $var
        }
@@ -564,7 +557,7 @@ EOF
    check_header sys/mman.h
    check_header unistd.h # for sysconf(3) and friends.

-    check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
+    check_header vpx/vpx_integer.h -I${source_path} && enable vpx_ports
 }

 process_toolchain() {
@@ -646,18 +639,14 @@ process_toolchain() {
    # ccache only really works on gcc toolchains
    enabled gcc || soft_disable ccache
    if enabled mips; then
-        enable_feature dequant_tokens
-        enable_feature dc_recon
-    fi
-
-    if enabled internal_stats; then
-        enable_feature vp9_postproc
+        enable dequant_tokens
+        enable dc_recon
    fi

    # Enable the postbuild target if building for visual studio.
    case "$tgt_cc" in
-        vs*) enable_feature msvs
-             enable_feature solution
+        vs*) enable msvs
+             enable solution
             vs_version=${tgt_cc##vs}
             case $vs_version in
             [789])
@@ -693,14 +682,6 @@ process_toolchain() {
            # iOS/ARM builds do not work with gtest. This does not match
            # x86 targets.
        ;;
-        *-win*)
-            # Some mingw toolchains don't have pthread available by default.
-            # Treat these more like visual studio where threading in gtest
-            # would be disabled for the same reason.
-            check_cxx "$@" <<EOF && soft_enable unit_tests
-int z;
-EOF
-        ;;
        *)
            enabled pthread_h && check_cxx "$@" <<EOF && soft_enable unit_tests
 int z;
--- a/examples.mk
+++ b/examples.mk
@@ -49,9 +49,6 @@ vpxenc.DESCRIPTION           = Full featured encoder
 UTILS-$(CONFIG_VP8_ENCODER)    += vp8_scalable_patterns.c
 vp8_scalable_patterns.GUID   = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C
 vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder
-UTILS-$(CONFIG_VP9_ENCODER)    += vp9_spatial_scalable_encoder.c
-vp9_spatial_scalable_encoder.GUID   = 4A38598D-627D-4505-9C7B-D4020C84100D
-vp9_spatial_scalable_encoder.DESCRIPTION = Spatial Scalable Encoder

 # Clean up old ivfenc, ivfdec binaries.
 ifeq ($(CONFIG_MSVS),yes)
--- a/libmkv/EbmlWriter.c
+++ b/libmkv/EbmlWriter.c
@@ -105,7 +105,7 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l
 void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) {
  int size;
  for (size = 4; size > 1; size--) {
-    if (bin & (unsigned int)0x000000ff << ((size - 1) * 8))
+    if (bin & (0x000000ffu << ((size - 1) * 8)))
      break;
  }
  Ebml_WriteID(glob, class_id);
--- a/libs.mk
+++ b/libs.mk
@@ -57,13 +57,6 @@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
 endef

-# x86inc.asm is not compatible with pic 32bit builds. Restrict
-# files which use it to 64bit builds or 32bit without pic
-USE_X86INC = no
-ifeq ($(CONFIG_USE_X86INC),yes)
-  USE_X86INC = yes
-endif
-
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk

@@ -390,12 +383,7 @@ LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
 libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

-libvpx_test_srcs.txt:
-	@echo "    [CREATE] $@"
-	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
-CLEAN-OBJS += libvpx_test_srcs.txt
-
-$(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
+$(LIBVPX_TEST_DATA):
 	@echo "    [DOWNLOAD] $@"
 	$(qexec)trap 'rm -f $@' INT TERM &&\
            curl -L -o $@ $(call libvpx_test_data_url,$(@F))
@@ -455,10 +443,6 @@ else
 include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk
 GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS))
 GTEST_OBJS=$(call objs,$(GTEST_SRCS))
-ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
-# Disabling pthreads globally will cause issues on darwin and possibly elsewhere
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
-endif
 $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
 $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
 OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
@@ -483,7 +467,7 @@ $(foreach bin,$(LIBVPX_TEST_BINS),\
        lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
    $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
        $(LIBVPX_TEST_OBJS) \
-        -L. -lvpx -lgtest $(extralibs) -lm)\
+        -L. -lvpx -lgtest -lpthread -lm)\
        )))\
    $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\

--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -8,8 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef TEST_ACM_RANDOM_H_
-#define TEST_ACM_RANDOM_H_
+#ifndef LIBVPX_TEST_ACM_RANDOM_H_
+#define LIBVPX_TEST_ACM_RANDOM_H_

 #include "third_party/googletest/src/include/gtest/gtest.h"

@@ -59,4 +59,4 @@ class ACMRandom {

 }  // namespace libvpx_test

-#endif  // TEST_ACM_RANDOM_H_
+#endif  // LIBVPX_TEST_ACM_RANDOM_H_
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -33,6 +33,10 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
    altref_count_ = 0;
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -27,10 +27,14 @@ class BordersTest : public ::libvpx_test::EncoderTest,
    SetMode(GET_PARAM(1));
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, 1);
+    if ( video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, 0);
      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
--- a/test/clear_system_state.h
+++ b/test/clear_system_state.h
@@ -10,7 +10,7 @@
 #ifndef TEST_CLEAR_SYSTEM_STATE_H_
 #define TEST_CLEAR_SYSTEM_STATE_H_

-#include "./vpx_config.h"
+#include "vpx_config.h"
 extern "C" {
 #if ARCH_X86 || ARCH_X86_64
 # include "vpx_ports/x86.h"
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -134,14 +134,14 @@ class VP8CodecFactory : public CodecFactory {

 const libvpx_test::VP8CodecFactory kVP8;

-#define VP8_INSTANTIATE_TEST_CASE(test, ...)\
+#define VP8_INSTANTIATE_TEST_CASE(test, params)\
  INSTANTIATE_TEST_CASE_P(VP8, test, \
      ::testing::Combine( \
          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
              &libvpx_test::kVP8)), \
-          __VA_ARGS__))
+          params))
 #else
-#define VP8_INSTANTIATE_TEST_CASE(test, ...)
+#define VP8_INSTANTIATE_TEST_CASE(test, params)
 #endif  // CONFIG_VP8


@@ -216,14 +216,14 @@ class VP9CodecFactory : public CodecFactory {

 const libvpx_test::VP9CodecFactory kVP9;

-#define VP9_INSTANTIATE_TEST_CASE(test, ...)\
+#define VP9_INSTANTIATE_TEST_CASE(test, params)\
  INSTANTIATE_TEST_CASE_P(VP9, test, \
      ::testing::Combine( \
          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
               &libvpx_test::kVP9)), \
-          __VA_ARGS__))
+          params))
 #else
-#define VP9_INSTANTIATE_TEST_CASE(test, ...)
+#define VP9_INSTANTIATE_TEST_CASE(test, params)
 #endif  // CONFIG_VP9


--- a/test/config_test.cc
+++ b/test/config_test.cc
@@ -40,6 +40,10 @@ class ConfigTest : public ::libvpx_test::EncoderTest,
    ++frame_count_out_;
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  unsigned int frame_count_in_;
  unsigned int frame_count_out_;
  unsigned int frame_count_max_;
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -8,7 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <string.h>
 #include "test/acm_random.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
@@ -23,8 +22,8 @@ extern "C" {
 }

 namespace {
-typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride,
-                              uint8_t *dst, ptrdiff_t dst_stride,
+typedef void (*convolve_fn_t)(const uint8_t *src, int src_stride,
+                              uint8_t *dst, int dst_stride,
                              const int16_t *filter_x, int filter_x_stride,
                              const int16_t *filter_y, int filter_y_stride,
                              int w, int h);
@@ -188,7 +187,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {

 protected:
  static const int kDataAlignment = 16;
-  static const int kOuterBlockSize = 256;
+  static const int kOuterBlockSize = 128;
  static const int kInputStride = kOuterBlockSize;
  static const int kOutputStride = kOuterBlockSize;
  static const int kMaxDimension = 64;
@@ -212,7 +211,7 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {

  virtual void SetUp() {
    UUT_ = GET_PARAM(2);
-    /* Set up guard blocks for an inner block centered in the outer block */
+    /* Set up guard blocks for an inner block centered in the outer block */
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
        output_[i] = 255;
@@ -225,10 +224,6 @@ class ConvolveTest : public PARAMS(int, int, const ConvolveFunctions*) {
      input_[i] = prng.Rand8Extremes();
  }

-  void SetConstantInput(int value) {
-    memset(input_, value, kInputBufferSize);
-  }
-
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
@@ -461,86 +456,45 @@ DECLARE_ALIGNED(256, const int16_t, kChangeFilters[16][8]) = {
    { 128}
 };

-/* This test exercises the horizontal and vertical filter functions. */
 TEST_P(ConvolveTest, ChangeFilterWorks) {
  uint8_t* const in = input();
  uint8_t* const out = output();
-
-  /* Assume that the first input sample is at the 8/16th position. */
-  const int kInitialSubPelOffset = 8;
-
-  /* Filters are 8-tap, so the first filter tap will be applied to the pixel
-   * at position -3 with respect to the current filtering position. Since
-   * kInitialSubPelOffset is set to 8, we first select sub-pixel filter 8,
-   * which is non-zero only in the last tap. So, applying the filter at the
-   * current input position will result in an output equal to the pixel at
-   * offset +4 (-3 + 7) with respect to the current filtering position.
-   */
  const int kPixelSelected = 4;

-  /* Assume that each output pixel requires us to step on by 17/16th pixels in
-   * the input.
-   */
-  const int kInputPixelStep = 17;
-
-  /* The filters are setup in such a way that the expected output produces
-   * sets of 8 identical output samples. As the filter position moves to the
-   * next 1/16th pixel position the only active (=128) filter tap moves one
-   * position to the left, resulting in the same input pixel being replicated
-   * in to the output for 8 consecutive samples. After each set of 8 positions
-   * the filters select a different input pixel. kFilterPeriodAdjust below
-   * computes which input pixel is written to the output for a specified
-   * x or y position.
-   */
-
-  /* Test the horizontal filter. */
  REGISTER_STATE_CHECK(UUT_->h8_(in, kInputStride, out, kOutputStride,
-                                 kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, NULL, 0, Width(), Height()));
+                                 kChangeFilters[8], 17, kChangeFilters[4], 16,
+                                 Width(), Height()));

  for (int x = 0; x < Width(); ++x) {
+    const int kQ4StepAdjust = x >> 4;
    const int kFilterPeriodAdjust = (x >> 3) << 3;
-    const int ref_x =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjust * kInputPixelStep)
-                          >> SUBPEL_BITS);
-    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x << "width = " << Width();
+    const int ref_x = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
+    ASSERT_EQ(in[ref_x], out[x]) << "x == " << x;
  }

-  /* Test the vertical filter. */
  REGISTER_STATE_CHECK(UUT_->v8_(in, kInputStride, out, kOutputStride,
-                                 NULL, 0, kChangeFilters[kInitialSubPelOffset],
-                                 kInputPixelStep, Width(), Height()));
+                                 kChangeFilters[4], 16, kChangeFilters[8], 17,
+                                 Width(), Height()));

  for (int y = 0; y < Height(); ++y) {
+    const int kQ4StepAdjust = y >> 4;
    const int kFilterPeriodAdjust = (y >> 3) << 3;
-    const int ref_y =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjust * kInputPixelStep)
-                          >> SUBPEL_BITS);
+    const int ref_y = kQ4StepAdjust + kFilterPeriodAdjust + kPixelSelected;
    ASSERT_EQ(in[ref_y * kInputStride], out[y * kInputStride]) << "y == " << y;
  }

-  /* Test the horizontal and vertical filters in combination. */
  REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
-                                  kChangeFilters[kInitialSubPelOffset],
-                                  kInputPixelStep,
+                                  kChangeFilters[8], 17, kChangeFilters[8], 17,
                                  Width(), Height()));

  for (int y = 0; y < Height(); ++y) {
+    const int kQ4StepAdjustY = y >> 4;
    const int kFilterPeriodAdjustY = (y >> 3) << 3;
-    const int ref_y =
-        kPixelSelected + ((kInitialSubPelOffset
-            + kFilterPeriodAdjustY * kInputPixelStep)
-                          >> SUBPEL_BITS);
+    const int ref_y = kQ4StepAdjustY + kFilterPeriodAdjustY + kPixelSelected;
    for (int x = 0; x < Width(); ++x) {
+      const int kQ4StepAdjustX = x >> 4;
      const int kFilterPeriodAdjustX = (x >> 3) << 3;
-      const int ref_x =
-          kPixelSelected + ((kInitialSubPelOffset
-              + kFilterPeriodAdjustX * kInputPixelStep)
-                            >> SUBPEL_BITS);
+      const int ref_x = kQ4StepAdjustX + kFilterPeriodAdjustX + kPixelSelected;

      ASSERT_EQ(in[ref_y * kInputStride + ref_x], out[y * kOutputStride + x])
          << "x == " << x << ", y == " << y;
@@ -548,34 +502,6 @@ TEST_P(ConvolveTest, ChangeFilterWorks) {
  }
 }

-/* This test exercises that enough rows and columns are filtered with every
-   possible initial fractional positions and scaling steps. */
-TEST_P(ConvolveTest, CheckScalingFiltering) {
-  uint8_t* const in = input();
-  uint8_t* const out = output();
-
-  SetConstantInput(127);
-
-  for (int frac = 0; frac < 16; ++frac) {
-    for (int step = 1; step <= 32; ++step) {
-      /* Test the horizontal and vertical filters in combination. */
-      REGISTER_STATE_CHECK(UUT_->hv8_(in, kInputStride, out, kOutputStride,
-                                      vp9_sub_pel_filters_8[frac], step,
-                                      vp9_sub_pel_filters_8[frac], step,
-                                      Width(), Height()));
-
-      CheckGuardBlocks();
-
-      for (int y = 0; y < Height(); ++y) {
-        for (int x = 0; x < Width(); ++x) {
-          ASSERT_EQ(in[y * kInputStride + x], out[y * kOutputStride + x])
-              << "x == " << x << ", y == " << y
-              << ", frac == " << frac << ", step == " << step;
-        }
-      }
-    }
-  }
-}

 using std::tr1::make_tuple;

@@ -601,9 +527,9 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(

 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
-    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
-    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3);
+    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_c,
+    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_c,
+    vp9_convolve8_ssse3, vp9_convolve8_avg_c);

 INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_ssse3),
@@ -620,26 +546,4 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve8_ssse3),
    make_tuple(64, 64, &convolve8_ssse3)));
 #endif
-
-#if HAVE_NEON
-const ConvolveFunctions convolve8_neon(
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon);
-
-INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
-    make_tuple(4, 4, &convolve8_neon),
-    make_tuple(8, 4, &convolve8_neon),
-    make_tuple(4, 8, &convolve8_neon),
-    make_tuple(8, 8, &convolve8_neon),
-    make_tuple(16, 8, &convolve8_neon),
-    make_tuple(8, 16, &convolve8_neon),
-    make_tuple(16, 16, &convolve8_neon),
-    make_tuple(32, 16, &convolve8_neon),
-    make_tuple(16, 32, &convolve8_neon),
-    make_tuple(32, 32, &convolve8_neon),
-    make_tuple(64, 32, &convolve8_neon),
-    make_tuple(32, 64, &convolve8_neon),
-    make_tuple(64, 64, &convolve8_neon)));
-#endif
 }  // namespace
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -1,112 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include <climits>
-#include <vector>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class CpuSpeedTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWith2Params<
-        libvpx_test::TestMode, int> {
- protected:
-  CpuSpeedTest() : EncoderTest(GET_PARAM(0)) {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    set_cpu_used_ = GET_PARAM(2);
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) {
-    }
-  }
-  int set_cpu_used_;
-};
-
-TEST_P(CpuSpeedTest, TestQ0) {
-  // Validate that this non multiple of 64 wide clip encodes and decodes
-  // without a mismatch when passing in a very low max q.  This pushes
-  // the encoder to producing lots of big partitions which will likely
-  // extend into the border and test the border condition.
-  cfg_.g_lag_in_frames = 25;
-  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
-  cfg_.rc_target_bitrate = 400;
-  cfg_.rc_max_quantizer = 0;
-  cfg_.rc_min_quantizer = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       20);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-
-TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
-  // Validate that this non multiple of 64 wide clip encodes and decodes
-  // without a mismatch when passing in a very low max q.  This pushes
-  // the encoder to producing lots of big partitions which will likely
-  // extend into the border and test the border condition.
-  cfg_.g_lag_in_frames = 25;
-  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
-  cfg_.rc_target_bitrate = 12000;
-  cfg_.rc_max_quantizer = 10;
-  cfg_.rc_min_quantizer = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-TEST_P(CpuSpeedTest, TestLowBitrate) {
-  // Validate that this clip encodes and decodes without a mismatch
-  // when passing in a very high min q.  This pushes the encoder to producing
-  // lots of small partitions which might will test the other condition.
-
-  cfg_.g_lag_in_frames = 25;
-  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
-  cfg_.rc_target_bitrate = 200;
-  cfg_.rc_min_quantizer = 40;
-
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-using std::tr1::make_tuple;
-
-#define VP9_FACTORY \
-  static_cast<const libvpx_test::CodecFactory*> (&libvpx_test::kVP9)
-
-VP9_INSTANTIATE_TEST_CASE(
-    CpuSpeedTest,
-    ::testing::Values(::libvpx_test::kTwoPassGood),
-    ::testing::Range(0, 5));
-}  // namespace
--- a/test/cq_test.cc
+++ b/test/cq_test.cc
@@ -42,6 +42,10 @@ class CQTest : public ::libvpx_test::EncoderTest,
    n_frames_ = 0;
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -36,6 +36,10 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
    duration_ = 0.0;
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    const vpx_rational_t tb = video->timebase();
@@ -75,7 +79,7 @@ class DatarateTest : public ::libvpx_test::EncoderTest,
    bits_in_buffer_model_ -= frame_size_in_bits;

    // Update the running total of bits for end of test datarate checks.
-    bits_total_ += frame_size_in_bits;
+    bits_total_ += frame_size_in_bits ;

    // If first drop not set and we have a drop set it to this time.
    if (!first_drop_ && duration > 1)
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -13,16 +13,14 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"

 extern "C" {
 #include "vp9/common/vp9_entropy.h"
-#include "./vp9_rtcd.h"
-void vp9_short_idct16x16_add_c(int16_t *input, uint8_t *output, int pitch);
+#include "vp9_rtcd.h"
+void vp9_short_idct16x16_add_c(short *input, uint8_t *output, int pitch);
 }
+
+#include "acm_random.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;
@@ -32,13 +30,12 @@ namespace {
 #ifdef _MSC_VER
 static int round(double x) {
  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
+    return (int)ceil(x - 0.5);
  else
-    return static_cast<int>(floor(x + 0.5));
+    return (int)floor(x + 0.5);
 }
 #endif

-const int kNumCoeffs = 256;
 const double PI = 3.1415926535898;
 void reference2_16x16_idct_2d(double *input, double *output) {
  double x;
@@ -47,9 +44,7 @@ void reference2_16x16_idct_2d(double *input, double *output) {
      double s = 0;
      for (int i = 0; i < 16; ++i) {
        for (int j = 0; j < 16; ++j) {
-          x = cos(PI * j * (l + 0.5) / 16.0) *
-              cos(PI * i * (k + 0.5) / 16.0) *
-              input[i * 16 + j] / 256;
+          x=cos(PI*j*(l+0.5)/16.0)*cos(PI*i*(k+0.5)/16.0)*input[i*16+j]/256;
          if (i != 0)
            x *= sqrt(2.0);
          if (j != 0)
@@ -63,23 +58,23 @@ void reference2_16x16_idct_2d(double *input, double *output) {
 }


-const double C1 = 0.995184726672197;
-const double C2 = 0.98078528040323;
-const double C3 = 0.956940335732209;
-const double C4 = 0.923879532511287;
-const double C5 = 0.881921264348355;
-const double C6 = 0.831469612302545;
-const double C7 = 0.773010453362737;
-const double C8 = 0.707106781186548;
-const double C9 = 0.634393284163646;
-const double C10 = 0.555570233019602;
-const double C11 = 0.471396736825998;
-const double C12 = 0.38268343236509;
-const double C13 = 0.290284677254462;
-const double C14 = 0.195090322016128;
-const double C15 = 0.098017140329561;
+static const double C1 = 0.995184726672197;
+static const double C2 = 0.98078528040323;
+static const double C3 = 0.956940335732209;
+static const double C4 = 0.923879532511287;
+static const double C5 = 0.881921264348355;
+static const double C6 = 0.831469612302545;
+static const double C7 = 0.773010453362737;
+static const double C8 = 0.707106781186548;
+static const double C9 = 0.634393284163646;
+static const double C10 = 0.555570233019602;
+static const double C11 = 0.471396736825998;
+static const double C12 = 0.38268343236509;
+static const double C13 = 0.290284677254462;
+static const double C14 = 0.195090322016128;
+static const double C15 = 0.098017140329561;

-void butterfly_16x16_dct_1d(double input[16], double output[16]) {
+static void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];
  double temp1, temp2;
@@ -112,36 +107,36 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  output[6] = step[1] - step[6];
  output[7] = step[0] - step[7];

-  temp1 = step[ 8] * C7;
-  temp2 = step[15] * C9;
+  temp1 = step[ 8]*C7;
+  temp2 = step[15]*C9;
  output[ 8] = temp1 + temp2;

-  temp1 = step[ 9] * C11;
-  temp2 = step[14] * C5;
+  temp1 = step[ 9]*C11;
+  temp2 = step[14]*C5;
  output[ 9] = temp1 - temp2;

-  temp1 = step[10] * C3;
-  temp2 = step[13] * C13;
+  temp1 = step[10]*C3;
+  temp2 = step[13]*C13;
  output[10] = temp1 + temp2;

-  temp1 = step[11] * C15;
-  temp2 = step[12] * C1;
+  temp1 = step[11]*C15;
+  temp2 = step[12]*C1;
  output[11] = temp1 - temp2;

-  temp1 = step[11] * C1;
-  temp2 = step[12] * C15;
+  temp1 = step[11]*C1;
+  temp2 = step[12]*C15;
  output[12] = temp2 + temp1;

-  temp1 = step[10] * C13;
-  temp2 = step[13] * C3;
+  temp1 = step[10]*C13;
+  temp2 = step[13]*C3;
  output[13] = temp2 - temp1;

-  temp1 = step[ 9] * C5;
-  temp2 = step[14] * C11;
+  temp1 = step[ 9]*C5;
+  temp2 = step[14]*C11;
  output[14] = temp2 + temp1;

-  temp1 = step[ 8] * C9;
-  temp2 = step[15] * C7;
+  temp1 = step[ 8]*C9;
+  temp2 = step[15]*C7;
  output[15] = temp2 - temp1;

  // step 3
@@ -150,20 +145,20 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  step[ 2] = output[1] - output[2];
  step[ 3] = output[0] - output[3];

-  temp1 = output[4] * C14;
-  temp2 = output[7] * C2;
+  temp1 = output[4]*C14;
+  temp2 = output[7]*C2;
  step[ 4] = temp1 + temp2;

-  temp1 = output[5] * C10;
-  temp2 = output[6] * C6;
+  temp1 = output[5]*C10;
+  temp2 = output[6]*C6;
  step[ 5] = temp1 + temp2;

-  temp1 = output[5] * C6;
-  temp2 = output[6] * C10;
+  temp1 = output[5]*C6;
+  temp2 = output[6]*C10;
  step[ 6] = temp2 - temp1;

-  temp1 = output[4] * C2;
-  temp2 = output[7] * C14;
+  temp1 = output[4]*C2;
+  temp2 = output[7]*C14;
  step[ 7] = temp2 - temp1;

  step[ 8] = output[ 8] + output[11];
@@ -180,18 +175,18 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  output[ 0] = (step[ 0] + step[ 1]);
  output[ 8] = (step[ 0] - step[ 1]);

-  temp1 = step[2] * C12;
-  temp2 = step[3] * C4;
+  temp1 = step[2]*C12;
+  temp2 = step[3]*C4;
  temp1 = temp1 + temp2;
-  output[ 4] = 2*(temp1 * C8);
+  output[ 4] = 2*(temp1*C8);

-  temp1 = step[2] * C4;
-  temp2 = step[3] * C12;
+  temp1 = step[2]*C4;
+  temp2 = step[3]*C12;
  temp1 = temp2 - temp1;
-  output[12] = 2 * (temp1 * C8);
+  output[12] = 2*(temp1*C8);

-  output[ 2] = 2 * ((step[4] + step[ 5]) * C8);
-  output[14] = 2 * ((step[7] - step[ 6]) * C8);
+  output[ 2] = 2*((step[4] + step[ 5])*C8);
+  output[14] = 2*((step[7] - step[ 6])*C8);

  temp1 = step[4] - step[5];
  temp2 = step[6] + step[7];
@@ -201,17 +196,17 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  intermediate[8] = step[8] + step[14];
  intermediate[9] = step[9] + step[15];

-  temp1 = intermediate[8] * C12;
-  temp2 = intermediate[9] * C4;
+  temp1 = intermediate[8]*C12;
+  temp2 = intermediate[9]*C4;
  temp1 = temp1 - temp2;
-  output[3] = 2 * (temp1 * C8);
+  output[3] = 2*(temp1*C8);

-  temp1 = intermediate[8] * C4;
-  temp2 = intermediate[9] * C12;
+  temp1 = intermediate[8]*C4;
+  temp2 = intermediate[9]*C12;
  temp1 = temp2 + temp1;
-  output[13] = 2 * (temp1 * C8);
+  output[13] = 2*(temp1*C8);

-  output[ 9] = 2 * ((step[10] + step[11]) * C8);
+  output[ 9] = 2*((step[10] + step[11])*C8);

  intermediate[11] = step[10] - step[11];
  intermediate[12] = step[12] + step[13];
@@ -222,301 +217,150 @@ void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  output[15] = (intermediate[11] + intermediate[12]);
  output[ 1] = -(intermediate[11] - intermediate[12]);

-  output[ 7] = 2 * (intermediate[13] * C8);
+  output[ 7] = 2*(intermediate[13]*C8);

-  temp1 = intermediate[14] * C12;
-  temp2 = intermediate[15] * C4;
+  temp1 = intermediate[14]*C12;
+  temp2 = intermediate[15]*C4;
  temp1 = temp1 - temp2;
-  output[11] = -2 * (temp1 * C8);
+  output[11] = -2*(temp1*C8);

-  temp1 = intermediate[14] * C4;
-  temp2 = intermediate[15] * C12;
+  temp1 = intermediate[14]*C4;
+  temp2 = intermediate[15]*C12;
  temp1 = temp2 + temp1;
-  output[ 5] = 2 * (temp1 * C8);
+  output[ 5] = 2*(temp1*C8);
 }

-void reference_16x16_dct_2d(int16_t input[256], double output[256]) {
+static void reference_16x16_dct_1d(double in[16], double out[16]) {
+  const double kPi = 3.141592653589793238462643383279502884;
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  for (int k = 0; k < 16; k++) {
+    out[k] = 0.0;
+    for (int n = 0; n < 16; n++)
+      out[k] += in[n]*cos(kPi*(2*n+1)*k/32.0);
+    if (k == 0)
+      out[k] = out[k]*kInvSqrt2;
+  }
+}
+
+void reference_16x16_dct_2d(int16_t input[16*16], double output[16*16]) {
  // First transform columns
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
-      temp_in[j] = input[j * 16 + i];
+      temp_in[j] = input[j*16 + i];
    butterfly_16x16_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 16; ++j)
-      output[j * 16 + i] = temp_out[j];
+      output[j*16 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 16; ++i) {
    double temp_in[16], temp_out[16];
    for (int j = 0; j < 16; ++j)
-      temp_in[j] = output[j + i * 16];
+      temp_in[j] = output[j + i*16];
    butterfly_16x16_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 16; ++j)
-      output[j + i * 16] = temp_out[j]/2;
+      output[j + i*16] = temp_out[j]/2;
  }
 }

-typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
-typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
-typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
-typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);

-void fdct16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
-  vp9_short_fdct16x16_c(in, out, stride);
-}
+TEST(VP9Idct16x16Test, AccuracyCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 1000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t in[256], coeff[256];
+    uint8_t dst[256], src[256];
+    double out_r[256];

-void fht16x16_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
-  vp9_short_fht16x16_c(in, out, stride, tx_type);
-}
-
-class Trans16x16TestBase {
- public:
-  virtual ~Trans16x16TestBase() {}
-
- protected:
-  virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
-
-  virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
-
-  void RunAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    uint32_t max_error = 0;
-    int64_t total_error = 0;
-    const int count_test_block = 10000;
-    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
-
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        test_input_block[j] = src[j] - dst[j];
-      }
-
-      REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block,
-                                      test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        const uint32_t diff = dst[j] - src[j];
-        const uint32_t error = diff * diff;
-        if (max_error < error)
-          max_error = error;
-        total_error += error;
-      }
+    for (int j = 0; j < 256; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
    }
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 256; ++j)
+      in[j] = src[j] - dst[j];

-    EXPECT_GE(1u, max_error)
-        << "Error: 16x16 FHT/IHT has an individual round trip error > 1";
-
-    EXPECT_GE(count_test_block , total_error)
-        << "Error: 16x16 FHT/IHT has average round trip error > 1 per block";
+    reference_16x16_dct_2d(in, out_r);
+    for (int j = 0; j < 256; j++)
+      coeff[j] = round(out_r[j]);
+    vp9_short_idct16x16_add_c(coeff, dst, 16);
+    for (int j = 0; j < 256; ++j) {
+      const int diff = dst[j] - src[j];
+      const int error = diff * diff;
+      EXPECT_GE(1, error)
+          << "Error: 16x16 IDCT has error " << error
+          << " at index " << j;
+    }
  }
+}

-  void RunCoeffCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+// We need to enable the fdct test once we re-do the 16 point fdct.
+TEST(VP9Fdct16x16Test, AccuracyCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int max_error = 0;
+  double total_error = 0;
+  const int count_test_block = 1000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t test_input_block[256];
+    int16_t test_temp_block[256];
+    uint8_t dst[256], src[256];

-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
+    for (int j = 0; j < 256; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 256; ++j)
+      test_input_block[j] = src[j] - dst[j];

-      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+    const int pitch = 32;
+    vp9_short_fdct16x16_c(test_input_block, test_temp_block, pitch);
+    vp9_short_idct16x16_add_c(test_temp_block, dst, 16);

-      // The minimum quant value is 4.
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_EQ(output_block[j], output_ref_block[j]);
+    for (int j = 0; j < 256; ++j) {
+      const int diff = dst[j] - src[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+      total_error += error;
    }
  }

-  void RunMemCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
+  EXPECT_GE(1, max_error)
+      << "Error: 16x16 FDCT/IDCT has an individual round trip error > 1";

-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = rnd.Rand8() - rnd.Rand8();
-        input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
-      }
-      if (i == 0)
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = 255;
-      if (i == 1)
-        for (int j = 0; j < kNumCoeffs; ++j)
-          input_extreme_block[j] = -255;
+  EXPECT_GE(count_test_block , total_error)
+      << "Error: 16x16 FDCT/IDCT has average round trip error > 1 per block";
+}

-      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
-      REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block,
-                                      output_block, pitch_));
+TEST(VP9Fdct16x16Test, CoeffSizeCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 1000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t input_block[256], input_extreme_block[256];
+    int16_t output_block[256], output_extreme_block[256];

-      // The minimum quant value is 4.
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        EXPECT_EQ(output_block[j], output_ref_block[j]);
-        EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
-            << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
-      }
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 256; ++j) {
+      input_block[j] = rnd.Rand8() - rnd.Rand8();
+      input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+    }
+    if (i == 0)
+      for (int j = 0; j < 256; ++j)
+        input_extreme_block[j] = 255;
+
+    const int pitch = 32;
+    vp9_short_fdct16x16_c(input_block, output_block, pitch);
+    vp9_short_fdct16x16_c(input_extreme_block, output_extreme_block, pitch);
+
+    // The minimum quant value is 4.
+    for (int j = 0; j < 256; ++j) {
+      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
+          << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
+          << "Error: 16x16 FDCT extreme has coefficient larger than 4*DCT_MAX_VALUE";
    }
  }
-
-  void RunInvAccuracyCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
-
-    for (int i = 0; i < count_test_block; ++i) {
-      double out_r[kNumCoeffs];
-
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        in[j] = src[j] - dst[j];
-      }
-
-      reference_16x16_dct_2d(in, out_r);
-      for (int j = 0; j < kNumCoeffs; ++j)
-        coeff[j] = round(out_r[j]);
-
-      const int pitch = 32;
-      REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch));
-
-      for (int j = 0; j < kNumCoeffs; ++j) {
-        const uint32_t diff = dst[j] - src[j];
-        const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
-            << "Error: 16x16 IDCT has error " << error
-            << " at index " << j;
-      }
-    }
-  }
-  int pitch_;
-  int tx_type_;
-  fht_t fwd_txfm_ref;
-};
-
-class Trans16x16DCT : public Trans16x16TestBase,
-                      public PARAMS(fdct_t, idct_t, int) {
- public:
-  virtual ~Trans16x16DCT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 32;
-    fwd_txfm_ref = fdct16x16_ref;
-  }
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride >> 1);
-  }
-
-  fdct_t fwd_txfm_;
-  idct_t inv_txfm_;
-};
-
-TEST_P(Trans16x16DCT, AccuracyCheck) {
-  RunAccuracyCheck();
 }
-
-TEST_P(Trans16x16DCT, CoeffCheck) {
-  RunCoeffCheck();
-}
-
-TEST_P(Trans16x16DCT, MemCheck) {
-  RunMemCheck();
-}
-
-TEST_P(Trans16x16DCT, InvAccuracyCheck) {
-  RunInvAccuracyCheck();
-}
-
-class Trans16x16HT : public Trans16x16TestBase,
-                     public PARAMS(fht_t, iht_t, int) {
- public:
-  virtual ~Trans16x16HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 16;
-    fwd_txfm_ref = fht16x16_ref;
-  }
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
-    fwd_txfm_(in, out, stride, tx_type_);
-  }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, tx_type_);
-  }
-
-  fht_t fwd_txfm_;
-  iht_t inv_txfm_;
-};
-
-TEST_P(Trans16x16HT, AccuracyCheck) {
-  RunAccuracyCheck();
-}
-
-TEST_P(Trans16x16HT, CoeffCheck) {
-  RunCoeffCheck();
-}
-
-TEST_P(Trans16x16HT, MemCheck) {
-  RunMemCheck();
-}
-
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
-    C, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct16x16_c, &vp9_short_idct16x16_add_c, 0)));
-INSTANTIATE_TEST_CASE_P(
-    C, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 0),
-        make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 1),
-        make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 2),
-        make_tuple(&vp9_short_fht16x16_c, &vp9_short_iht16x16_add_c, 3)));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans16x16DCT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct16x16_sse2,
-                   &vp9_short_idct16x16_add_sse2, 0)));
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 0),
-        make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 1),
-        make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 2),
-        make_tuple(&vp9_short_fht16x16_sse2, &vp9_short_iht16x16_add_sse2, 3)));
-#endif
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -13,17 +13,15 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"

 extern "C" {
-#include "./vpx_config.h"
 #include "vp9/common/vp9_entropy.h"
 #include "./vp9_rtcd.h"
+  void vp9_short_fdct32x32_c(int16_t *input, int16_t *out, int pitch);
+  void vp9_short_idct32x32_add_c(short *input, uint8_t *output, int pitch);
 }

+#include "test/acm_random.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;
@@ -32,15 +30,35 @@ namespace {
 #ifdef _MSC_VER
 static int round(double x) {
  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
+    return (int)ceil(x - 0.5);
  else
-    return static_cast<int>(floor(x + 0.5));
+    return (int)floor(x + 0.5);
 }
 #endif

-const int kNumCoeffs = 1024;
-const double kPi = 3.141592653589793238462643383279502884;
-void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
+static const double kPi = 3.141592653589793238462643383279502884;
+static void reference2_32x32_idct_2d(double *input, double *output) {
+  double x;
+  for (int l = 0; l < 32; ++l) {
+    for (int k = 0; k < 32; ++k) {
+      double s = 0;
+      for (int i = 0; i < 32; ++i) {
+        for (int j = 0; j < 32; ++j) {
+          x = cos(kPi * j * (l + 0.5) / 32.0) *
+              cos(kPi * i * (k + 0.5) / 32.0) * input[i * 32 + j] / 1024;
+          if (i != 0)
+            x *= sqrt(2.0);
+          if (j != 0)
+            x *= sqrt(2.0);
+          s += x;
+        }
+      }
+      output[k * 32 + l] = s / 4;
+    }
+  }
+}
+
+static void reference_32x32_dct_1d(double in[32], double out[32], int stride) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 32; k++) {
    out[k] = 0.0;
@@ -51,8 +69,7 @@ void reference_32x32_dct_1d(const double in[32], double out[32], int stride) {
  }
 }

-void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
-                            double output[kNumCoeffs]) {
+static void reference_32x32_dct_2d(int16_t input[32*32], double output[32*32]) {
  // First transform columns
  for (int i = 0; i < 32; ++i) {
    double temp_in[32], temp_out[32];
@@ -74,165 +91,27 @@ void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
  }
 }

-typedef void (*fwd_txfm_t)(int16_t *in, int16_t *out, int stride);
-typedef void (*inv_txfm_t)(int16_t *in, uint8_t *dst, int stride);
-
-class Trans32x32Test : public PARAMS(fwd_txfm_t, inv_txfm_t, int) {
- public:
-  virtual ~Trans32x32Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    version_  = GET_PARAM(2);  // 0: high precision forward transform
-                               // 1: low precision version for rd loop
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  int version_;
-  fwd_txfm_t fwd_txfm_;
-  inv_txfm_t inv_txfm_;
-};
-
-TEST_P(Trans32x32Test, AccuracyCheck) {
+TEST(VP9Idct32x32Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  uint32_t max_error = 0;
-  int64_t total_error = 0;
  const int count_test_block = 1000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
-
  for (int i = 0; i < count_test_block; ++i) {
-    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < kNumCoeffs; ++j) {
+    int16_t in[1024], coeff[1024];
+    uint8_t dst[1024], src[1024];
+    double out_r[1024];
+
+    for (int j = 0; j < 1024; ++j) {
      src[j] = rnd.Rand8();
      dst[j] = rnd.Rand8();
-      test_input_block[j] = src[j] - dst[j];
    }
-
-    const int pitch = 64;
-    REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, pitch));
-    REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32));
-
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      const uint32_t diff = dst[j] - src[j];
-      const uint32_t error = diff * diff;
-      if (max_error < error)
-        max_error = error;
-      total_error += error;
-    }
-  }
-
-  if (version_ == 1) {
-    max_error /= 2;
-    total_error /= 45;
-  }
-
-  EXPECT_GE(1u, max_error)
-      << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1";
-
-  EXPECT_GE(count_test_block, total_error)
-      << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block";
-}
-
-TEST_P(Trans32x32Test, CoeffCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 1000;
-
-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
-
-  for (int i = 0; i < count_test_block; ++i) {
-    for (int j = 0; j < kNumCoeffs; ++j)
-      input_block[j] = rnd.Rand8() - rnd.Rand8();
-
-    const int pitch = 64;
-    vp9_short_fdct32x32_c(input_block, output_ref_block, pitch);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, pitch));
-
-    if (version_ == 0) {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_EQ(output_block[j], output_ref_block[j])
-            << "Error: 32x32 FDCT versions have mismatched coefficients";
-    } else {
-      for (int j = 0; j < kNumCoeffs; ++j)
-        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
-            << "Error: 32x32 FDCT rd has mismatched coefficients";
-    }
-  }
-}
-
-TEST_P(Trans32x32Test, MemCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 2000;
-
-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, output_block, kNumCoeffs);
-
-  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-255, 255].
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      input_block[j] = rnd.Rand8() - rnd.Rand8();
-      input_extreme_block[j] = rnd.Rand8() & 1 ? 255 : -255;
-    }
-    if (i == 0)
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = 255;
-    if (i == 1)
-      for (int j = 0; j < kNumCoeffs; ++j)
-        input_extreme_block[j] = -255;
-
-    const int pitch = 64;
-    vp9_short_fdct32x32_c(input_extreme_block, output_ref_block, pitch);
-    REGISTER_STATE_CHECK(fwd_txfm_(input_extreme_block, output_block, pitch));
-
-    // The minimum quant value is 4.
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      if (version_ == 0) {
-        EXPECT_EQ(output_block[j], output_ref_block[j])
-            << "Error: 32x32 FDCT versions have mismatched coefficients";
-      } else {
-        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
-            << "Error: 32x32 FDCT rd has mismatched coefficients";
-      }
-      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_ref_block[j]))
-          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
-      EXPECT_GE(4 * DCT_MAX_VALUE, abs(output_block[j]))
-          << "Error: 32x32 FDCT has coefficient larger than "
-          << "4*DCT_MAX_VALUE";
-    }
-  }
-}
-
-TEST_P(Trans32x32Test, InverseAccuracy) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  const int count_test_block = 1000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, coeff, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
-
-  for (int i = 0; i < count_test_block; ++i) {
-    double out_r[kNumCoeffs];
-
-    // Initialize a test block with input range [-255, 255]
-    for (int j = 0; j < kNumCoeffs; ++j) {
-      src[j] = rnd.Rand8();
-      dst[j] = rnd.Rand8();
+    for (int j = 0; j < 1024; ++j)
      in[j] = src[j] - dst[j];
-    }

    reference_32x32_dct_2d(in, out_r);
-    for (int j = 0; j < kNumCoeffs; ++j)
+    for (int j = 0; j < 1024; j++)
      coeff[j] = round(out_r[j]);
-    REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
-    for (int j = 0; j < kNumCoeffs; ++j) {
+    vp9_short_idct32x32_add_c(coeff, dst, 32);
+    for (int j = 0; j < 1024; ++j) {
      const int diff = dst[j] - src[j];
      const int error = diff * diff;
      EXPECT_GE(1, error)
@@ -242,21 +121,72 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
  }
 }

-using std::tr1::make_tuple;
+TEST(VP9Fdct32x32Test, AccuracyCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  unsigned int max_error = 0;
+  int64_t total_error = 0;
+  const int count_test_block = 1000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t test_input_block[1024];
+    int16_t test_temp_block[1024];
+    uint8_t dst[1024], src[1024];

-INSTANTIATE_TEST_CASE_P(
-    C, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct32x32_c, &vp9_short_idct32x32_add_c, 0),
-        make_tuple(&vp9_short_fdct32x32_rd_c, &vp9_short_idct32x32_add_c, 1)));
+    for (int j = 0; j < 1024; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 1024; ++j)
+      test_input_block[j] = src[j] - dst[j];

-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct32x32_sse2,
-                   &vp9_short_idct32x32_add_sse2, 0),
-        make_tuple(&vp9_short_fdct32x32_rd_sse2,
-                   &vp9_short_idct32x32_add_sse2, 1)));
-#endif
+    const int pitch = 64;
+    vp9_short_fdct32x32_c(test_input_block, test_temp_block, pitch);
+    vp9_short_idct32x32_add_c(test_temp_block, dst, 32);
+
+    for (int j = 0; j < 1024; ++j) {
+      const unsigned diff = dst[j] - src[j];
+      const unsigned error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+      total_error += error;
+    }
+  }
+
+  EXPECT_GE(1u, max_error)
+      << "Error: 32x32 FDCT/IDCT has an individual roundtrip error > 1";
+
+  EXPECT_GE(count_test_block, total_error)
+      << "Error: 32x32 FDCT/IDCT has average roundtrip error > 1 per block";
+}
+
+TEST(VP9Fdct32x32Test, CoeffSizeCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  const int count_test_block = 1000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t input_block[1024], input_extreme_block[1024];
+    int16_t output_block[1024], output_extreme_block[1024];
+
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 1024; ++j) {
+      input_block[j] = rnd.Rand8() - rnd.Rand8();
+      input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+    }
+    if (i == 0)
+      for (int j = 0; j < 1024; ++j)
+        input_extreme_block[j] = 255;
+
+    const int pitch = 64;
+    vp9_short_fdct32x32_c(input_block, output_block, pitch);
+    vp9_short_fdct32x32_c(input_extreme_block, output_extreme_block, pitch);
+
+    // The minimum quant value is 4.
+    for (int j = 0; j < 1024; ++j) {
+      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_block[j]))
+          << "Error: 32x32 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+      EXPECT_GE(4*DCT_MAX_VALUE, abs(output_extreme_block[j]))
+          << "Error: 32x32 FDCT extreme has coefficient larger than "
+             "4*DCT_MAX_VALUE";
+    }
+  }
+}
 }  // namespace
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -12,7 +12,7 @@
 #define TEST_DECODE_TEST_DRIVER_H_
 #include <cstring>
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
+#include "vpx_config.h"
 #include "vpx/vpx_decoder.h"

 namespace libvpx_test {
@@ -36,8 +36,9 @@ class DxDataIterator {
 };

 // Provides a simplified interface to manage one video decoding.
-// Similar to Encoder class, the exact services should be added
-// as more tests are added.
+//
+// TODO: similar to Encoder class, the exact services should be
+// added as more tests are added.
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "./vpx_config.h"
+#include "vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
 #include "test/decode_test_driver.h"
@@ -114,19 +114,19 @@ static bool compare_img(const vpx_image_t *img1,
  const unsigned int height_y = img1->d_h;
  unsigned int i;
  for (i = 0; i < height_y; ++i)
-    match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
-                    img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
-                    width_y) == 0) && match;
+    match = ( memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+                     img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
+                     width_y) == 0) && match;
  const unsigned int width_uv  = (img1->d_w + 1) >> 1;
  const unsigned int height_uv = (img1->d_h + 1) >> 1;
  for (i = 0; i <  height_uv; ++i)
-    match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
-                    img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
-                    width_uv) == 0) && match;
+    match = ( memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+                     img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
+                     width_uv) == 0) && match;
  for (i = 0; i < height_uv; ++i)
-    match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
-                    img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
-                    width_uv) == 0) && match;
+    match = ( memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+                     img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
+                     width_uv) == 0) && match;
  return match;
 }

@@ -158,7 +158,7 @@ void EncoderTest::RunLoop(VideoSource *video) {
    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
    bool again;
    for (again = true, video->Begin(); again; video->Next()) {
-      again = (video->img() != NULL);
+      again = video->img() != NULL;

      PreEncodeFrameHook(video);
      PreEncodeFrameHook(video, encoder);
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -190,9 +190,7 @@ class EncoderTest {
  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {}

  // Hook to determine whether the encode loop should continue.
-  virtual bool Continue() const {
-    return !(::testing::Test::HasFatalFailure() || abort_);
-  }
+  virtual bool Continue() const { return !abort_; }

  const CodecFactory   *codec_;
  // Hook to determine whether to decode frame after encoding
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -50,6 +50,10 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
    mismatch_nframes_ = 0;
  }

+  virtual bool Continue() const {
+    return !HasFatalFailure() && !abort_;
+  }
+
  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    psnr_ += pkt->data.psnr.psnr[0];
    nframes_++;
@@ -62,7 +66,7 @@ class ErrorResilienceTest : public ::libvpx_test::EncoderTest,
    if (droppable_nframes_ > 0 &&
        (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
-        if (droppable_frames_[i] == video->frame()) {
+        if (droppable_frames_[i] == nframes_) {
          std::cout << "             Encoding droppable frame: "
                    << droppable_frames_[i] << "\n";
          frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
@@ -148,7 +152,7 @@ TEST_P(ErrorResilienceTest, OnVersusOff) {
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 2000;
-  cfg_.g_lag_in_frames = 10;
+  cfg_.g_lag_in_frames = 25;

  init_flags_ = VPX_CODEC_USE_PSNR;

@@ -179,9 +183,6 @@ TEST_P(ErrorResilienceTest, DropFramesWithoutRecovery) {
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = 500;
-  // FIXME(debargha): Fix this to work for any lag.
-  // Currently this test only works for lag = 0
-  cfg_.g_lag_in_frames = 0;

  init_flags_ = VPX_CODEC_USE_PSNR;

--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -15,69 +15,68 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {
-#include "./vp9_rtcd.h"
+#include "vp9_rtcd.h"
 }

-#include "test/acm_random.h"
+#include "acm_random.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

 namespace {
-void fdct4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
-             int stride, int /*tx_type*/) {
+void fdct4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
  vp9_short_fdct4x4_c(in, out, stride);
 }
-void idct4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
-                 int stride, int /*tx_type*/) {
+void idct4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
+                 int stride, int tx_type) {
  vp9_short_idct4x4_add_c(out, dst, stride >> 1);
 }
-void fht4x4(int16_t *in, int16_t *out, uint8_t* /*dst*/,
-            int stride, int tx_type) {
+void fht4x4(int16_t *in, int16_t *out, uint8_t *dst, int stride, int tx_type) {
  vp9_short_fht4x4_c(in, out, stride >> 1, tx_type);
 }
-void iht4x4_add(int16_t* /*in*/, int16_t *out, uint8_t *dst,
+void iht4x4_add(int16_t *in, int16_t *out, uint8_t *dst,
                int stride, int tx_type) {
  vp9_short_iht4x4_add_c(out, dst, stride >> 1, tx_type);
 }

 class FwdTrans4x4Test : public ::testing::TestWithParam<int> {
 public:
-  virtual ~FwdTrans4x4Test() {}
-  virtual void SetUp() {
-    tx_type_ = GetParam();
-    if (tx_type_ == 0) {
-      fwd_txfm_ = fdct4x4;
-      inv_txfm_ = idct4x4_add;
+  FwdTrans4x4Test() {SetUpTestTxfm();}
+  ~FwdTrans4x4Test() {}
+
+  void SetUpTestTxfm() {
+    tx_type = GetParam();
+    if (tx_type == 0) {
+      fwd_txfm = fdct4x4;
+      inv_txfm = idct4x4_add;
    } else {
-      fwd_txfm_ = fht4x4;
-      inv_txfm_ = iht4x4_add;
+      fwd_txfm = fht4x4;
+      inv_txfm = iht4x4_add;
    }
  }

 protected:
  void RunFwdTxfm(int16_t *in, int16_t *out, uint8_t *dst,
                  int stride, int tx_type) {
-    (*fwd_txfm_)(in, out, dst, stride, tx_type);
+    (*fwd_txfm)(in, out, dst, stride, tx_type);
  }

  void RunInvTxfm(int16_t *in, int16_t *out, uint8_t *dst,
                  int stride, int tx_type) {
-    (*inv_txfm_)(in, out, dst, stride, tx_type);
+    (*inv_txfm)(in, out, dst, stride, tx_type);
  }

-  int tx_type_;
-  void (*fwd_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
+  int tx_type;
+  void (*fwd_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
                   int stride, int tx_type);
-  void (*inv_txfm_)(int16_t *in, int16_t *out, uint8_t *dst,
+  void (*inv_txfm)(int16_t *in, int16_t *out, uint8_t *dst,
                   int stride, int tx_type);
 };

 TEST_P(FwdTrans4x4Test, SignBiasCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 16);
+  int16_t test_input_block[16];
+  int16_t test_output_block[16];
  const int pitch = 8;
  int count_sign_block[16][2];
  const int count_test_block = 1000000;
@@ -88,7 +87,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
    for (int j = 0; j < 16; ++j)
      test_input_block[j] = rnd.Rand8() - rnd.Rand8();

-    RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
+    RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0)
@@ -104,7 +103,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
    EXPECT_TRUE(bias_acceptable)
        << "Error: 4x4 FDCT/FHT has a sign bias > 1%"
        << " for input range [-255, 255] at index " << j
-        << " tx_type " << tx_type_;
+        << " tx_type " << tx_type;
  }

  memset(count_sign_block, 0, sizeof(count_sign_block));
@@ -113,7 +112,7 @@ TEST_P(FwdTrans4x4Test, SignBiasCheck) {
    for (int j = 0; j < 16; ++j)
      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);

-    RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type_);
+    RunFwdTxfm(test_input_block, test_output_block, NULL, pitch, tx_type);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0)
@@ -136,13 +135,12 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  int max_error = 0;
-  int total_error = 0;
+  double total_error = 0;
  const int count_test_block = 1000000;
  for (int i = 0; i < count_test_block; ++i) {
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 16);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 16);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 16);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 16);
+    int16_t test_input_block[16];
+    int16_t test_temp_block[16];
+    uint8_t dst[16], src[16];

    for (int j = 0; j < 16; ++j) {
      src[j] = rnd.Rand8();
@@ -153,10 +151,10 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
      test_input_block[j] = src[j] - dst[j];

    const int pitch = 8;
-    RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
+    RunFwdTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);

    for (int j = 0; j < 16; ++j) {
-        if (test_temp_block[j] > 0) {
+        if(test_temp_block[j] > 0) {
          test_temp_block[j] += 2;
          test_temp_block[j] /= 4;
          test_temp_block[j] *= 4;
@@ -168,7 +166,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {
    }

    // inverse transform and reconstruct the pixel block
-    RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type_);
+    RunInvTxfm(test_input_block, test_temp_block, dst, pitch, tx_type);

    for (int j = 0; j < 16; ++j) {
      const int diff = dst[j] - src[j];
@@ -183,7 +181,7 @@ TEST_P(FwdTrans4x4Test, RoundTripErrorCheck) {

  EXPECT_GE(count_test_block, total_error)
      << "Error: FDCT/IDCT or FHT/IHT has average "
-      << "roundtrip error > 1 per block";
+          "roundtrip error > 1 per block";
 }

 INSTANTIATE_TEST_CASE_P(VP9, FwdTrans4x4Test, ::testing::Range(0, 4));
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,309 +13,173 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"

 extern "C" {
-#include "vp9/common/vp9_entropy.h"
-#include "./vp9_rtcd.h"
-void vp9_short_idct8x8_add_c(int16_t *input, uint8_t *output, int pitch);
+#include "vp9_rtcd.h"
+void vp9_short_idct8x8_add_c(short *input, uint8_t *output, int pitch);
 }
+
+#include "acm_random.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;

 namespace {
-typedef void (*fdct_t)(int16_t *in, int16_t *out, int stride);
-typedef void (*idct_t)(int16_t *in, uint8_t *dst, int stride);
-typedef void (*fht_t) (int16_t *in, int16_t *out, int stride, int tx_type);
-typedef void (*iht_t) (int16_t *in, uint8_t *dst, int stride, int tx_type);

-void fdct8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
-  vp9_short_fdct8x8_c(in, out, stride);
-}
+TEST(VP9Fdct8x8Test, SignBiasCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int16_t test_input_block[64];
+  int16_t test_output_block[64];
+  const int pitch = 16;
+  int count_sign_block[64][2];
+  const int count_test_block = 100000;

-void fht8x8_ref(int16_t *in, int16_t *out, int stride, int tx_type) {
-  vp9_short_fht8x8_c(in, out, stride, tx_type);
-}
+  memset(count_sign_block, 0, sizeof(count_sign_block));

-class FwdTrans8x8TestBase {
- public:
-  virtual ~FwdTrans8x8TestBase() {}
+  for (int i = 0; i < count_test_block; ++i) {
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 64; ++j)
+      test_input_block[j] = rnd.Rand8() - rnd.Rand8();

- protected:
-  virtual void RunFwdTxfm(int16_t *in, int16_t *out, int stride) = 0;
-  virtual void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) = 0;
-
-  void RunSignBiasCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_output_block, 64);
-    int count_sign_block[64][2];
-    const int count_test_block = 100000;
-
-    memset(count_sign_block, 0, sizeof(count_sign_block));
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < 64; ++j)
-        test_input_block[j] = rnd.Rand8() - rnd.Rand8();
-      REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_output_block, pitch_));
-
-      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
-          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
-          ++count_sign_block[j][1];
-      }
-    }
+    vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);

    for (int j = 0; j < 64; ++j) {
-      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 1125;
-      EXPECT_LT(diff, max_diff)
-          << "Error: 8x8 FDCT/FHT has a sign bias > "
-          << 1. * max_diff / count_test_block * 100 << "%"
-          << " for input range [-255, 255] at index " << j
-          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1]
-          << " diff: " << diff;
-    }
-
-    memset(count_sign_block, 0, sizeof(count_sign_block));
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-15, 15].
-      for (int j = 0; j < 64; ++j)
-        test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
-      REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_output_block, pitch_));
-
-      for (int j = 0; j < 64; ++j) {
-        if (test_output_block[j] < 0)
-          ++count_sign_block[j][0];
-        else if (test_output_block[j] > 0)
-          ++count_sign_block[j][1];
-      }
-    }
-
-    for (int j = 0; j < 64; ++j) {
-      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 10000;
-      EXPECT_LT(diff, max_diff)
-          << "Error: 4x4 FDCT/FHT has a sign bias > "
-          << 1. * max_diff / count_test_block * 100 << "%"
-          << " for input range [-15, 15] at index " << j
-          << " count0: " << count_sign_block[j][0]
-          << " count1: " << count_sign_block[j][1]
-          << " diff: " << diff;
+      if (test_output_block[j] < 0)
+        ++count_sign_block[j][0];
+      else if (test_output_block[j] > 0)
+        ++count_sign_block[j][1];
    }
  }

-  void RunRoundTripErrorCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int max_error = 0;
-    int total_error = 0;
-    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+  for (int j = 0; j < 64; ++j) {
+    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+    const int max_diff = 1125;
+    EXPECT_LT(diff, max_diff)
+        << "Error: 8x8 FDCT has a sign bias > "
+        << 1. * max_diff / count_test_block * 100 << "%"
+        << " for input range [-255, 255] at index " << j
+        << " count0: " << count_sign_block[j][0]
+        << " count1: " << count_sign_block[j][1]
+        << " diff: " << diff;
+  }

-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < 64; ++j) {
-        src[j] = rnd.Rand8();
-        dst[j] = rnd.Rand8();
-        test_input_block[j] = src[j] - dst[j];
-      }
+  memset(count_sign_block, 0, sizeof(count_sign_block));

-      REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      for (int j = 0; j < 64; ++j) {
-          if (test_temp_block[j] > 0) {
-            test_temp_block[j] += 2;
-            test_temp_block[j] /= 4;
-            test_temp_block[j] *= 4;
-          } else {
-            test_temp_block[j] -= 2;
-            test_temp_block[j] /= 4;
-            test_temp_block[j] *= 4;
-          }
-      }
-      REGISTER_STATE_CHECK(
-          RunInvTxfm(test_temp_block, dst, pitch_));
+  for (int i = 0; i < count_test_block; ++i) {
+    // Initialize a test block with input range [-15, 15].
+    for (int j = 0; j < 64; ++j)
+      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);

-      for (int j = 0; j < 64; ++j) {
-        const int diff = dst[j] - src[j];
-        const int error = diff * diff;
-        if (max_error < error)
-          max_error = error;
-        total_error += error;
-      }
+    vp9_short_fdct8x8_c(test_input_block, test_output_block, pitch);
+
+    for (int j = 0; j < 64; ++j) {
+      if (test_output_block[j] < 0)
+        ++count_sign_block[j][0];
+      else if (test_output_block[j] > 0)
+        ++count_sign_block[j][1];
+    }
+  }
+
+  for (int j = 0; j < 64; ++j) {
+    const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
+    const int max_diff = 10000;
+    EXPECT_LT(diff, max_diff)
+        << "Error: 8x8 FDCT has a sign bias > "
+        << 1. * max_diff / count_test_block * 100 << "%"
+        << " for input range [-15, 15] at index " << j
+        << " count0: " << count_sign_block[j][0]
+        << " count1: " << count_sign_block[j][1]
+        << " diff: " << diff;
+  }
+};
+
+TEST(VP9Fdct8x8Test, RoundTripErrorCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int max_error = 0;
+  double total_error = 0;
+  const int count_test_block = 100000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t test_input_block[64];
+    int16_t test_temp_block[64];
+    uint8_t dst[64], src[64];
+
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8();
+      dst[j] = rnd.Rand8();
+    }
+    // Initialize a test block with input range [-255, 255].
+    for (int j = 0; j < 64; ++j)
+      test_input_block[j] = src[j] - dst[j];
+
+    const int pitch = 16;
+    vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
+    for (int j = 0; j < 64; ++j){
+        if(test_temp_block[j] > 0) {
+          test_temp_block[j] += 2;
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        } else {
+          test_temp_block[j] -= 2;
+          test_temp_block[j] /= 4;
+          test_temp_block[j] *= 4;
+        }
+    }
+    vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = dst[j] - src[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+      total_error += error;
+    }
+  }
+
+  EXPECT_GE(1, max_error)
+      << "Error: 8x8 FDCT/IDCT has an individual roundtrip error > 1";
+
+  EXPECT_GE(count_test_block/5, total_error)
+      << "Error: 8x8 FDCT/IDCT has average roundtrip error > 1/5 per block";
+};
+
+TEST(VP9Fdct8x8Test, ExtremalCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int max_error = 0;
+  double total_error = 0;
+  const int count_test_block = 100000;
+  for (int i = 0; i < count_test_block; ++i) {
+    int16_t test_input_block[64];
+    int16_t test_temp_block[64];
+    uint8_t dst[64], src[64];
+
+    for (int j = 0; j < 64; ++j) {
+      src[j] = rnd.Rand8() % 2 ? 255 : 0;
+      dst[j] = src[j] > 0 ? 0 : 255;
+    }
+    // Initialize a test block with extremal inputs (each is -255 or 255).
+    for (int j = 0; j < 64; ++j)
+      test_input_block[j] = src[j] - dst[j];
+
+    const int pitch = 16;
+    vp9_short_fdct8x8_c(test_input_block, test_temp_block, pitch);
+    vp9_short_idct8x8_add_c(test_temp_block, dst, 8);
+
+    for (int j = 0; j < 64; ++j) {
+      const int diff = dst[j] - src[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+      total_error += error;
    }

    EXPECT_GE(1, max_error)
-      << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual"
-      << " roundtrip error > 1";
+        << "Error: Extremal 8x8 FDCT/IDCT has an"
+        << " individual roundtrip error > 1";

    EXPECT_GE(count_test_block/5, total_error)
-      << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip "
-      << "error > 1/5 per block";
+        << "Error: Extremal 8x8 FDCT/IDCT has average"
+        << " roundtrip error > 1/5 per block";
  }
-
-  void RunExtremalCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    int max_error = 0;
-    int total_error = 0;
-    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      for (int j = 0; j < 64; ++j) {
-        src[j] = rnd.Rand8() % 2 ? 255 : 0;
-        dst[j] = src[j] > 0 ? 0 : 255;
-        test_input_block[j] = src[j] - dst[j];
-      }
-
-      REGISTER_STATE_CHECK(
-          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
-      REGISTER_STATE_CHECK(
-          RunInvTxfm(test_temp_block, dst, pitch_));
-
-      for (int j = 0; j < 64; ++j) {
-        const int diff = dst[j] - src[j];
-        const int error = diff * diff;
-        if (max_error < error)
-          max_error = error;
-        total_error += error;
-      }
-
-      EXPECT_GE(1, max_error)
-          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has"
-          << "an individual roundtrip error > 1";
-
-      EXPECT_GE(count_test_block/5, total_error)
-          << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average"
-          << " roundtrip error > 1/5 per block";
-    }
-  }
-
-  int pitch_;
-  int tx_type_;
-  fht_t fwd_txfm_ref;
 };

-class FwdTrans8x8DCT : public FwdTrans8x8TestBase,
-                       public PARAMS(fdct_t, idct_t, int) {
- public:
-  virtual ~FwdTrans8x8DCT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 16;
-    fwd_txfm_ref = fdct8x8_ref;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
-    fwd_txfm_(in, out, stride);
-  }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride >> 1);
-  }
-
-  fdct_t fwd_txfm_;
-  idct_t inv_txfm_;
-};
-
-TEST_P(FwdTrans8x8DCT, SignBiasCheck) {
-  RunSignBiasCheck();
-}
-
-TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) {
-  RunRoundTripErrorCheck();
-}
-
-TEST_P(FwdTrans8x8DCT, ExtremalCheck) {
-  RunExtremalCheck();
-}
-
-class FwdTrans8x8HT : public FwdTrans8x8TestBase,
-                      public PARAMS(fht_t, iht_t, int) {
- public:
-  virtual ~FwdTrans8x8HT() {}
-
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    tx_type_  = GET_PARAM(2);
-    pitch_    = 8;
-    fwd_txfm_ref = fht8x8_ref;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void RunFwdTxfm(int16_t *in, int16_t *out, int stride) {
-    fwd_txfm_(in, out, stride, tx_type_);
-  }
-  void RunInvTxfm(int16_t *out, uint8_t *dst, int stride) {
-    inv_txfm_(out, dst, stride, tx_type_);
-  }
-
-  fht_t fwd_txfm_;
-  iht_t inv_txfm_;
-};
-
-TEST_P(FwdTrans8x8HT, SignBiasCheck) {
-  RunSignBiasCheck();
-}
-
-TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) {
-  RunRoundTripErrorCheck();
-}
-
-TEST_P(FwdTrans8x8HT, ExtremalCheck) {
-  RunExtremalCheck();
-}
-
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
-    C, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct8x8_c, &vp9_short_idct8x8_add_c, 0)));
-INSTANTIATE_TEST_CASE_P(
-    C, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 0),
-        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 1),
-        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 2),
-        make_tuple(&vp9_short_fht8x8_c, &vp9_short_iht8x8_add_c, 3)));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fdct8x8_sse2, &vp9_short_idct8x8_add_sse2, 0)));
-INSTANTIATE_TEST_CASE_P(
-    SSE2, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 0),
-        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 1),
-        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 2),
-        make_tuple(&vp9_short_fht8x8_sse2, &vp9_short_iht8x8_add_sse2, 3)));
-#endif
 }  // namespace
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -11,7 +11,6 @@
 #define TEST_I420_VIDEO_SOURCE_H_
 #include <cstdio>
 #include <cstdlib>
-#include <string>

 #include "test/video_source.h"

@@ -35,6 +34,7 @@ class I420VideoSource : public VideoSource {
        height_(0),
        framerate_numerator_(rate_numerator),
        framerate_denominator_(rate_denominator) {
+
    // This initializes raw_sz_, width_, height_ and allocates an img.
    SetSize(width, height);
  }
@@ -49,7 +49,7 @@ class I420VideoSource : public VideoSource {
    if (input_file_)
      fclose(input_file_);
    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+    ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
        << file_name_;
    if (start_) {
      fseek(input_file_, raw_sz_ * start_, SEEK_SET);
@@ -92,7 +92,6 @@ class I420VideoSource : public VideoSource {
  }

  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
    // Read a frame from input_file.
    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
      limit_ = frame_;
@@ -109,8 +108,8 @@ class I420VideoSource : public VideoSource {
  unsigned int frame_;
  unsigned int width_;
  unsigned int height_;
-  int framerate_numerator_;
-  int framerate_denominator_;
+  unsigned int framerate_numerator_;
+  unsigned int framerate_denominator_;
 };

 }  // namespace libvpx_test
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -15,10 +15,10 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 extern "C" {
-#include "./vp9_rtcd.h"
+#include "vp9_rtcd.h"
 }

-#include "test/acm_random.h"
+#include "acm_random.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;
@@ -27,10 +27,10 @@ namespace {

 #ifdef _MSC_VER
 static int round(double x) {
-  if (x < 0)
-    return static_cast<int>(ceil(x - 0.5));
+  if(x < 0)
+    return (int)ceil(x - 0.5);
  else
-    return static_cast<int>(floor(x + 0.5));
+    return (int)floor(x + 0.5);
 }
 #endif

--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -8,6 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+
 extern "C" {
 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
@@ -16,101 +17,105 @@ extern "C" {
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "vpx/vpx_integer.h"
-
-typedef void (*idct_fn_t)(int16_t *input, unsigned char *pred_ptr,
+typedef void (*idct_fn_t)(short *input, unsigned char *pred_ptr,
                          int pred_stride, unsigned char *dst_ptr,
                          int dst_stride);
 namespace {
 class IDCTTest : public ::testing::TestWithParam<idct_fn_t> {
- protected:
-  virtual void SetUp() {
-    int i;
+  protected:
+    virtual void SetUp() {
+        int i;

-    UUT = GetParam();
-    memset(input, 0, sizeof(input));
-    /* Set up guard blocks */
-    for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
-  }
+        UUT = GetParam();
+        memset(input, 0, sizeof(input));
+        /* Set up guard blocks */
+        for (i = 0; i < 256; i++)
+            output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
+    }

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+    virtual void TearDown() {
+      libvpx_test::ClearSystemState();
+    }

-  idct_fn_t UUT;
-  int16_t input[16];
-  unsigned char output[256];
-  unsigned char predict[256];
+    idct_fn_t UUT;
+    short input[16];
+    unsigned char output[256];
+    unsigned char predict[256];
 };

 TEST_P(IDCTTest, TestGuardBlocks) {
-  int i;
+    int i;

-  for (i = 0; i < 256; i++)
-    if ((i & 0xF) < 4 && i < 64)
-      EXPECT_EQ(0, output[i]) << i;
-    else
-      EXPECT_EQ(255, output[i]);
+    for (i = 0; i < 256; i++)
+        if ((i & 0xF) < 4 && i < 64)
+            EXPECT_EQ(0, output[i]) << i;
+        else
+            EXPECT_EQ(255, output[i]);
 }

 TEST_P(IDCTTest, TestAllZeros) {
-  int i;
+    int i;

-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
-    if ((i & 0xF) < 4 && i < 64)
-      EXPECT_EQ(0, output[i]) << "i==" << i;
-    else
-      EXPECT_EQ(255, output[i]) << "i==" << i;
+    for (i = 0; i < 256; i++)
+        if ((i & 0xF) < 4 && i < 64)
+            EXPECT_EQ(0, output[i]) << "i==" << i;
+        else
+            EXPECT_EQ(255, output[i]) << "i==" << i;
 }

 TEST_P(IDCTTest, TestAllOnes) {
-  int i;
+    int i;

-  input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+    input[0] = 4;
+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
-    if ((i & 0xF) < 4 && i < 64)
-      EXPECT_EQ(1, output[i]) << "i==" << i;
-    else
-      EXPECT_EQ(255, output[i]) << "i==" << i;
+    for (i = 0; i < 256; i++)
+        if ((i & 0xF) < 4 && i < 64)
+            EXPECT_EQ(1, output[i]) << "i==" << i;
+        else
+            EXPECT_EQ(255, output[i]) << "i==" << i;
 }

 TEST_P(IDCTTest, TestAddOne) {
-  int i;
+    int i;

-  for (i = 0; i < 256; i++) predict[i] = i;
-  input[0] = 4;
-  REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+    for (i = 0; i < 256; i++)
+        predict[i] = i;
+    input[0] = 4;
+    REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));

-  for (i = 0; i < 256; i++)
-    if ((i & 0xF) < 4 && i < 64)
-      EXPECT_EQ(i + 1, output[i]) << "i==" << i;
-    else
-      EXPECT_EQ(255, output[i]) << "i==" << i;
+    for (i = 0; i < 256; i++)
+        if ((i & 0xF) < 4 && i < 64)
+            EXPECT_EQ(i+1, output[i]) << "i==" << i;
+        else
+            EXPECT_EQ(255, output[i]) << "i==" << i;
 }

 TEST_P(IDCTTest, TestWithData) {
-  int i;
+    int i;

-  for (i = 0; i < 16; i++) input[i] = i;
+    for (i = 0; i < 16; i++)
+        input[i] = i;

-  REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+    REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  for (i = 0; i < 256; i++)
-    if ((i & 0xF) > 3 || i > 63)
-      EXPECT_EQ(255, output[i]) << "i==" << i;
-    else if (i == 0)
-      EXPECT_EQ(11, output[i]) << "i==" << i;
-    else if (i == 34)
-      EXPECT_EQ(1, output[i]) << "i==" << i;
-    else if (i == 2 || i == 17 || i == 32)
-      EXPECT_EQ(3, output[i]) << "i==" << i;
-    else
-      EXPECT_EQ(0, output[i]) << "i==" << i;
+    for (i = 0; i < 256; i++)
+        if ((i & 0xF) > 3 || i > 63)
+            EXPECT_EQ(255, output[i]) << "i==" << i;
+        else if (i == 0)
+            EXPECT_EQ(11, output[i]) << "i==" << i;
+        else if (i == 34)
+            EXPECT_EQ(1, output[i]) << "i==" << i;
+        else if (i == 2 || i == 17 || i == 32)
+            EXPECT_EQ(3, output[i]) << "i==" << i;
+        else
+            EXPECT_EQ(0, output[i]) << "i==" << i;
 }

-INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
+INSTANTIATE_TEST_CASE_P(C, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_c));
 #if HAVE_MMX
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -15,8 +15,8 @@
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 extern "C" {
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vpx_mem/vpx_mem.h"
 }
@@ -27,24 +27,18 @@ using libvpx_test::ACMRandom;

 class IntraPredBase {
 public:
-  virtual ~IntraPredBase() {}
-
  virtual void TearDown() {
    libvpx_test::ClearSystemState();
  }

 protected:
-  void SetupMacroblock(MACROBLOCKD *mbptr,
-                       MODE_INFO *miptr,
-                       uint8_t *data,
-                       int block_size,
-                       int stride,
+  void SetupMacroblock(uint8_t *data, int block_size, int stride,
                       int num_planes) {
-    mbptr_ = mbptr;
-    miptr_ = miptr;
-    mbptr_->up_available = 1;
-    mbptr_->left_available = 1;
-    mbptr_->mode_info_context = miptr_;
+    memset(&mb_, 0, sizeof(mb_));
+    memset(&mi_, 0, sizeof(mi_));
+    mb_.up_available = 1;
+    mb_.left_available = 1;
+    mb_.mode_info_context = &mi_;
    stride_ = stride;
    block_size_ = block_size;
    num_planes_ = num_planes;
@@ -67,14 +61,14 @@ class IntraPredBase {
  virtual void Predict(MB_PREDICTION_MODE mode) = 0;

  void SetLeftUnavailable() {
-    mbptr_->left_available = 0;
+    mb_.left_available = 0;
    for (int p = 0; p < num_planes_; p++)
      for (int i = -1; i < block_size_; ++i)
        data_ptr_[p][stride_ * i - 1] = 129;
  }

  void SetTopUnavailable() {
-    mbptr_->up_available = 0;
+    mb_.up_available = 0;
    for (int p = 0; p < num_planes_; p++)
      memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
  }
@@ -100,19 +94,19 @@ class IntraPredBase {
    for (int p = 0; p < num_planes_; p++) {
      // calculate expected DC
      int expected;
-      if (mbptr_->up_available || mbptr_->left_available) {
-        int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
-                             mbptr_->left_available;
-        if (mbptr_->up_available)
+      if (mb_.up_available || mb_.left_available) {
+        int sum = 0, shift = BlockSizeLog2Min1() + mb_.up_available +
+                             mb_.left_available;
+        if (mb_.up_available)
          for (int x = 0; x < block_size_; x++)
            sum += data_ptr_[p][x - stride_];
-        if (mbptr_->left_available)
+        if (mb_.left_available)
          for (int y = 0; y < block_size_; y++)
            sum += data_ptr_[p][y * stride_ - 1];
        expected = (sum + (1 << (shift - 1))) >> shift;
-      } else {
+      } else
        expected = 0x80;
-      }
+
      // check that all subsequent lines are equal to the first
      for (int y = 1; y < block_size_; ++y)
        ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
@@ -213,8 +207,8 @@ class IntraPredBase {
    }
  }

-  MACROBLOCKD *mbptr_;
-  MODE_INFO *miptr_;
+  MACROBLOCKD mb_;
+  MODE_INFO mi_;
  uint8_t *data_ptr_[2];  // in the case of Y, only [0] is used
  int stride_;
  int block_size_;
@@ -232,18 +226,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,
    protected IntraPredBase {
 public:
  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
    data_array_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  static void TearDownTestCase() {
    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
    data_array_ = NULL;
  }

@@ -260,12 +248,12 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,

  virtual void SetUp() {
    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
+    SetupMacroblock(data_array_, kBlockSize, kStride, 1);
  }

  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.mode = mode;
-    REGISTER_STATE_CHECK(pred_fn_(mbptr_,
+    mb_.mode_info_context->mbmi.mode = mode;
+    REGISTER_STATE_CHECK(pred_fn_(&mb_,
                                  data_ptr_[0] - kStride,
                                  data_ptr_[0] - 1, kStride,
                                  data_ptr_[0], kStride));
@@ -273,12 +261,8 @@ class IntraPredYTest : public ::testing::TestWithParam<intra_pred_y_fn_t>,

  intra_pred_y_fn_t pred_fn_;
  static uint8_t* data_array_;
-  static MACROBLOCKD * mb_;
-  static MODE_INFO *mi_;
 };

-MACROBLOCKD* IntraPredYTest::mb_ = NULL;
-MODE_INFO* IntraPredYTest::mi_ = NULL;
 uint8_t* IntraPredYTest::data_array_ = NULL;

 TEST_P(IntraPredYTest, IntraPredTests) {
@@ -313,18 +297,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
    protected IntraPredBase {
 public:
  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
    data_array_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
  }

  static void TearDownTestCase() {
    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
    data_array_ = NULL;
  }

@@ -342,12 +320,12 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,

  virtual void SetUp() {
    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
+    SetupMacroblock(data_array_, kBlockSize, kStride, 2);
  }

  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.uv_mode = mode;
-    pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
+    mb_.mode_info_context->mbmi.uv_mode = mode;
+    pred_fn_(&mb_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
             data_ptr_[0], data_ptr_[1], kStride);
  }
@@ -360,12 +338,8 @@ class IntraPredUVTest : public ::testing::TestWithParam<intra_pred_uv_fn_t>,
  // We use 9 lines so we have one line above us for top-prediction.
  // [0] = U, [1] = V
  static uint8_t* data_array_;
-  static MACROBLOCKD* mb_;
-  static MODE_INFO* mi_;
 };

-MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
-MODE_INFO* IntraPredUVTest::mi_ = NULL;
 uint8_t* IntraPredUVTest::data_array_ = NULL;

 TEST_P(IntraPredUVTest, IntraPredTests) {
--- a/test/ivf_video_source.h
+++ b/test/ivf_video_source.h
@@ -28,7 +28,7 @@ static unsigned int MemGetLe32(const uint8_t *mem) {
 // so that we can do actual file decodes.
 class IVFVideoSource : public CompressedVideoSource {
 public:
-  explicit IVFVideoSource(const std::string &file_name)
+  IVFVideoSource(const std::string &file_name)
      : file_name_(file_name),
        input_file_(NULL),
        compressed_frame_buf_(NULL),
@@ -47,13 +47,12 @@ class IVFVideoSource : public CompressedVideoSource {
  virtual void Init() {
    // Allocate a buffer for read in the compressed video frame.
    compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize];
-    ASSERT_TRUE(compressed_frame_buf_ != NULL)
-        << "Allocate frame buffer failed";
+    ASSERT_TRUE(compressed_frame_buf_) << "Allocate frame buffer failed";
  }

  virtual void Begin() {
    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+    ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
        << file_name_;

    // Read file header
@@ -73,7 +72,6 @@ class IVFVideoSource : public CompressedVideoSource {
  }

  void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
    uint8_t frame_hdr[kIvfFrameHdrSize];
    // Check frame header and read a frame from input_file.
    if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_)
--- a/test/keyframe_test.cc
+++ b/test/keyframe_test.cc
@@ -31,6 +31,10 @@ class KeyframeTest : public ::libvpx_test::EncoderTest,
    set_cpu_used_ = 0;
  }

+  virtual bool Continue() const {  // true iff no fatal failure and !abort_
+    return !HasFatalFailure() && !abort_;
+  }  // NOTE(review): presumably polled by the encode loop; confirm in base
+
  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (kf_do_force_kf_)
@@ -132,6 +136,7 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
  // Verify that keyframes match the file keyframes in the file.
  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
       iter != kf_pts_list_.end(); ++iter) {
+
    if (deadline_ == VPX_DL_REALTIME && *iter > 0)
      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
        << *iter;
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -8,8 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef TEST_MD5_HELPER_H_
-#define TEST_MD5_HELPER_H_
+#ifndef LIBVPX_TEST_MD5_HELPER_H_
+#define LIBVPX_TEST_MD5_HELPER_H_

 extern "C" {
 #include "./md5_utils.h"
@@ -25,15 +25,9 @@ class MD5 {

  void Add(const vpx_image_t *img) {
    for (int plane = 0; plane < 3; ++plane) {
-      const uint8_t *buf = img->planes[plane];
-      // Calculate the width and height to do the md5 check. For the chroma
-      // plane, we never want to round down and thus skip a pixel so if
-      // we are shifting by 1 (chroma_shift) we add 1 before doing the shift.
-      // This works only for chroma_shift of 0 and 1.
-      const int h = plane ? (img->d_h + img->y_chroma_shift) >>
-                    img->y_chroma_shift : img->d_h;
-      const int w = plane ? (img->d_w + img->x_chroma_shift) >>
-                    img->x_chroma_shift : img->d_w;
+      uint8_t *buf = img->planes[plane];
+      const int h = plane ? (img->d_h + 1) >> 1 : img->d_h;
+      const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;

      for (int y = 0; y < h; ++y) {
        MD5Update(&md5_, buf, w);
@@ -67,4 +61,4 @@ class MD5 {

 }  // namespace libvpx_test

-#endif  // TEST_MD5_HELPER_H_
+#endif  // LIBVPX_TEST_MD5_HELPER_H_
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -11,8 +11,8 @@
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
 extern "C" {
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 }
@@ -63,8 +63,7 @@ TEST_P(Vp8PostProcessingFilterTest, FilterOutputCheck) {
  // Pointers to top-left pixel of block in the input and output images.
  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
  uint8_t *const dst_image_ptr = dst_image + 8;
-  uint8_t *const flimits =
-      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
+  uint8_t *const flimits = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  (void)vpx_memset(flimits, 255, block_width);

  // Initialize pixels in the input:
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -8,8 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef TEST_REGISTER_STATE_CHECK_H_
-#define TEST_REGISTER_STATE_CHECK_H_
+#ifndef LIBVPX_TEST_REGISTER_STATE_CHECK_H_
+#define LIBVPX_TEST_REGISTER_STATE_CHECK_H_

 #ifdef _WIN64

@@ -92,4 +92,4 @@ class RegisterStateCheck {};

 #endif  // _WIN64

-#endif  // TEST_REGISTER_STATE_CHECK_H_
+#endif  // LIBVPX_TEST_REGISTER_STATE_CHECK_H_
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -16,68 +16,8 @@
 #include "test/video_source.h"
 #include "test/util.h"

-// Enable(1) or Disable(0) writing of the compressed bitstream.
-#define WRITE_COMPRESSED_STREAM 0
-
 namespace {

-#if WRITE_COMPRESSED_STREAM
-static void mem_put_le16(char *const mem, const unsigned int val) {
-  mem[0] = val;
-  mem[1] = val >> 8;
-}
-
-static void mem_put_le32(char *const mem, const unsigned int val) {
-  mem[0] = val;
-  mem[1] = val >> 8;
-  mem[2] = val >> 16;
-  mem[3] = val >> 24;
-}
-
-static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
-                                  int frame_cnt, FILE *const outfile) {
-  char header[32];
-
-  header[0] = 'D';
-  header[1] = 'K';
-  header[2] = 'I';
-  header[3] = 'F';
-  mem_put_le16(header + 4,  0);                   /* version */
-  mem_put_le16(header + 6,  32);                  /* headersize */
-  mem_put_le32(header + 8,  0x30395056);          /* fourcc (vp9) */
-  mem_put_le16(header + 12, cfg->g_w);            /* width */
-  mem_put_le16(header + 14, cfg->g_h);            /* height */
-  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
-  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
-  mem_put_le32(header + 24, frame_cnt);           /* length */
-  mem_put_le32(header + 28, 0);                   /* unused */
-
-  (void)fwrite(header, 1, 32, outfile);
-}
-
-static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
-  char header[4];
-  mem_put_le32(header, static_cast<unsigned int>(size));
-  (void)fwrite(header, 1, 4, outfile);
-}
-
-static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
-                                   FILE *const outfile) {
-  char header[12];
-  vpx_codec_pts_t pts;
-
-  if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-    return;
-
-  pts = pkt->data.frame.pts;
-  mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
-  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
-  mem_put_le32(header + 8, pts >> 32);
-
-  (void)fwrite(header, 1, 12, outfile);
-}
-#endif  // WRITE_COMPRESSED_STREAM
-
 const unsigned int kInitialWidth = 320;
 const unsigned int kInitialHeight = 240;

@@ -102,8 +42,6 @@ class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
    limit_ = 60;
  }

-  virtual ~ResizingVideoSource() {}
-
 protected:
  virtual void Next() {
    ++frame_;
@@ -118,15 +56,13 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
 protected:
  ResizeTest() : EncoderTest(GET_PARAM(0)) {}

-  virtual ~ResizeTest() {}
-
  struct FrameInfo {
    FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
        : pts(_pts), w(_w), h(_h) {}

    vpx_codec_pts_t pts;
-    unsigned int w;
-    unsigned int h;
+    unsigned int    w;
+    unsigned int    h;
  };

  virtual void SetUp() {
@@ -134,6 +70,10 @@ class ResizeTest : public ::libvpx_test::EncoderTest,
    SetMode(GET_PARAM(1));
  }

+  virtual bool Continue() const {  // true iff no fatal failure and !abort_
+    return !HasFatalFailure() && !abort_;
+  }  // NOTE(review): presumably polled by the encode loop; confirm in base
+
  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t pts) {
    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
@@ -159,47 +99,17 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
  }
 }

-const unsigned int kStepDownFrame = 3;
-const unsigned int kStepUpFrame = 6;
-
 class ResizeInternalTest : public ResizeTest {
 protected:
-#if WRITE_COMPRESSED_STREAM
-  ResizeInternalTest()
-      : ResizeTest(),
-        frame0_psnr_(0.0),
-        outfile_(NULL),
-        out_frames_(0) {}
-#else
  ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {}
-#endif
-
-  virtual ~ResizeInternalTest() {}
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-#if WRITE_COMPRESSED_STREAM
-    outfile_ = fopen("vp90-2-05-resize.ivf", "wb");
-#endif
-  }
-
-  virtual void EndPassHook() {
-#if WRITE_COMPRESSED_STREAM
-    if (outfile_) {
-      if (!fseek(outfile_, 0, SEEK_SET))
-        write_ivf_file_header(&cfg_, out_frames_, outfile_);
-      fclose(outfile_);
-      outfile_ = NULL;
-    }
-#endif
-  }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
-    if (video->frame() == kStepDownFrame) {
+    if (video->frame() == 3) {
      struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
      encoder->Control(VP8E_SET_SCALEMODE, &mode);
    }
-    if (video->frame() == kStepUpFrame) {
+    if (video->frame() == 6) {
      struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
      encoder->Control(VP8E_SET_SCALEMODE, &mode);
    }
@@ -211,46 +121,21 @@ class ResizeInternalTest : public ResizeTest {
    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 1.0);
  }

-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-#if WRITE_COMPRESSED_STREAM
-    ++out_frames_;
-
-    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
-      write_ivf_file_header(&cfg_, 0, outfile_);
-
-    // Write frame header and data.
-    write_ivf_frame_header(pkt, outfile_);
-    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
-#endif
-  }
-
  double frame0_psnr_;
-#if WRITE_COMPRESSED_STREAM
-  FILE *outfile_;
-  unsigned int out_frames_;
-#endif
 };

 TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 10);
  init_flags_ = VPX_CODEC_USE_PSNR;
-
  // q picked such that initial keyframe on this clip is ~30dB PSNR
  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
-
-  // If the number of frames being encoded is smaller than g_lag_in_frames
-  // the encoded frame is unavailable using the current API. Comparing
-  // frames to detect mismatch would then not be possible. Set
-  // g_lag_in_frames = 0 to get around this.
-  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const vpx_codec_pts_t pts = info->pts;
-    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
+    if (pts >= 3 && pts < 6) {
      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
    } else {
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -17,6 +17,7 @@ extern "C" {
 #include "./vpx_config.h"
 #if CONFIG_VP8_ENCODER
 #include "./vp8_rtcd.h"
+//#include "vp8/common/blockd.h"
 #endif
 #if CONFIG_VP9_ENCODER
 #include "./vp9_rtcd.h"
@@ -427,7 +428,6 @@ INSTANTIATE_TEST_CASE_P(MMX, SADTest, ::testing::ValuesIn(mmx_tests));

 #if HAVE_SSE
 #if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_4x4_sse_vp9 = vp9_sad4x4_sse;
 const sad_m_by_n_fn_t sad_4x8_sse_vp9 = vp9_sad4x8_sse;
 INSTANTIATE_TEST_CASE_P(SSE, SADTest, ::testing::Values(
@@ -441,7 +441,6 @@ INSTANTIATE_TEST_CASE_P(SSE, SADx4Test, ::testing::Values(
                        make_tuple(4, 4, sad_4x4x4d_sse)));
 #endif
 #endif
-#endif

 #if HAVE_SSE2
 #if CONFIG_VP8_ENCODER
@@ -452,20 +451,14 @@ const sad_m_by_n_fn_t sad_8x8_wmt = vp8_sad8x8_wmt;
 const sad_m_by_n_fn_t sad_4x4_wmt = vp8_sad4x4_wmt;
 #endif
 #if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_64x64_sse2_vp9 = vp9_sad64x64_sse2;
-const sad_m_by_n_fn_t sad_64x32_sse2_vp9 = vp9_sad64x32_sse2;
-const sad_m_by_n_fn_t sad_32x64_sse2_vp9 = vp9_sad32x64_sse2;
 const sad_m_by_n_fn_t sad_32x32_sse2_vp9 = vp9_sad32x32_sse2;
-const sad_m_by_n_fn_t sad_32x16_sse2_vp9 = vp9_sad32x16_sse2;
-const sad_m_by_n_fn_t sad_16x32_sse2_vp9 = vp9_sad16x32_sse2;
 const sad_m_by_n_fn_t sad_16x16_sse2_vp9 = vp9_sad16x16_sse2;
-const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
 const sad_m_by_n_fn_t sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
+const sad_m_by_n_fn_t sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
 const sad_m_by_n_fn_t sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
 const sad_m_by_n_fn_t sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
 #endif
-#endif
 const sad_m_by_n_test_param_t sse2_tests[] = {
 #if CONFIG_VP8_ENCODER
  make_tuple(16, 16, sad_16x16_wmt),
@@ -475,25 +468,18 @@ const sad_m_by_n_test_param_t sse2_tests[] = {
  make_tuple(4, 4, sad_4x4_wmt),
 #endif
 #if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
  make_tuple(64, 64, sad_64x64_sse2_vp9),
-  make_tuple(64, 32, sad_64x32_sse2_vp9),
-  make_tuple(32, 64, sad_32x64_sse2_vp9),
  make_tuple(32, 32, sad_32x32_sse2_vp9),
-  make_tuple(32, 16, sad_32x16_sse2_vp9),
-  make_tuple(16, 32, sad_16x32_sse2_vp9),
  make_tuple(16, 16, sad_16x16_sse2_vp9),
-  make_tuple(16, 8, sad_16x8_sse2_vp9),
  make_tuple(8, 16, sad_8x16_sse2_vp9),
+  make_tuple(16, 8, sad_16x8_sse2_vp9),
  make_tuple(8, 8, sad_8x8_sse2_vp9),
  make_tuple(8, 4, sad_8x4_sse2_vp9),
 #endif
-#endif
 };
 INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests));

 #if CONFIG_VP9_ENCODER
-#if CONFIG_USE_X86INC
 const sad_n_by_n_by_4_fn_t sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
 const sad_n_by_n_by_4_fn_t sad_32x64x4d_sse2 = vp9_sad32x64x4d_sse2;
@@ -519,7 +505,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
                        make_tuple(8, 4, sad_8x4x4d_sse2)));
 #endif
 #endif
-#endif

 #if HAVE_SSE3
 #if CONFIG_VP8_ENCODER
@@ -538,11 +523,9 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADx4Test, ::testing::Values(
 #endif

 #if HAVE_SSSE3
-#if CONFIG_USE_X86INC
 const sad_m_by_n_fn_t sad_16x16_sse3 = vp8_sad16x16_sse3;
 INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
                        make_tuple(16, 16, sad_16x16_sse3)));
 #endif
-#endif

 }  // namespace
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -17,19 +17,15 @@
 #include <sys/types.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"
 extern "C" {
 #include "vp8/encoder/onyx_int.h"
 }

-using libvpx_test::ACMRandom;
-
 namespace {

 TEST(Vp8RoiMapTest, ParameterCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
  int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 };
  unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 };
@@ -125,10 +121,10 @@ TEST(Vp8RoiMapTest, ParameterCheck) {
    for (int i = 0; i < 1000; ++i) {
      int rand_deltas[4];
      int deltas_valid;
-      rand_deltas[0] = rnd(160) - 80;
-      rand_deltas[1] = rnd(160) - 80;
-      rand_deltas[2] = rnd(160) - 80;
-      rand_deltas[3] = rnd(160) - 80;
+      rand_deltas[0] = (rand() % 160) - 80;
+      rand_deltas[1] = (rand() % 160) - 80;
+      rand_deltas[2] = (rand() % 160) - 80;
+      rand_deltas[3] = (rand() % 160) - 80;

      deltas_valid = ((abs(rand_deltas[0]) <= 63) &&
                      (abs(rand_deltas[1]) <= 63) &&
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -13,8 +13,8 @@
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 extern "C" {
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
+#include "vpx_config.h"
+#include "vp8_rtcd.h"
 #include "vp8/common/blockd.h"
 #include "vp8/encoder/block.h"
 #include "vpx_mem/vpx_mem.h"
@@ -51,7 +51,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
  bd.predictor = reinterpret_cast<unsigned char*>(
      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));

-  for (int i = 0; kSrcStride[i] > 0; ++i) {
+  for(int i = 0; kSrcStride[i] > 0; ++i) {
    // start at block0
    be.src = 0;
    be.base_src = &source;
@@ -61,7 +61,7 @@ TEST_P(SubtractBlockTest, SimpleSubtract) {
    int16_t *src_diff = be.src_diff;
    for (int r = 0; r < kBlockHeight; ++r) {
      for (int c = 0; c < kBlockWidth; ++c) {
-        src_diff[c] = static_cast<int16_t>(0xa5a5);
+        src_diff[c] = 0xa5a5;
      }
      src_diff += kDiffPredStride;
    }
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -33,6 +33,10 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
    delete[] modified_buf_;
  }

+  virtual bool Continue() const {  // true iff no fatal failure and !abort_
+    return !HasFatalFailure() && !abort_;
+  }  // NOTE(review): presumably polled by the encode loop; confirm in base
+
  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -520,10 +520,3 @@ d17bc08eedfc60c4c23d576a6c964a21bf854d1f  vp90-2-03-size-226x202.webm
 83c6d8f2969b759e10e5c6542baca1265c874c29  vp90-2-03-size-226x224.webm.md5
 fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce  vp90-2-03-size-226x226.webm
 94ad19b8b699cea105e2ff18f0df2afd7242bcf7  vp90-2-03-size-226x226.webm.md5
-b6524e4084d15b5d0caaa3d3d1368db30cbee69c  vp90-2-03-deltaq.webm
-65f45ec9a55537aac76104818278e0978f94a678  vp90-2-03-deltaq.webm.md5
-4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba  vp90-2-05-resize.ivf
-7f6d8879336239a43dbb6c9f13178cb11cf7ed09  vp90-2-05-resize.ivf.md5
-495256cfd123fe777b2c0406862ed8468a1f4677  vp91-2-04-yv444.webm
-65e3a7ffef61ab340d9140f335ecc49125970c2c  vp91-2-04-yv444.webm.md5
-
--- a/test/test.mk
+++ b/test/test.mk
@@ -24,9 +24,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += resize_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../md5_utils.h ../md5_utils.c
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
@@ -89,7 +87,6 @@ LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += convolve_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
@@ -629,9 +626,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yv444.webm.md5
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <string>
-#include "./vpx_config.h"
+#include "vpx_config.h"
 extern "C" {
 #if ARCH_X86 || ARCH_X86_64
 #include "vpx_ports/x86.h"
@@ -48,9 +48,7 @@ int main(int argc, char **argv) {
 #endif

 #if !CONFIG_SHARED
-// Shared library builds don't support whitebox tests
-// that exercise internal symbols.
-
+  /* Shared library builds don't support whitebox tests that exercise internal symbols. */
 #if CONFIG_VP8
  vp8_rtcd();
 #endif
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -159,11 +159,7 @@ const char *kVP9TestVectors[] = {
  "vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm",
  "vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm",
  "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm",
-  "vp90-2-03-size-226x226.webm", "vp90-2-03-deltaq.webm",
-  "vp90-2-05-resize.ivf",
-#if CONFIG_NON420
-  "vp91-2-04-yv444.webm"
-#endif
+  "vp90-2-03-size-226x226.webm"
 };
 #endif

@@ -185,7 +181,6 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,

  virtual void DecompressedFrameHook(const vpx_image_t& img,
                                     const unsigned int frame_number) {
-    ASSERT_TRUE(md5_file_ != NULL);
    char expected_md5[33];
    char junk[128];

--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -23,13 +23,10 @@ extern "C" {

 namespace {
 class TileIndependenceTest : public ::libvpx_test::EncoderTest,
-                             public ::libvpx_test::CodecTestWithParam<int> {
+    public ::libvpx_test::CodecTestWithParam<int> {
 protected:
-  TileIndependenceTest()
-      : EncoderTest(GET_PARAM(0)),
-        md5_fw_order_(),
-        md5_inv_order_(),
-        n_tiles_(GET_PARAM(1)) {
+  TileIndependenceTest() : EncoderTest(GET_PARAM(0)), n_tiles_(GET_PARAM(1)),
+      md5_fw_order_(), md5_inv_order_() {
    init_flags_ = VPX_CODEC_USE_PSNR;
    vpx_codec_dec_cfg_t cfg;
    cfg.w = 704;
@@ -59,8 +56,9 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,

  void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt,
                 ::libvpx_test::MD5 *md5) {
-    const vpx_codec_err_t res = dec->DecodeFrame(
-        reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
+    const vpx_codec_err_t res =
+        dec->DecodeFrame(reinterpret_cast<uint8_t*>(pkt->data.frame.buf),
+                         pkt->data.frame.sz);
    if (res != VPX_CODEC_OK) {
      abort_ = true;
      ASSERT_EQ(VPX_CODEC_OK, res);
@@ -74,11 +72,11 @@ class TileIndependenceTest : public ::libvpx_test::EncoderTest,
    UpdateMD5(inv_dec_, pkt, &md5_inv_order_);
  }

-  ::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
-  ::libvpx_test::Decoder *fw_dec_, *inv_dec_;
-
 private:
  int n_tiles_;
+ protected:
+  ::libvpx_test::MD5 md5_fw_order_, md5_inv_order_;
+  ::libvpx_test::Decoder *fw_dec_, *inv_dec_;
 };

 // run an encode with 2 or 4 tiles, and do the decode both in normal and
@@ -95,7 +93,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
                                     timebase.den, timebase.num, 0, 30);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-  const char *md5_fw_str = md5_fw_order_.Get();
+  const char *md5_fw_str  = md5_fw_order_.Get();
  const char *md5_inv_str = md5_inv_order_.Get();

  // could use ASSERT_EQ(!memcmp(.., .., 16) here, but this gives nicer
@@ -104,6 +102,7 @@ TEST_P(TileIndependenceTest, MD5Match) {
  ASSERT_STREQ(md5_fw_str, md5_inv_str);
 }

-VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
+VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest,
+                          ::testing::Range(0, 2, 1));

 }  // namespace
--- a/test/util.h
+++ b/test/util.h
@@ -37,7 +37,7 @@ static double compute_psnr(const vpx_image_t *img1,
                  img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j];
      sqrerr += d * d;
    }
-  double mse = static_cast<double>(sqrerr) / (width_y * height_y);
+  double mse = sqrerr / (width_y * height_y);
  double psnr = 100.0;
  if (mse > 0.0) {
    psnr = 10 * log10(255.0 * 255.0 / mse);
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -16,16 +16,16 @@
 #include "test/register_state_check.h"

 #include "vpx/vpx_integer.h"
-#include "./vpx_config.h"
+#include "vpx_config.h"
 extern "C" {
 #include "vpx_mem/vpx_mem.h"
 #if CONFIG_VP8_ENCODER
 # include "vp8/common/variance.h"
-# include "./vp8_rtcd.h"
+# include "vp8_rtcd.h"
 #endif
 #if CONFIG_VP9_ENCODER
 # include "vp9/encoder/vp9_variance.h"
-# include "./vp9_rtcd.h"
+# include "vp9_rtcd.h"
 #endif
 }
 #include "test/acm_random.h"
@@ -78,9 +78,37 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
  return sse - (((int64_t) se * se) >> (l2w + l2h));
 }

+static unsigned int subpel_avg_variance_ref(const uint8_t *ref,
+                                            const uint8_t *src,
+                                            const uint8_t *second_pred,
+                                            int l2w, int l2h,
+                                            int xoff, int yoff,
+                                            unsigned int *sse_ptr) {
+  int se = 0;  // running sum of per-pixel differences
+  unsigned int sse = 0;  // running sum of squared differences
+  const int w = 1 << l2w, h = 1 << l2h;  // l2w / l2h are log2 of w / h
+  for (int y = 0; y < h; y++) {  // visit every pixel of the w x h block
+    for (int x = 0; x < w; x++) {
+      // bilinear blend of 2x2 ref samples (stride w + 1); offsets in 1/16ths
+      const int a1 = ref[(w + 1) * (y + 0) + x + 0];
+      const int a2 = ref[(w + 1) * (y + 0) + x + 1];
+      const int b1 = ref[(w + 1) * (y + 1) + x + 0];
+      const int b2 = ref[(w + 1) * (y + 1) + x + 1];
+      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);  // row y blend
+      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);  // row y + 1 blend
+      const int r = a + (((b - a) * yoff + 8) >> 4);  // blend of both rows
+      int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+      se += diff;  // diff = rounded avg(r, second_pred) - src, both stride w
+      sse += diff * diff;
+    }
+  }
+  *sse_ptr = sse;  // raw sum of squared differences is also reported
+  return sse - (((int64_t) se * se) >> (l2w + l2h));  // sse - se^2 / (w * h)
+}
+
 template<typename VarianceFunctionType>
-class VarianceTest
-    : public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
+class VarianceTest :
+    public ::testing::TestWithParam<tuple<int, int, VarianceFunctionType> > {
 public:
  virtual void SetUp() {
    const tuple<int, int, VarianceFunctionType>& params = this->GetParam();
@@ -162,40 +190,10 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
  EXPECT_EQ(expected, var);
 }

-#if CONFIG_VP9_ENCODER
-
-unsigned int subpel_avg_variance_ref(const uint8_t *ref,
-                                     const uint8_t *src,
-                                     const uint8_t *second_pred,
-                                     int l2w, int l2h,
-                                     int xoff, int yoff,
-                                     unsigned int *sse_ptr) {
-  int se = 0;
-  unsigned int sse = 0;
-  const int w = 1 << l2w, h = 1 << l2h;
-  for (int y = 0; y < h; y++) {
-    for (int x = 0; x < w; x++) {
-      // bilinear interpolation at a 16th pel step
-      const int a1 = ref[(w + 1) * (y + 0) + x + 0];
-      const int a2 = ref[(w + 1) * (y + 0) + x + 1];
-      const int b1 = ref[(w + 1) * (y + 1) + x + 0];
-      const int b2 = ref[(w + 1) * (y + 1) + x + 1];
-      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
-      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
-      const int r = a + (((b - a) * yoff + 8) >> 4);
-      int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
-      se += diff;
-      sse += diff * diff;
-    }
-  }
-  *sse_ptr = sse;
-  return sse - (((int64_t) se * se) >> (l2w + l2h));
-}
-
 template<typename SubpelVarianceFunctionType>
-class SubpelVarianceTest
-    : public ::testing::TestWithParam<tuple<int, int,
-                                            SubpelVarianceFunctionType> > {
+class SubpelVarianceTest :
+    public ::testing::TestWithParam<tuple<int, int,
+                                          SubpelVarianceFunctionType> > {
 public:
  virtual void SetUp() {
    const tuple<int, int, SubpelVarianceFunctionType>& params =
@@ -220,7 +218,6 @@ class SubpelVarianceTest
    vpx_free(src_);
    delete[] ref_;
    vpx_free(sec_);
-    libvpx_test::ClearSystemState();
  }

 protected:
@@ -282,8 +279,6 @@ void SubpelVarianceTest<vp9_subp_avg_variance_fn_t>::RefTest() {
  }
 }

-#endif  // CONFIG_VP9_ENCODER
-
 // -----------------------------------------------------------------------------
 // VP8 test cases.

@@ -487,7 +482,6 @@ INSTANTIATE_TEST_CASE_P(
 #endif

 #if HAVE_SSE2
-#if CONFIG_USE_X86INC
 const vp9_variance_fn_t variance4x4_sse2 = vp9_variance4x4_sse2;
 const vp9_variance_fn_t variance4x8_sse2 = vp9_variance4x8_sse2;
 const vp9_variance_fn_t variance8x4_sse2 = vp9_variance8x4_sse2;
@@ -601,11 +595,8 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(6, 5, subpel_avg_variance64x32_sse2),
                      make_tuple(6, 6, subpel_avg_variance64x64_sse2)));
 #endif
-#endif

 #if HAVE_SSSE3
-#if CONFIG_USE_X86INC
-
 const vp9_subpixvariance_fn_t subpel_variance4x4_ssse3 =
    vp9_sub_pixel_variance4x4_ssse3;
 const vp9_subpixvariance_fn_t subpel_variance4x8_ssse3 =
@@ -690,7 +681,6 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(6, 5, subpel_avg_variance64x32_ssse3),
                      make_tuple(6, 6, subpel_avg_variance64x64_ssse3)));
 #endif
-#endif
 #endif  // CONFIG_VP9_ENCODER

 }  // namespace vp9
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -8,6 +8,10 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+extern "C" {
+#include "vp8/encoder/boolhuff.h"
+#include "vp8/decoder/dboolhuff.h"
+}

 #include <math.h>
 #include <stddef.h>
@@ -20,11 +24,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "vpx/vpx_integer.h"

-extern "C" {
-#include "vp8/encoder/boolhuff.h"
-#include "vp8/decoder/dboolhuff.h"
-}
-
 namespace {
 const int num_tests = 10;

@@ -45,7 +44,7 @@ void encrypt_buffer(uint8_t *buffer, int size) {

 void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
                           uint8_t *output, int count) {
-  int offset = input - reinterpret_cast<uint8_t *>(decrypt_state);
+  int offset = input - (uint8_t *)decrypt_state;
  for (int i = 0; i < count; i++) {
    output[i] = input[i] ^ secret_key[(offset + i) & 15];
  }
@@ -59,10 +58,10 @@ TEST(VP8, TestBitIO) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  for (int n = 0; n < num_tests; ++n) {
    for (int method = 0; method <= 7; ++method) {   // we generate various proba
-      const int kBitsToTest = 1000;
-      uint8_t probas[kBitsToTest];
+      const int bits_to_test = 1000;
+      uint8_t probas[bits_to_test];

-      for (int i = 0; i < kBitsToTest; ++i) {
+      for (int i = 0; i < bits_to_test; ++i) {
        const int parity = i & 1;
        probas[i] =
            (method == 0) ? 0 : (method == 1) ? 255 :
@@ -77,14 +76,14 @@ TEST(VP8, TestBitIO) {
      }
      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
        const int random_seed = 6432;
-        const int kBufferSize = 10000;
+        const int buffer_size = 10000;
        ACMRandom bit_rnd(random_seed);
        BOOL_CODER bw;
-        uint8_t bw_buffer[kBufferSize];
-        vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize);
+        uint8_t bw_buffer[buffer_size];
+        vp8_start_encode(&bw, bw_buffer, bw_buffer + buffer_size);

        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
-        for (int i = 0; i < kBitsToTest; ++i) {
+        for (int i = 0; i < bits_to_test; ++i) {
          if (bit_method == 2) {
            bit = (i & 1);
          } else if (bit_method == 3) {
@@ -99,20 +98,19 @@ TEST(VP8, TestBitIO) {
 #if CONFIG_DECRYPT
        encrypt_buffer(bw_buffer, buffer_size);
        vp8dx_start_decode(&br, bw_buffer, buffer_size,
-                           test_decrypt_cb,
-                           reinterpret_cast<void *>(bw_buffer));
+                           test_decrypt_cb, (void *)bw_buffer);
 #else
-        vp8dx_start_decode(&br, bw_buffer, kBufferSize, NULL, NULL);
+        vp8dx_start_decode(&br, bw_buffer, buffer_size, NULL, NULL);
 #endif
        bit_rnd.Reset(random_seed);
-        for (int i = 0; i < kBitsToTest; ++i) {
+        for (int i = 0; i < bits_to_test; ++i) {
          if (bit_method == 2) {
            bit = (i & 1);
          } else if (bit_method == 3) {
            bit = bit_rnd(2);
          }
          GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit)
-              << "pos: "<< i << " / " << kBitsToTest
+              << "pos: "<< i << " / " << bits_to_test
              << " bit_method: " << bit_method
              << " method: " << method;
        }
--- a/test/vp8_decrypt_test.cc
+++ b/test/vp8_decrypt_test.cc
@@ -26,8 +26,7 @@ const uint8_t test_key[16] = {
  0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0
 };

-void encrypt_buffer(const uint8_t *src, uint8_t *dst,
-                    int size, int offset = 0) {
+void encrypt_buffer(const uint8_t *src, uint8_t *dst, int size, int offset = 0) {
  for (int i = 0; i < size; ++i) {
    dst[i] = src[i] ^ test_key[(offset + i) & 15];
  }
@@ -35,11 +34,10 @@ void encrypt_buffer(const uint8_t *src, uint8_t *dst,

 void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
                     uint8_t *output, int count) {
-  encrypt_buffer(input, output, count,
-                 input - reinterpret_cast<uint8_t *>(decrypt_state));
+  encrypt_buffer(input, output, count, input - (uint8_t *)decrypt_state);
 }

-}  // namespace
+} // namespace

 namespace libvpx_test {

--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -18,7 +18,7 @@


 extern "C" {
-#include "./vp8_rtcd.h"
+#include "vp8_rtcd.h"
 }

 #include "test/acm_random.h"
--- a/test/vp9_boolcoder_test.cc
+++ b/test/vp9_boolcoder_test.cc
@@ -19,7 +19,7 @@ extern "C" {
 #include "vp9/decoder/vp9_dboolhuff.h"
 }

-#include "test/acm_random.h"
+#include "acm_random.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;
@@ -32,10 +32,10 @@ TEST(VP9, TestBitIO) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  for (int n = 0; n < num_tests; ++n) {
    for (int method = 0; method <= 7; ++method) {   // we generate various proba
-      const int kBitsToTest = 1000;
-      uint8_t probas[kBitsToTest];
+      const int bits_to_test = 1000;
+      uint8_t probas[bits_to_test];

-      for (int i = 0; i < kBitsToTest; ++i) {
+      for (int i = 0; i < bits_to_test; ++i) {
        const int parity = i & 1;
        probas[i] =
          (method == 0) ? 0 : (method == 1) ? 255 :
@@ -50,14 +50,14 @@ TEST(VP9, TestBitIO) {
      }
      for (int bit_method = 0; bit_method <= 3; ++bit_method) {
        const int random_seed = 6432;
-        const int kBufferSize = 10000;
+        const int buffer_size = 10000;
        ACMRandom bit_rnd(random_seed);
        vp9_writer bw;
-        uint8_t bw_buffer[kBufferSize];
+        uint8_t bw_buffer[buffer_size];
        vp9_start_encode(&bw, bw_buffer);

        int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0;
-        for (int i = 0; i < kBitsToTest; ++i) {
+        for (int i = 0; i < bits_to_test; ++i) {
          if (bit_method == 2) {
            bit = (i & 1);
          } else if (bit_method == 3) {
@@ -72,16 +72,16 @@ TEST(VP9, TestBitIO) {
        GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0);

        vp9_reader br;
-        vp9_reader_init(&br, bw_buffer, kBufferSize);
+        vp9_reader_init(&br, bw_buffer, buffer_size);
        bit_rnd.Reset(random_seed);
-        for (int i = 0; i < kBitsToTest; ++i) {
+        for (int i = 0; i < bits_to_test; ++i) {
          if (bit_method == 2) {
            bit = (i & 1);
          } else if (bit_method == 3) {
            bit = bit_rnd(2);
          }
          GTEST_ASSERT_EQ(vp9_read(&br, probas[i]), bit)
-              << "pos: " << i << " / " << kBitsToTest
+              << "pos: " << i << " / " << bits_to_test
              << " bit_method: " << bit_method
              << " method: " << method;
        }
--- a/test/vp9_lossless_test.cc
+++ b/test/vp9_lossless_test.cc
@@ -1,75 +0,0 @@
-/*
-  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
-
-  Use of this source code is governed by a BSD-style license
-  that can be found in the LICENSE file in the root of the source
-  tree. An additional intellectual property rights grant can be found
-  in the file PATENTS.  All contributing project authors may
-  be found in the AUTHORS file in the root of the source tree.
-*/
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-const int kMaxPsnr = 100;
-
-class LossLessTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  LossLessTest() : EncoderTest(GET_PARAM(0)),
-                   psnr_(kMaxPsnr),
-                   nframes_(0),
-                   encoding_mode_(GET_PARAM(1)) {
-  }
-
-  virtual ~LossLessTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    psnr_ = 0.0;
-    nframes_ = 0;
-  }
-
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (pkt->data.psnr.psnr[0] < psnr_)
-      psnr_= pkt->data.psnr.psnr[0];
-  }
-
-  double GetMinPsnr() const {
-      return psnr_;
-  }
-
- private:
-  double psnr_;
-  unsigned int nframes_;
-  libvpx_test::TestMode encoding_mode_;
-};
-
-TEST_P(LossLessTest, TestLossLessEncoding) {
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 2000;
-  cfg_.g_lag_in_frames = 25;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 0;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  // intentionally changed the dimension for better testing coverage
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 356, 284,
-                                     timebase.den, timebase.num, 0, 30);
-
-  const double psnr_lossless = GetMinPsnr();
-  EXPECT_GE(psnr_lossless, kMaxPsnr);
-}
-VP9_INSTANTIATE_TEST_CASE(LossLessTest, ALL_TEST_MODES);
-}  // namespace
--- a/test/vp9_subtract_test.cc
+++ b/test/vp9_subtract_test.cc
@@ -39,8 +39,8 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());

  // FIXME(rbultje) split in its own file
-  for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES;
-       bsize = static_cast<BLOCK_SIZE>(static_cast<int>(bsize) + 1)) {
+  for (BLOCK_SIZE_TYPE bsize = BLOCK_SIZE_AB4X4; bsize < BLOCK_SIZE_TYPES;
+       bsize = static_cast<BLOCK_SIZE_TYPE>(static_cast<int>(bsize) + 1)) {
    const int block_width  = 4 << b_width_log2(bsize);
    const int block_height = 4 << b_height_log2(bsize);
    int16_t *diff = reinterpret_cast<int16_t *>(
@@ -93,8 +93,9 @@ TEST_P(VP9SubtractBlockTest, SimpleSubtract) {
 INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest,
                        ::testing::Values(vp9_subtract_block_c));

-#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest,
                        ::testing::Values(vp9_subtract_block_sse2));
 #endif
+
 }  // namespace vp9
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -1,109 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "vp9/decoder/vp9_thread.h"
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/md5_helper.h"
-#include "test/webm_video_source.h"
-
-namespace {
-
-class VP9WorkerThreadTest : public ::testing::Test {
- protected:
-  virtual ~VP9WorkerThreadTest() {}
-  virtual void SetUp() {
-    vp9_worker_init(&worker_);
-  }
-
-  virtual void TearDown() {
-    vp9_worker_end(&worker_);
-  }
-
-  VP9Worker worker_;
-};
-
-int ThreadHook(void* data, void* return_value) {
-  int* const hook_data = reinterpret_cast<int*>(data);
-  *hook_data = 5;
-  return *reinterpret_cast<int*>(return_value);
-}
-
-TEST_F(VP9WorkerThreadTest, HookSuccess) {
-  EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
-
-  for (int i = 0; i < 2; ++i) {
-    EXPECT_TRUE(vp9_worker_reset(&worker_));
-
-    int hook_data = 0;
-    int return_value = 1;  // return successfully from the hook
-    worker_.hook = ThreadHook;
-    worker_.data1 = &hook_data;
-    worker_.data2 = &return_value;
-
-    vp9_worker_launch(&worker_);
-    EXPECT_TRUE(vp9_worker_sync(&worker_));
-    EXPECT_FALSE(worker_.had_error);
-    EXPECT_EQ(5, hook_data);
-
-    EXPECT_TRUE(vp9_worker_sync(&worker_));  // should be a no-op.
-  }
-}
-
-TEST_F(VP9WorkerThreadTest, HookFailure) {
-  EXPECT_TRUE(vp9_worker_reset(&worker_));
-
-  int hook_data = 0;
-  int return_value = 0;  // return failure from the hook
-  worker_.hook = ThreadHook;
-  worker_.data1 = &hook_data;
-  worker_.data2 = &return_value;
-
-  vp9_worker_launch(&worker_);
-  EXPECT_FALSE(vp9_worker_sync(&worker_));
-  EXPECT_TRUE(worker_.had_error);
-
-  // Ensure _reset() clears the error and _launch() can be called again.
-  return_value = 1;
-  EXPECT_TRUE(vp9_worker_reset(&worker_));
-  EXPECT_FALSE(worker_.had_error);
-  vp9_worker_launch(&worker_);
-  EXPECT_TRUE(vp9_worker_sync(&worker_));
-  EXPECT_FALSE(worker_.had_error);
-}
-
-TEST(VP9DecodeMTTest, MTDecode) {
-  libvpx_test::WebMVideoSource video("vp90-2-03-size-226x226.webm");
-  video.Init();
-
-  vpx_codec_dec_cfg_t cfg = {0};
-  cfg.threads = 2;
-  libvpx_test::VP9Decoder decoder(cfg, 0);
-
-  libvpx_test::MD5 md5;
-  for (video.Begin(); video.cxdata(); video.Next()) {
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size());
-    ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img = NULL;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next())) {
-      md5.Add(img);
-    }
-  }
-  EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", md5.Get());
-}
-
-}  // namespace
--- a/test/webm_video_source.h
+++ b/test/webm_video_source.h
@@ -99,7 +99,7 @@ class WebMVideoSource : public CompressedVideoSource {

  virtual void Begin() {
    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+    ASSERT_TRUE(input_file_) << "Input file open failed. Filename: "
        << file_name_;

    nestegg_io io = {nestegg_read_cb, nestegg_seek_cb, nestegg_tell_cb,
@@ -130,7 +130,6 @@ class WebMVideoSource : public CompressedVideoSource {
  }

  void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
    if (chunk_ >= chunks_) {
      unsigned int track;

--- a/third_party/libyuv/source/scale.c
+++ b/third_party/libyuv/source/scale.c
@@ -1370,12 +1370,12 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr, const uint8* src_ptr,
    mov        edx, [esp + 8 + 12]  // src_stride
    mov        ecx, [esp + 8 + 16]  // dst_width
    mov        eax, [esp + 8 + 20]  // source_y_fraction (0..255)
-    shr        eax, 1
    cmp        eax, 0
    je         xloop1
-    cmp        eax, 64
+    cmp        eax, 128
    je         xloop2

+    shr        eax, 1
    mov        ah,al
    neg        al
    add        al, 128
@@ -2132,11 +2132,11 @@ void ScaleFilterRows_SSSE3(uint8* dst_ptr,
    "mov    0x14(%esp),%edx                    \n"
    "mov    0x18(%esp),%ecx                    \n"
    "mov    0x1c(%esp),%eax                    \n"
-    "shr    %eax                               \n"
    "cmp    $0x0,%eax                          \n"
    "je     2f                                 \n"
-    "cmp    $0x40,%eax                         \n"
+    "cmp    $0x80,%eax                         \n"
    "je     3f                                 \n"
+    "shr    %eax                               \n"
    "mov    %al,%ah                            \n"
    "neg    %al                                \n"
    "add    $0x80,%al                          \n"
@@ -2662,7 +2662,6 @@ static void ScaleFilterRows_SSE2(uint8* dst_ptr,
 static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
                                  const uint8* src_ptr, int src_stride,
                                  int dst_width, int source_y_fraction) {
-  source_y_fraction >>= 1;
  if (source_y_fraction == 0) {
    asm volatile (
   "1:"
@@ -2681,7 +2680,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
      : "memory", "cc", "rax"
    );
    return;
-  } else if (source_y_fraction == 64) {
+  } else if (source_y_fraction == 128) {
    asm volatile (
    "1:"
      "movdqa     (%1),%%xmm0                  \n"
@@ -2704,6 +2703,7 @@ static void ScaleFilterRows_SSSE3(uint8* dst_ptr,
  } else {
    asm volatile (
      "mov        %3,%%eax                     \n"
+      "shr        %%eax                        \n"
      "mov        %%al,%%ah                    \n"
      "neg        %%al                         \n"
      "add        $0x80,%%al                   \n"
--- a/third_party/x86inc/x86inc.asm
+++ b/third_party/x86inc/x86inc.asm
@@ -97,91 +97,21 @@
    %endif
 %endmacro

-; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC"
-; from original code is added in for 64bit.
-%ifidn __OUTPUT_FORMAT__,elf32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,macho32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,win32
-%define ABI_IS_32BIT 1
-%elifidn __OUTPUT_FORMAT__,aout
-%define ABI_IS_32BIT 1
-%else
-%define ABI_IS_32BIT 0
-%endif
-
-%if ABI_IS_32BIT
-  %if CONFIG_PIC=1
-  %ifidn __OUTPUT_FORMAT__,elf32
-    %define GET_GOT_SAVE_ARG 1
-    %define WRT_PLT wrt ..plt
-    %macro GET_GOT 1
-      extern _GLOBAL_OFFSET_TABLE_
-      push %1
-      call %%get_got
-      %%sub_offset:
-      jmp %%exitGG
-      %%get_got:
-      mov %1, [esp]
-      add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc
-      ret
-      %%exitGG:
-      %undef GLOBAL
-      %define GLOBAL(x) x + %1 wrt ..gotoff
-      %undef RESTORE_GOT
-      %define RESTORE_GOT pop %1
-    %endmacro
-  %elifidn __OUTPUT_FORMAT__,macho32
-    %define GET_GOT_SAVE_ARG 1
-    %macro GET_GOT 1
-      push %1
-      call %%get_got
-      %%get_got:
-      pop  %1
-      %undef GLOBAL
-      %define GLOBAL(x) x + %1 - %%get_got
-      %undef RESTORE_GOT
-      %define RESTORE_GOT pop %1
-    %endmacro
-  %endif
-  %endif
-
-  %if ARCH_X86_64 == 0
+%if WIN64
+    %define PIC
+%elifidn __OUTPUT_FORMAT__,macho64
+    %define PIC
+%elif ARCH_X86_64 == 0
+; x86_32 doesn't require PIC.
+; Some distros prefer shared objects to be PIC, but nothing breaks if
+; the code contains a few textrels, so we'll skip that complexity.
    %undef PIC
-  %endif
-
-%else
-  %macro GET_GOT 1
-  %endmacro
-  %define GLOBAL(x) rel x
-  %define WRT_PLT wrt ..plt
-
-  %if WIN64
+%elif CONFIG_PIC
    %define PIC
-  %elifidn __OUTPUT_FORMAT__,macho64
-    %define PIC
-  %elif CONFIG_PIC
-    %define PIC
-  %endif
 %endif
-
-%ifnmacro GET_GOT
-    %macro GET_GOT 1
-    %endmacro
-    %define GLOBAL(x) x
-%endif
-%ifndef RESTORE_GOT
-%define RESTORE_GOT
-%endif
-%ifndef WRT_PLT
-%define WRT_PLT
-%endif
-
 %ifdef PIC
    default rel
 %endif
-; Done with PIC macros

 ; Always use long nops (reduces 0x90 spam in disassembly on x86_32)
 %ifndef __NASM_VER__
@@ -598,10 +528,6 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14
        global %1:function hidden
    %elifidn __OUTPUT_FORMAT__,elf64
        global %1:function hidden
-    %elifidn __OUTPUT_FORMAT__,macho32
-        global %1:private_extern
-    %elifidn __OUTPUT_FORMAT__,macho64
-        global %1:private_extern
    %else
        global %1
    %endif
--- a/vp8/common/alloccommon.c
+++ b/vp8/common/alloccommon.c
@@ -173,6 +173,7 @@ void vp8_create_common(VP8_COMMON *oci)
    oci->use_bilinear_mc_filter = 0;
    oci->full_pixel = 0;
    oci->multi_token_partition = ONE_PARTITION;
+    oci->clr_type = REG_YUV;
    oci->clamp_type = RECON_CLAMP_REQUIRED;

    /* Initialize reference frame sign bias structure to defaults */
--- a/vp8/common/filter.c
+++ b/vp8/common/filter.c
@@ -9,7 +9,9 @@
 */


+#include <stdlib.h>
 #include "filter.h"
+#include "vpx_ports/mem.h"

 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) =
 {
--- a/vp8/common/filter.h
+++ b/vp8/common/filter.h
@@ -12,13 +12,11 @@
 #ifndef FILTER_H
 #define FILTER_H

-#include "vpx_ports/mem.h"
-
 #define BLOCK_HEIGHT_WIDTH 4
 #define VP8_FILTER_WEIGHT 128
 #define VP8_FILTER_SHIFT  7

-extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]);
-extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]);
+extern const short vp8_bilinear_filters[8][2];
+extern const short vp8_sub_pel_filters[8][6];

 #endif
--- a/vp8/common/findnearmv.h
+++ b/vp8/common/findnearmv.h
@@ -124,7 +124,7 @@ static int above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride)
        b += 16;
    }

-    return (cur_mb->bmi + (b - 4))->mv.as_int;
+    return (cur_mb->bmi + b - 4)->mv.as_int;
 }
 static B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b)
 {
--- a/vp8/common/onyx.h
+++ b/vp8/common/onyx.h
@@ -41,8 +41,7 @@ extern "C"
    {
        USAGE_STREAM_FROM_SERVER    = 0x0,
        USAGE_LOCAL_FILE_PLAYBACK   = 0x1,
-        USAGE_CONSTRAINED_QUALITY   = 0x2,
-        USAGE_CONSTANT_QUALITY      = 0x3
+        USAGE_CONSTRAINED_QUALITY   = 0x2
    } END_USAGE;


--- a/vp8/common/onyxc_int.h
+++ b/vp8/common/onyxc_int.h
@@ -72,6 +72,7 @@ typedef struct VP8Common
    int horiz_scale;
    int vert_scale;

+    YUV_TYPE clr_type;
    CLAMP_TYPE  clamp_type;

    YV12_BUFFER_CONFIG *frame_to_show;
@@ -114,6 +115,9 @@ typedef struct VP8Common
    int uvdc_delta_q;
    int uvac_delta_q;

+    unsigned int frames_since_golden;
+    unsigned int frames_till_alt_ref_frame;
+
    /* We allocate a MODE_INFO struct for each macroblock, together with
       an extra row on top and column on the left to simplify prediction. */

@@ -153,6 +157,7 @@ typedef struct VP8Common

    unsigned int current_video_frame;

+    int near_boffset[3];
    int version;

    TOKEN_PARTITION multi_token_partition;
@@ -160,10 +165,8 @@ typedef struct VP8Common
 #ifdef PACKET_TESTING
    VP8_HEADER oh;
 #endif
-#if CONFIG_POSTPROC_VISUALIZER
    double bitrate;
    double framerate;
-#endif

 #if CONFIG_MULTITHREAD
    int processor_core_count;
--- a/vp8/common/postproc.c
+++ b/vp8/common/postproc.c
@@ -923,7 +923,7 @@ int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t
    if (flags & VP8D_DEBUG_TXT_RATE_INFO)
    {
        char message[512];
-        sprintf(message, "Bitrate: %10.2f framerate: %10.2f ", oci->bitrate, oci->framerate);
+        sprintf(message, "Bitrate: %10.2f frame_rate: %10.2f ", oci->bitrate, oci->framerate);
        vp8_blit_text(message, oci->post_proc_buffer.y_buffer, oci->post_proc_buffer.y_stride);
    }

--- a/vp8/common/reconinter.c
+++ b/vp8/common/reconinter.c
@@ -138,10 +138,14 @@ void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre,
    {
        for (r = 0; r < 4; r++)
        {
+#if !(CONFIG_FAST_UNALIGNED)
            pred_ptr[0]  = ptr[0];
            pred_ptr[1]  = ptr[1];
            pred_ptr[2]  = ptr[2];
            pred_ptr[3]  = ptr[3];
+#else
+            *(uint32_t *)pred_ptr = *(uint32_t *)ptr ;
+#endif
            pred_ptr     += pitch;
            ptr         += pre_stride;
        }
@@ -192,12 +196,16 @@ static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stri
    {
        for (r = 0; r < 4; r++)
        {
+#if !(CONFIG_FAST_UNALIGNED)
          dst[0]  = ptr[0];
          dst[1]  = ptr[1];
          dst[2]  = ptr[2];
          dst[3]  = ptr[3];
-          dst     += dst_stride;
-          ptr     += pre_stride;
+#else
+            *(uint32_t *)dst = *(uint32_t *)ptr ;
+#endif
+            dst     += dst_stride;
+            ptr     += pre_stride;
        }
    }
 }
@@ -262,7 +270,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
                   + x->block[yoffset+4].bmi.mv.as_mv.row
                   + x->block[yoffset+5].bmi.mv.as_mv.row;

-            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+            temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);

            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

@@ -271,7 +279,7 @@ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x)
                   + x->block[yoffset+4].bmi.mv.as_mv.col
                   + x->block[yoffset+5].bmi.mv.as_mv.col;

-            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+            temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);

            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

@@ -550,7 +558,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row
                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row;

-            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+            temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);

            x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask;

@@ -559,7 +567,7 @@ void build_4x4uvmvs(MACROBLOCKD *x)
                 + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col
                 + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col;

-            temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8);
+            temp += 4 + ((temp >> (sizeof(int) * CHAR_BIT - 1)) << 3);

            x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask;

--- a/vp8/common/vp8_asm_com_offsets.c
+++ b/vp8/common/vp8_asm_com_offsets.c
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_config.h"
+#include "vpx/vpx_codec.h"
+#include "vpx_ports/asm_offsets.h"
+#include "vp8/common/blockd.h"
+
+#if CONFIG_POSTPROC
+#include "postproc.h"
+#endif /* CONFIG_POSTPROC */
+
+BEGIN
+
+#if CONFIG_POSTPROC
+/* mfqe.c / filter_by_weight */
+DEFINE(MFQE_PRECISION_VAL,                      MFQE_PRECISION);
+#endif /* CONFIG_POSTPROC */
+
+END
+
+/* add asserts for any offset that is not supported by assembly code */
+/* add asserts for any size that is not supported by assembly code */
+
+#if HAVE_MEDIA
+/* switch case in vp8_intra4x4_predict_armv6 is based on these enumerated values */
+ct_assert(B_DC_PRED, B_DC_PRED == 0);
+ct_assert(B_TM_PRED, B_TM_PRED == 1);
+ct_assert(B_VE_PRED, B_VE_PRED == 2);
+ct_assert(B_HE_PRED, B_HE_PRED == 3);
+ct_assert(B_LD_PRED, B_LD_PRED == 4);
+ct_assert(B_RD_PRED, B_RD_PRED == 5);
+ct_assert(B_VR_PRED, B_VR_PRED == 6);
+ct_assert(B_VL_PRED, B_VL_PRED == 7);
+ct_assert(B_HD_PRED, B_HD_PRED == 8);
+ct_assert(B_HU_PRED, B_HU_PRED == 9);
+#endif
+
+#if HAVE_SSE2
+#if CONFIG_POSTPROC
+/* vp8_filter_by_weight16x16 and 8x8 */
+ct_assert(MFQE_PRECISION_VAL, MFQE_PRECISION == 4)
+#endif /* CONFIG_POSTPROC */
+#endif /* HAVE_SSE2 */
--- a/vp8/common/x86/filter_x86.c
+++ b/vp8/common/x86/filter_x86.c
@@ -8,7 +8,7 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "vp8/common/x86/filter_x86.h"
+#include "vpx_ports/mem.h"

 DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]) =
 {
--- a/vp8/common/x86/filter_x86.h
+++ b/vp8/common/x86/filter_x86.h
@@ -11,15 +11,9 @@
 #ifndef FILTER_X86_H
 #define FILTER_X86_H

-#include "vpx_ports/mem.h"
-
 /* x86 assembly specific copy of vp8/common/filter.c:vp8_bilinear_filters with
 * duplicated values */
-
-/* duplicated 4x */
-extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_4[8][8]);
-
-/* duplicated 8x */
-extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters_x86_8[8][16]);
+extern const short vp8_bilinear_filters_x86_4[8][8];  /* duplicated 4x */
+extern const short vp8_bilinear_filters_x86_8[8][16]; /* duplicated 8x */

 #endif /* FILTER_X86_H */
--- a/vp8/common/x86/vp8_asm_stubs.c
+++ b/vp8/common/x86/vp8_asm_stubs.c
@@ -611,12 +611,16 @@ void vp8_sixtap_predict4x4_ssse3

          for (r = 0; r < 4; r++)
          {
+  #if !(CONFIG_FAST_UNALIGNED)
            dst_ptr[0]  = src_ptr[0];
            dst_ptr[1]  = src_ptr[1];
            dst_ptr[2]  = src_ptr[2];
            dst_ptr[3]  = src_ptr[3];
-            dst_ptr     += dst_pitch;
-            src_ptr     += src_pixels_per_line;
+  #else
+              *(uint32_t *)dst_ptr = *(uint32_t *)src_ptr ;
+  #endif
+              dst_ptr     += dst_pitch;
+              src_ptr     += src_pixels_per_line;
          }
      }
  }
--- a/vp8/decoder/decodemv.c
+++ b/vp8/decoder/decodemv.c
@@ -110,8 +110,8 @@ static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc)

 static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc)
 {
-    mv->row = (short)(read_mvcomponent(r,   mvc) * 2);
-    mv->col = (short)(read_mvcomponent(r, ++mvc) * 2);
+    mv->row = (short)(read_mvcomponent(r,   mvc) << 1);
+    mv->col = (short)(read_mvcomponent(r, ++mvc) << 1);
 }


@@ -292,9 +292,9 @@ static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi,
                blockmv.as_int = 0;
                if( vp8_read(bc, prob[2]) )
                {
-                    blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2;
+                    blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) << 1;
                    blockmv.as_mv.row += best_mv.as_mv.row;
-                    blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2;
+                    blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) << 1;
                    blockmv.as_mv.col += best_mv.as_mv.col;
                }
            }
--- a/vp8/decoder/decodframe.c
+++ b/vp8/decoder/decodframe.c
@@ -576,7 +576,7 @@ static void decode_mb_rows(VP8D_COMP *pbi)

        xd->left_available = 0;

-        xd->mb_to_top_edge = -((mb_row * 16) << 3);
+        xd->mb_to_top_edge = -((mb_row * 16)) << 3;
        xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

        xd->recon_above[0] = dst_buffer[0] + recon_yoffset;
@@ -1026,7 +1026,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
        const unsigned char *clear = data;
        if (pbi->decrypt_cb)
        {
-            int n = (int)(data_end - data);
+            int n = data_end - data;
            if (n > 10) n = 10;
            pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n);
            clear = clear_buffer;
@@ -1095,7 +1095,7 @@ int vp8_decode_frame(VP8D_COMP *pbi)
        vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate bool decoder 0");
    if (pc->frame_type == KEY_FRAME) {
-        (void)vp8_read_bit(bc);  // colorspace
+        pc->clr_type    = (YUV_TYPE)vp8_read_bit(bc);
        pc->clamp_type  = (CLAMP_TYPE)vp8_read_bit(bc);
    }

--- a/vp8/decoder/onyxd_if.c
+++ b/vp8/decoder/onyxd_if.c
@@ -430,6 +430,7 @@ int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_st
    *time_stamp = pbi->last_time_stamp;
    *time_end_stamp = 0;

+    sd->clrtype = pbi->common.clr_type;
 #if CONFIG_POSTPROC
    ret = vp8_post_proc_frame(&pbi->common, sd, flags);
 #else
--- a/vp8/decoder/vp8_asm_dec_offsets.c
+++ b/vp8/decoder/vp8_asm_dec_offsets.c
@@ -0,0 +1,26 @@
+/*
+ *  Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include "vpx_ports/asm_offsets.h"
+#include "onyxd_int.h"
+
+BEGIN
+
+DEFINE(bool_decoder_user_buffer_end,            offsetof(BOOL_DECODER, user_buffer_end));
+DEFINE(bool_decoder_user_buffer,                offsetof(BOOL_DECODER, user_buffer));
+DEFINE(bool_decoder_value,                      offsetof(BOOL_DECODER, value));
+DEFINE(bool_decoder_count,                      offsetof(BOOL_DECODER, count));
+DEFINE(bool_decoder_range,                      offsetof(BOOL_DECODER, range));
+
+END
+
+/* add asserts for any offset that is not supported by assembly code */
+/* add asserts for any size that is not supported by assembly code */
--- a/vp8/encoder/bitstream.c
+++ b/vp8/encoder/bitstream.c
@@ -432,7 +432,7 @@ static void write_mv_ref
    assert(NEARESTMV <= m  &&  m <= SPLITMV);
 #endif
    vp8_write_token(w, vp8_mv_ref_tree, p,
-                    vp8_mv_ref_encoding_array + (m - NEARESTMV));
+                    vp8_mv_ref_encoding_array - NEARESTMV + m);
 }

 static void write_sub_mv_ref
@@ -444,7 +444,7 @@ static void write_sub_mv_ref
    assert(LEFT4X4 <= m  &&  m <= NEW4X4);
 #endif
    vp8_write_token(w, vp8_sub_mv_ref_tree, p,
-                    vp8_sub_mv_ref_encoding_array + (m - LEFT4X4));
+                    vp8_sub_mv_ref_encoding_array - LEFT4X4 + m);
 }

 static void write_mv
@@ -577,7 +577,7 @@ static void pack_inter_mode_mvs(VP8_COMP *const cpi)
             */
            xd->mb_to_left_edge = -((mb_col * 16) << 3);
            xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
-            xd->mb_to_top_edge = -((mb_row * 16) << 3);
+            xd->mb_to_top_edge = -((mb_row * 16)) << 3;
            xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

 #ifdef VP8_ENTROPY_STATS
@@ -1322,7 +1322,7 @@ void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char * dest
        vp8_start_encode(bc, cx_data, cx_data_end);

        /* signal clr type */
-        vp8_write_bit(bc, 0);
+        vp8_write_bit(bc, pc->clr_type);
        vp8_write_bit(bc, pc->clamp_type);

    }
--- a/vp8/encoder/dct.c
+++ b/vp8/encoder/dct.c
@@ -20,10 +20,10 @@ void vp8_short_fdct4x4_c(short *input, short *output, int pitch)

    for (i = 0; i < 4; i++)
    {
-        a1 = ((ip[0] + ip[3]) * 8);
-        b1 = ((ip[1] + ip[2]) * 8);
-        c1 = ((ip[1] - ip[2]) * 8);
-        d1 = ((ip[0] - ip[3]) * 8);
+        a1 = ((ip[0] + ip[3])<<3);
+        b1 = ((ip[1] + ip[2])<<3);
+        c1 = ((ip[1] - ip[2])<<3);
+        d1 = ((ip[0] - ip[3])<<3);

        op[0] = a1 + b1;
        op[2] = a1 - b1;
@@ -72,10 +72,10 @@ void vp8_short_walsh4x4_c(short *input, short *output, int pitch)

    for (i = 0; i < 4; i++)
    {
-        a1 = ((ip[0] + ip[2]) * 4);
-        d1 = ((ip[1] + ip[3]) * 4);
-        c1 = ((ip[1] - ip[3]) * 4);
-        b1 = ((ip[0] - ip[2]) * 4);
+        a1 = ((ip[0] + ip[2])<<2);
+        d1 = ((ip[1] + ip[3])<<2);
+        c1 = ((ip[1] - ip[3])<<2);
+        b1 = ((ip[0] - ip[2])<<2);

        op[0] = a1 + d1 + (a1!=0);
        op[1] = b1 + c1;
--- a/vp8/encoder/firstpass.c
+++ b/vp8/encoder/firstpass.c
@@ -711,8 +711,8 @@ skip_motion_search:
                        neutral_count++;
                    }

-                    d->bmi.mv.as_mv.row *= 8;
-                    d->bmi.mv.as_mv.col *= 8;
+                    d->bmi.mv.as_mv.row <<= 3;
+                    d->bmi.mv.as_mv.col <<= 3;
                    this_error = motion_error;
                    vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv);
                    vp8_encode_inter16x16y(x);
@@ -909,16 +909,13 @@ extern const int vp8_bits_per_mb[2][QINDEX_RANGE];

 static double bitcost( double prob )
 {
-  if (prob > 0.000122)
-    return -log(prob) / log(2.0);
-  else
-    return 13.0;
+    return (prob > 0.000122) ? -(log( prob ) / log( 2.0 )) : 13.0; /* clamp: log(0) is -inf */
 }
 static int64_t estimate_modemvcost(VP8_COMP *cpi,
                                     FIRSTPASS_STATS * fpstats)
 {
    int mv_cost;
-    int64_t mode_cost;
+    int mode_cost;

    double av_pct_inter = fpstats->pcnt_inter / fpstats->count;
    double av_pct_motion = fpstats->pcnt_motion / fpstats->count;
@@ -940,9 +937,10 @@ static int64_t estimate_modemvcost(VP8_COMP *cpi,
    /* Crude estimate of overhead cost from modes
     * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb
     */
-    mode_cost =((((av_pct_inter - av_pct_motion) * zz_cost) +
-                (av_pct_motion * motion_cost) +
-                (av_intra * intra_cost)) * cpi->common.MBs) * 512;
+    mode_cost =
+        (int)( ( ((av_pct_inter - av_pct_motion) * zz_cost) +
+                 (av_pct_motion * motion_cost) +
+                 (av_intra * intra_cost) ) * cpi->common.MBs ) << 9;

    return mv_cost + mode_cost;
 }
@@ -1327,7 +1325,7 @@ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_ta
    return Q;
 }

-extern void vp8_new_framerate(VP8_COMP *cpi, double framerate);
+extern void vp8_new_frame_rate(VP8_COMP *cpi, double framerate);

 void vp8_init_second_pass(VP8_COMP *cpi)
 {
@@ -1351,9 +1349,9 @@ void vp8_init_second_pass(VP8_COMP *cpi)
     * sum duration is not. Its calculated based on the actual durations of
     * all frames from the first pass.
     */
-    vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);
+    vp8_new_frame_rate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration);

-    cpi->output_framerate = cpi->framerate;
+    cpi->output_frame_rate = cpi->frame_rate;
    cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0) ;
    cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0);

@@ -2400,7 +2398,7 @@ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
    target_frame_size += cpi->min_frame_bandwidth;

    /* Every other frame gets a few extra bits */
-    if ( (cpi->frames_since_golden & 0x01) &&
+    if ( (cpi->common.frames_since_golden & 0x01) &&
         (cpi->frames_till_gf_update_due > 0) )
    {
        target_frame_size += cpi->twopass.alt_extra_bits;
@@ -2531,7 +2529,7 @@ void vp8_second_pass(VP8_COMP *cpi)

    /* Set nominal per second bandwidth for this frame */
    cpi->target_bandwidth = (int)
-    (cpi->per_frame_bandwidth * cpi->output_framerate);
+    (cpi->per_frame_bandwidth * cpi->output_frame_rate);
    if (cpi->target_bandwidth < 0)
        cpi->target_bandwidth = 0;

@@ -3187,7 +3185,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)

        /* Convert to a per second bitrate */
        cpi->target_bandwidth = (int)(cpi->twopass.kf_bits *
-                                      cpi->output_framerate);
+                                      cpi->output_frame_rate);
    }

    /* Note the total error score of the kf group minus the key frame itself */
@@ -3226,7 +3224,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        cpi->common.vert_scale = NORMAL;

        /* Calculate Average bits per frame. */
-        av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate);
+        av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate);

        /* CBR... Use the clip average as the target for deciding resample */
        if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)
@@ -3301,7 +3299,7 @@ static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame)
        }
        else
        {
-            int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate));
+            int64_t clip_bits = (int64_t)(cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->frame_rate));
            int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level;

            /* If triggered last time the threshold for triggering again is
--- a/vp8/encoder/mcomp.c
+++ b/vp8/encoder/mcomp.c
@@ -210,7 +210,7 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    unsigned char *z = (*(b->base_src) + b->src);

    int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1;
-    int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4;
+    int br = bestmv->as_mv.row << 2, bc = bestmv->as_mv.col << 2;
    int tr = br, tc = bc;
    unsigned int besterr;
    unsigned int left, right, up, down, diag;
@@ -220,14 +220,10 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    unsigned int quarteriters = 4;
    int thismse;

-    int minc = MAX(x->mv_col_min * 4,
-                   (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
-    int maxc = MIN(x->mv_col_max * 4,
-                   (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
-    int minr = MAX(x->mv_row_min * 4,
-                   (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
-    int maxr = MIN(x->mv_row_max * 4,
-                   (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));
+    int minc = MAX(x->mv_col_min << 2, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1));
+    int maxc = MIN(x->mv_col_max << 2, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1));
+    int minr = MAX(x->mv_row_min << 2, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1));
+    int maxr = MIN(x->mv_row_max << 2, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1));

    int y_stride;
    int offset;
@@ -258,8 +254,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
    offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col;

    /* central mv */
-    bestmv->as_mv.row *= 8;
-    bestmv->as_mv.col *= 8;
+    bestmv->as_mv.row <<= 3;
+    bestmv->as_mv.col <<= 3;

    /* calculate central point error */
    besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1);
@@ -341,8 +337,8 @@ int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
        tc = bc;
    }

-    bestmv->as_mv.row = br * 2;
-    bestmv->as_mv.col = bc * 2;
+    bestmv->as_mv.row = br << 1;
+    bestmv->as_mv.col = bc << 1;

    if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL<<3)) ||
        (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL<<3)))
@@ -703,8 +699,8 @@ int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
 #endif

    /* central mv */
-    bestmv->as_mv.row *= 8;
-    bestmv->as_mv.col *= 8;
+    bestmv->as_mv.row <<= 3;
+    bestmv->as_mv.col <<= 3;
    startmv = *bestmv;

    /* calculate central point error */
@@ -1319,8 +1315,8 @@ int vp8_diamond_search_sadx4
            (*num00)++;
    }

-    this_mv.as_mv.row = best_mv->as_mv.row * 8;
-    this_mv.as_mv.col = best_mv->as_mv.col * 8;
+    this_mv.as_mv.row = best_mv->as_mv.row << 3;
+    this_mv.as_mv.col = best_mv->as_mv.col << 3;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1713,8 +1709,8 @@ int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv,
        }
    }

-    this_mv.as_mv.row = best_mv->as_mv.row * 8;
-    this_mv.as_mv.col = best_mv->as_mv.col * 8;
+    this_mv.as_mv.row = best_mv->as_mv.row << 3;
+    this_mv.as_mv.col = best_mv->as_mv.col << 3;

    return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
@@ -1909,8 +1905,8 @@ int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d,
        }
    }

-    this_mv.as_mv.row = ref_mv->as_mv.row * 8;
-    this_mv.as_mv.col = ref_mv->as_mv.col * 8;
+    this_mv.as_mv.row = ref_mv->as_mv.row << 3;
+    this_mv.as_mv.col = ref_mv->as_mv.col << 3;

    return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad)
           + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit);
--- a/vp8/encoder/onyx_if.c
+++ b/vp8/encoder/onyx_if.c
@@ -301,11 +301,11 @@ static int rescale(int val, int num, int denom)
 static void init_temporal_layer_context(VP8_COMP *cpi,
                                        VP8_CONFIG *oxcf,
                                        const int layer,
-                                        double prev_layer_framerate)
+                                        double prev_layer_frame_rate)
 {
    LAYER_CONTEXT *lc = &cpi->layer_context[layer];

-    lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer];
+    lc->frame_rate = cpi->output_frame_rate / cpi->oxcf.rate_decimator[layer];
    lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000;

    lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level;
@@ -335,7 +335,7 @@ static void init_temporal_layer_context(VP8_COMP *cpi,
      lc->avg_frame_size_for_layer =
          (int)((cpi->oxcf.target_bitrate[layer] -
                cpi->oxcf.target_bitrate[layer-1]) * 1000 /
-                (lc->framerate - prev_layer_framerate));
+                (lc->frame_rate - prev_layer_frame_rate));

     lc->active_worst_quality         = cpi->oxcf.worst_allowed_q;
     lc->active_best_quality          = cpi->oxcf.best_allowed_q;
@@ -363,7 +363,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
                                        const int prev_num_layers)
 {
    int i;
-    double prev_layer_framerate = 0;
+    double prev_layer_frame_rate = 0;
    const int curr_num_layers = cpi->oxcf.number_of_layers;
    // If the previous state was 1 layer, get current layer context from cpi.
    // We need this to set the layer context for the new layers below.
@@ -377,7 +377,7 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
        LAYER_CONTEXT *lc = &cpi->layer_context[i];
        if (i >= prev_num_layers)
        {
-           init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
+           init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
        }
        // The initial buffer levels are set based on their starting levels.
        // We could set the buffer levels based on the previous state (normalized
@@ -403,8 +403,8 @@ static void reset_temporal_layer_change(VP8_COMP *cpi,
            lc->bits_off_target = lc->buffer_level;
            restore_layer_context(cpi, 0);
        }
-        prev_layer_framerate = cpi->output_framerate /
-                               cpi->oxcf.rate_decimator[i];
+        prev_layer_frame_rate =  cpi->output_frame_rate /
+                                 cpi->oxcf.rate_decimator[i];
    }
 }

@@ -1282,21 +1282,21 @@ int vp8_reverse_trans(int x)

    return 63;
 }
-void vp8_new_framerate(VP8_COMP *cpi, double framerate)
+void vp8_new_frame_rate(VP8_COMP *cpi, double framerate)
 {
    if(framerate < .1)
        framerate = 30;

-    cpi->framerate              = framerate;
-    cpi->output_framerate       = framerate;
+    cpi->frame_rate             = framerate;
+    cpi->output_frame_rate      = framerate;
    cpi->per_frame_bandwidth    = (int)(cpi->oxcf.target_bandwidth /
-                                  cpi->output_framerate);
+                                  cpi->output_frame_rate);
    cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth;
    cpi->min_frame_bandwidth    = (int)(cpi->av_per_frame_bandwidth *
                                  cpi->oxcf.two_pass_vbrmin_section / 100);

    /* Set Maximum gf/arf interval */
-    cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2);
+    cpi->max_gf_interval = ((int)(cpi->output_frame_rate / 2.0) + 2);

    if(cpi->max_gf_interval < 12)
        cpi->max_gf_interval = 12;
@@ -1337,13 +1337,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
     * seems like a reasonable framerate, then use that as a guess, otherwise
     * use 30.
     */
-    cpi->framerate = (double)(oxcf->timebase.den) /
-                     (double)(oxcf->timebase.num);
+    cpi->frame_rate = (double)(oxcf->timebase.den) /
+                      (double)(oxcf->timebase.num);

-    if (cpi->framerate > 180)
-        cpi->framerate = 30;
+    if (cpi->frame_rate > 180)
+        cpi->frame_rate = 30;

-    cpi->ref_framerate = cpi->framerate;
+    cpi->ref_frame_rate = cpi->frame_rate;

    /* change includes all joint functionality */
    vp8_change_config(cpi, oxcf);
@@ -1369,13 +1369,13 @@ static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
    if (cpi->oxcf.number_of_layers > 1)
    {
        unsigned int i;
-        double prev_layer_framerate=0;
+        double prev_layer_frame_rate=0;

        for (i=0; i<cpi->oxcf.number_of_layers; i++)
        {
-            init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate);
-            prev_layer_framerate = cpi->output_framerate /
-                                   cpi->oxcf.rate_decimator[i];
+            init_temporal_layer_context(cpi, oxcf, i, prev_layer_frame_rate);
+            prev_layer_frame_rate = cpi->output_frame_rate /
+                                    cpi->oxcf.rate_decimator[i];
        }
    }

@@ -1399,14 +1399,14 @@ static void update_layer_contexts (VP8_COMP *cpi)
    if (oxcf->number_of_layers > 1)
    {
        unsigned int i;
-        double prev_layer_framerate=0;
+        double prev_layer_frame_rate=0;

        for (i=0; i<oxcf->number_of_layers; i++)
        {
            LAYER_CONTEXT *lc = &cpi->layer_context[i];

-            lc->framerate =
-                cpi->ref_framerate / oxcf->rate_decimator[i];
+            lc->frame_rate =
+                cpi->ref_frame_rate / oxcf->rate_decimator[i];
            lc->target_bandwidth = oxcf->target_bitrate[i] * 1000;

            lc->starting_buffer_level = rescale(
@@ -1432,9 +1432,9 @@ static void update_layer_contexts (VP8_COMP *cpi)
                lc->avg_frame_size_for_layer =
                   (int)((oxcf->target_bitrate[i] -
                          oxcf->target_bitrate[i-1]) * 1000 /
-                          (lc->framerate - prev_layer_framerate));
+                          (lc->frame_rate - prev_layer_frame_rate));

-            prev_layer_framerate = lc->framerate;
+            prev_layer_frame_rate = lc->frame_rate;
        }
    }
 }
@@ -1625,7 +1625,7 @@ void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf)
                    cpi->oxcf.target_bandwidth, 1000);

    /* Set up frame rate and related parameters rate control values. */
-    vp8_new_framerate(cpi, cpi->framerate);
+    vp8_new_frame_rate(cpi, cpi->frame_rate);

    /* Set absolute upper and lower quality limits */
    cpi->worst_quality               = cpi->oxcf.worst_allowed_q;
@@ -1945,7 +1945,7 @@ struct VP8_COMP* vp8_create_compressor(VP8_CONFIG *oxcf)

    for (i = 0; i < KEY_FRAME_CONTEXT; i++)
    {
-        cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate;
+        cpi->prior_key_frame_distance[i] = (int)cpi->output_frame_rate;
    }

 #ifdef OUTPUT_YUV_SRC
@@ -2273,7 +2273,7 @@ void vp8_remove_compressor(VP8_COMP **ptr)
        {
            extern int count_mb_seg[4];
            FILE *f = fopen("modes.stt", "a");
-            double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000 ;
+            double dr = (double)cpi->frame_rate * (double)bytes * (double)8 / (double)count / (double)1000 ;
            fprintf(f, "intra_mode in Intra Frames:\n");
            fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]);
            fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]);
@@ -2750,7 +2750,7 @@ static void update_alt_ref_frame_stats(VP8_COMP *cpi)
    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;

    /* this frame refreshes means next frames don't unless specified by user */
-    cpi->frames_since_golden = 0;
+    cpi->common.frames_since_golden = 0;

    /* Clear the alternate reference update pending flag. */
    cpi->source_alt_ref_pending = 0;
@@ -2802,7 +2802,7 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
         * user
         */
        cm->refresh_golden_frame = 0;
-        cpi->frames_since_golden = 0;
+        cpi->common.frames_since_golden = 0;

        cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
        cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
@@ -2834,12 +2834,12 @@ static void update_golden_frame_stats(VP8_COMP *cpi)
        if (cpi->frames_till_gf_update_due > 0)
            cpi->frames_till_gf_update_due--;

-        if (cpi->frames_till_alt_ref_frame)
-            cpi->frames_till_alt_ref_frame --;
+        if (cpi->common.frames_till_alt_ref_frame)
+            cpi->common.frames_till_alt_ref_frame --;

-        cpi->frames_since_golden ++;
+        cpi->common.frames_since_golden ++;

-        if (cpi->frames_since_golden > 1)
+        if (cpi->common.frames_since_golden > 1)
        {
            cpi->recent_ref_frame_usage[INTRA_FRAME] +=
                cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
@@ -2890,11 +2890,11 @@ static void update_rd_ref_frame_probs(VP8_COMP *cpi)
            cpi->prob_last_coded = 200;
            cpi->prob_gf_coded = 1;
        }
-        else if (cpi->frames_since_golden == 0)
+        else if (cpi->common.frames_since_golden == 0)
        {
            cpi->prob_last_coded = 214;
        }
-        else if (cpi->frames_since_golden == 1)
+        else if (cpi->common.frames_since_golden == 1)
        {
            cpi->prob_last_coded = 192;
            cpi->prob_gf_coded = 220;
@@ -3368,12 +3368,12 @@ static void encode_frame_to_data_rate
            cpi->per_frame_bandwidth = cpi->twopass.gf_bits;
            /* per second target bitrate */
            cpi->target_bandwidth = (int)(cpi->twopass.gf_bits *
-                                          cpi->output_framerate);
+                                          cpi->output_frame_rate);
        }
    }
    else
 #endif
-        cpi->per_frame_bandwidth  = (int)(cpi->target_bandwidth / cpi->output_framerate);
+        cpi->per_frame_bandwidth  = (int)(cpi->target_bandwidth / cpi->output_frame_rate);

    /* Default turn off buffer to buffer copying */
    cm->copy_buffer_to_gf = 0;
@@ -4557,7 +4557,7 @@ static void encode_frame_to_data_rate
        {
            LAYER_CONTEXT *lc = &cpi->layer_context[i];
            int bits_off_for_this_layer =
-               (int)(lc->target_bandwidth / lc->framerate -
+               (int)(lc->target_bandwidth / lc->frame_rate -
                     cpi->projected_frame_size);

            lc->bits_off_target += bits_off_for_this_layer;
@@ -4805,7 +4805,7 @@ static void Pass2Encode(VP8_COMP *cpi, unsigned long *size, unsigned char *dest,
    {
        double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth
            *cpi->oxcf.two_pass_vbrmin_section / 100);
-        cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate);
+        cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->frame_rate);
    }
 }
 #endif
@@ -4821,10 +4821,8 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
 {
 #if HAVE_NEON
    int64_t store_reg[8];
-#if CONFIG_RUNTIME_CPU_DETECT
+#endif
    VP8_COMMON            *cm = &cpi->common;
-#endif
-#endif
    struct vpx_usec_timer  timer;
    int                    res = 0;

@@ -4850,6 +4848,7 @@ int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_C
    if(vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time,
                          frame_flags, cpi->active_map_enabled ? cpi->active_map : NULL))
        res = -1;
+    cm->clr_type = sd->clrtype;
    vpx_usec_timer_mark(&timer);
    cpi->time_receive_data += vpx_usec_timer_elapsed(&timer);

@@ -4934,7 +4933,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                                              cpi->frames_till_gf_update_due);
                force_src_buffer = &cpi->alt_ref_buffer;
            }
-            cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
+            cm->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due;
            cm->refresh_alt_ref_frame = 1;
            cm->refresh_golden_frame = 0;
            cm->refresh_last_frame = 0;
@@ -5039,7 +5038,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
        if (this_duration)
        {
            if (step)
-                cpi->ref_framerate = 10000000.0 / this_duration;
+                cpi->ref_frame_rate = 10000000.0 / this_duration;
            else
            {
                double avg_duration, interval;
@@ -5053,11 +5052,11 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                if(interval > 10000000.0)
                    interval = 10000000;

-                avg_duration = 10000000.0 / cpi->ref_framerate;
+                avg_duration = 10000000.0 / cpi->ref_frame_rate;
                avg_duration *= (interval - avg_duration + this_duration);
                avg_duration /= interval;

-                cpi->ref_framerate = 10000000.0 / avg_duration;
+                cpi->ref_frame_rate = 10000000.0 / avg_duration;
            }

            if (cpi->oxcf.number_of_layers > 1)
@@ -5068,12 +5067,12 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
                for (i=0; i<cpi->oxcf.number_of_layers; i++)
                {
                    LAYER_CONTEXT *lc = &cpi->layer_context[i];
-                    lc->framerate = cpi->ref_framerate /
-                                    cpi->oxcf.rate_decimator[i];
+                    lc->frame_rate = cpi->ref_frame_rate /
+                                  cpi->oxcf.rate_decimator[i];
                }
            }
            else
-                vp8_new_framerate(cpi, cpi->ref_framerate);
+                vp8_new_frame_rate(cpi, cpi->ref_frame_rate);
        }

        cpi->last_time_stamp_seen = cpi->source->ts_start;
@@ -5090,7 +5089,7 @@ int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, unsigned l
        layer = cpi->oxcf.layer_id[
                cpi->temporal_pattern_counter % cpi->oxcf.periodicity];
        restore_layer_context (cpi, layer);
-        vp8_new_framerate(cpi, cpi->layer_context[layer].framerate);
+        vp8_new_frame_rate (cpi, cpi->layer_context[layer].frame_rate);
    }

    if (cpi->compressor_speed == 2)
--- a/vp8/encoder/onyx_int.h
+++ b/vp8/encoder/onyx_int.h
@@ -232,7 +232,7 @@ enum
 typedef struct
 {
    /* Layer configuration */
-    double framerate;
+    double frame_rate;
    int target_bandwidth;

    /* Layer specific coding parameters */
@@ -320,7 +320,6 @@ typedef struct VP8_COMP
    YV12_BUFFER_CONFIG scaled_source;
    YV12_BUFFER_CONFIG *last_frame_unscaled_source;

-    unsigned int frames_till_alt_ref_frame;
    /* frame in src_buffers has been identified to be encoded as an alt ref */
    int source_alt_ref_pending;
    /* an alt ref frame has been encoded and is usable */
@@ -370,7 +369,6 @@ typedef struct VP8_COMP
    double key_frame_rate_correction_factor;
    double gf_rate_correction_factor;

-    unsigned int frames_since_golden;
    /* Count down till next GF */
    int frames_till_gf_update_due;

@@ -403,7 +401,7 @@ typedef struct VP8_COMP
    /* Minimum allocation that should be used for any frame */
    int min_frame_bandwidth;
    int inter_frame_target;
-    double output_framerate;
+    double output_frame_rate;
    int64_t last_time_stamp_seen;
    int64_t last_end_time_stamp_seen;
    int64_t first_time_stamp_ever;
@@ -417,8 +415,8 @@ typedef struct VP8_COMP

    int buffered_mode;

-    double framerate;
-    double ref_framerate;
+    double frame_rate;
+    double ref_frame_rate;
    int64_t buffer_level;
    int64_t bits_off_target;

--- a/vp8/encoder/picklpf.c
+++ b/vp8/encoder/picklpf.c
@@ -313,7 +313,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
    /* Get baseline error score */

    /* Copy the unfiltered / processed recon buffer to the new buffer */
-    vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
+    vp8_yv12_copy_y(saved_frame, cm->frame_to_show);

    vp8cx_set_alt_lf_level(cpi, filt_mid);
    vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid);
@@ -339,7 +339,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
            if(ss_err[filt_low] == 0)
            {
                /* Get Low filter error score */
-                vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
+                vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
                vp8cx_set_alt_lf_level(cpi, filt_low);
                vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low);

@@ -367,7 +367,7 @@ void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi)
        {
            if(ss_err[filt_high] == 0)
            {
-                vpx_yv12_copy_y(saved_frame, cm->frame_to_show);
+                vp8_yv12_copy_y(saved_frame, cm->frame_to_show);
                vp8cx_set_alt_lf_level(cpi, filt_high);
                vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high);

--- a/vp8/encoder/ratectrl.c
+++ b/vp8/encoder/ratectrl.c
@@ -234,7 +234,7 @@ void vp8_save_coding_context(VP8_COMP *cpi)
    cc->frames_since_key          = cpi->frames_since_key;
    cc->filter_level             = cpi->common.filter_level;
    cc->frames_till_gf_update_due   = cpi->frames_till_gf_update_due;
-    cc->frames_since_golden       = cpi->frames_since_golden;
+    cc->frames_since_golden       = cpi->common.frames_since_golden;

    vp8_copy(cc->mvc,      cpi->common.fc.mvc);
    vp8_copy(cc->mvcosts,  cpi->rd_costs.mvcosts);
@@ -271,7 +271,7 @@ void vp8_restore_coding_context(VP8_COMP *cpi)
    cpi->frames_since_key         =   cc->frames_since_key;
    cpi->common.filter_level     =   cc->filter_level;
    cpi->frames_till_gf_update_due  =   cc->frames_till_gf_update_due;
-    cpi->frames_since_golden       =   cc->frames_since_golden;
+    cpi->common.frames_since_golden       =   cc->frames_since_golden;

    vp8_copy(cpi->common.fc.mvc, cc->mvc);

@@ -388,7 +388,7 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
        int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */
        /* Boost depends somewhat on frame rate: only used for 1 layer case. */
        if (cpi->oxcf.number_of_layers == 1) {
-          kf_boost = MAX(initial_boost, (int)(2 * cpi->output_framerate - 16));
+          kf_boost = MAX(initial_boost, (int)(2 * cpi->output_frame_rate - 16));
        }
        else {
          /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */
@@ -399,9 +399,9 @@ static void calc_iframe_target_size(VP8_COMP *cpi)
        kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100;

        /* frame separation adjustment ( down) */
-        if (cpi->frames_since_key  < cpi->output_framerate / 2)
+        if (cpi->frames_since_key  < cpi->output_frame_rate / 2)
            kf_boost = (int)(kf_boost
-                       * cpi->frames_since_key / (cpi->output_framerate / 2));
+                       * cpi->frames_since_key / (cpi->output_frame_rate / 2));

        /* Minimal target size is |2* per_frame_bandwidth|. */
        if (kf_boost < 16)
@@ -715,7 +715,7 @@ static void calc_pframe_target_size(VP8_COMP *cpi)
                if (Adjustment > (cpi->this_frame_target - min_frame_target))
                    Adjustment = (cpi->this_frame_target - min_frame_target);

-                if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1))
+                if (cpi->common.frames_since_golden == (cpi->current_gf_interval >> 1))
                    cpi->this_frame_target += ((cpi->current_gf_interval - 1) * Adjustment);
                else
                    cpi->this_frame_target -= Adjustment;
@@ -1360,7 +1360,7 @@ static int estimate_keyframe_frequency(VP8_COMP *cpi)
         * whichever is smaller.
         */
        int key_freq = cpi->oxcf.key_freq>0 ? cpi->oxcf.key_freq : 1;
-        av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2;
+        av_key_frame_frequency = 1 + (int)cpi->output_frame_rate * 2;

        if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq)
            av_key_frame_frequency = key_freq;
--- a/vp8/encoder/rdopt.c
+++ b/vp8/encoder/rdopt.c
@@ -341,7 +341,7 @@ void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue)

 void vp8_auto_select_speed(VP8_COMP *cpi)
 {
-    int milliseconds_for_compress = (int)(1000000 / cpi->framerate);
+    int milliseconds_for_compress = (int)(1000000 / cpi->frame_rate);

    milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16;

@@ -935,7 +935,7 @@ int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4])
    assert(NEARESTMV <= m  &&  m <= SPLITMV);
    vp8_mv_ref_probs(p, near_mv_ref_ct);
    return vp8_cost_token(vp8_mv_ref_tree, p,
-                          vp8_mv_ref_encoding_array + (m - NEARESTMV));
+                          vp8_mv_ref_encoding_array - NEARESTMV + m);
 }

 void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv)
--- a/vp8/vp8_common.mk
+++ b/vp8/vp8_common.mk
@@ -66,6 +66,7 @@ VP8_COMMON_SRCS-yes += common/setupintrarecon.c
 VP8_COMMON_SRCS-yes += common/swapyv12buffer.c
 VP8_COMMON_SRCS-yes += common/variance_c.c
 VP8_COMMON_SRCS-yes += common/variance.h
+VP8_COMMON_SRCS-yes += common/vp8_asm_com_offsets.c
 VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h


@@ -191,4 +192,7 @@ VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance8x8_neon$(A
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16_neon$(ASM)
 VP8_COMMON_SRCS-$(HAVE_NEON)  += common/arm/neon/vp8_subpixelvariance16x16s_neon$(ASM)

+$(eval $(call asm_offsets_template,\
+         vp8_asm_com_offsets.asm, $(VP8_PREFIX)common/vp8_asm_com_offsets.c))
+
 $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.sh))
--- a/vp8/vp8_cx_iface.c
+++ b/vp8/vp8_cx_iface.c
@@ -153,7 +153,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
 #else
    RANGE_CHECK_HI(cfg, g_lag_in_frames,    25);
 #endif
-    RANGE_CHECK(cfg, rc_end_usage,          VPX_VBR, VPX_Q);
+    RANGE_CHECK(cfg, rc_end_usage,          VPX_VBR, VPX_CQ);
    RANGE_CHECK_HI(cfg, rc_undershoot_pct,  1000);
    RANGE_CHECK_HI(cfg, rc_overshoot_pct,   1000);
    RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100);
@@ -204,7 +204,7 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t      *ctx,
    RANGE_CHECK_HI(vp8_cfg, arnr_strength,   6);
    RANGE_CHECK(vp8_cfg, arnr_type,       1, 3);
    RANGE_CHECK(vp8_cfg, cq_level, 0, 63);
-    if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q))
+    if(finalize && cfg->rc_end_usage == VPX_CQ)
        RANGE_CHECK(vp8_cfg, cq_level,
                    cfg->rc_min_quantizer, cfg->rc_max_quantizer);

@@ -327,14 +327,17 @@ static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf,
    oxcf->resample_up_water_mark   = cfg.rc_resize_up_thresh;
    oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh;

-    if (cfg.rc_end_usage == VPX_VBR) {
-      oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
-    } else if (cfg.rc_end_usage == VPX_CBR) {
-      oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
-    } else if (cfg.rc_end_usage == VPX_CQ) {
-      oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
-    } else if (cfg.rc_end_usage == VPX_Q) {
-      oxcf->end_usage = USAGE_CONSTANT_QUALITY;
+    if (cfg.rc_end_usage == VPX_VBR)
+    {
+        oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK;
+    }
+    else if (cfg.rc_end_usage == VPX_CBR)
+    {
+        oxcf->end_usage = USAGE_STREAM_FROM_SERVER;
+    }
+    else if (cfg.rc_end_usage == VPX_CQ)
+    {
+        oxcf->end_usage = USAGE_CONSTRAINED_QUALITY;
    }

    oxcf->target_bandwidth         = cfg.rc_target_bitrate;
@@ -692,6 +695,7 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t   *img,
    yv12->uv_stride = img->stride[VPX_PLANE_U];

    yv12->border  = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+    yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
    return res;
 }

@@ -1075,7 +1079,11 @@ static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx)
        ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer;
        ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer;

-        ctx->preview_img.fmt = VPX_IMG_FMT_I420;
+        if (sd.clrtype == REG_YUV)
+            ctx->preview_img.fmt = VPX_IMG_FMT_I420;
+        else
+            ctx->preview_img.fmt = VPX_IMG_FMT_VPXI420;
+
        ctx->preview_img.x_chroma_shift = 1;
        ctx->preview_img.y_chroma_shift = 1;

@@ -1269,7 +1277,7 @@ static vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] =
        1,                  /* g_delete_first_pass_file */
        "vp8.fpf"           /* first pass filename */
 #endif
-        VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */
+
        1,                  /* ts_number_layers */
        {0},                /* ts_target_bitrate */
        {0},                /* ts_rate_decimator */
--- a/vp8/vp8_dx_iface.c
+++ b/vp8/vp8_dx_iface.c
@@ -41,6 +41,15 @@ typedef enum

 static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_t);

+typedef struct
+{
+    unsigned int   id;     /* segment identifier, e.g. VP8_SEG_ALG_PRIV */
+    unsigned long  sz;     /* segment size; 0 in the table entry that supplies calc_sz */
+    unsigned int   align;  /* alignment requested for the segment's base address */
+    unsigned int   flags;  /* VPX_CODEC_MEM_* allocation flags, e.g. VPX_CODEC_MEM_ZERO */
+    unsigned long(*calc_sz)(const vpx_codec_dec_cfg_t *, vpx_codec_flags_t);  /* optional size callback; NULL-checked before use */
+} mem_req_t;  /* one entry of the decoder's memory-segment requirement table */
+
 static const mem_req_t vp8_mem_req_segs[] =
 {
    {VP8_SEG_ALG_PRIV,    0, 8, VPX_CODEC_MEM_ZERO, vp8_priv_sz},
@@ -84,6 +93,65 @@ static unsigned long vp8_priv_sz(const vpx_codec_dec_cfg_t *si, vpx_codec_flags_
    return sizeof(vpx_codec_alg_priv_t);
 }

+
+static void vp8_mmap_dtor(vpx_codec_mmap_t *mmap)
+{
+    free(mmap->priv);  /* priv is the raw malloc/calloc pointer; base may point past its start */
+}
+
+static vpx_codec_err_t vp8_mmap_alloc(vpx_codec_mmap_t *mmap)
+{
+    /* Padding added to the request; reused below as the rounding mask. */
+    unsigned int pad = 0;
+
+    if (mmap->align)
+        pad = mmap->align - 1;  /* NOTE(review): mask use assumes a power-of-two align */
+
+    /* Zero-filled storage only when the segment flags request it. */
+    mmap->priv = (mmap->flags & VPX_CODEC_MEM_ZERO) ? calloc(1, mmap->sz + pad)
+                                                    : malloc(mmap->sz + pad);
+
+    /* base and dtor are written whether or not the allocation succeeded. */
+    mmap->base = (void *)(((uintptr_t)mmap->priv + pad) & ~(uintptr_t)pad);
+    mmap->dtor = vp8_mmap_dtor;
+    return mmap->priv ? VPX_CODEC_OK : VPX_CODEC_MEM_ERROR;
+}
+
+static vpx_codec_err_t vp8_validate_mmaps(const vp8_stream_info_t *si,
+        const vpx_codec_mmap_t        *mmaps,
+        vpx_codec_flags_t              init_flags)
+{
+    int seg;
+
+    /* The final vp8_mem_req_segs entry is excluded from the walk. */
+    for (seg = 0; seg < NELEMENTS(vp8_mem_req_segs) - 1; seg++)
+    {
+        const mem_req_t        *req = &vp8_mem_req_segs[seg];
+        const vpx_codec_mmap_t *map = &mmaps[seg];
+        vpx_codec_dec_cfg_t     cfg;
+
+        /* A segment without a base address was never allocated. */
+        if (!map->base)
+            return VPX_CODEC_MEM_ERROR;
+
+        /* Segments without a size callback need no further checking. */
+        if (!req->calc_sz)
+            continue;
+
+        /*
+         * Variable-size segment: recompute the size needed for the
+         * dimensions in si and make sure the mapping is at least that big.
+         */
+        cfg.w = si->w;
+        cfg.h = si->h;
+
+        if (map->sz < req->calc_sz(&cfg, init_flags))
+            return VPX_CODEC_MEM_ERROR;
+    }
+
+    return VPX_CODEC_OK;
+}
+
 static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
 {
    int i;
@@ -110,6 +178,16 @@ static void vp8_init_ctx(vpx_codec_ctx_t *ctx, const vpx_codec_mmap_t *mmap)
    }
 }

+static void *mmap_lkup(vpx_codec_alg_priv_t *ctx, unsigned int id)
+{
+    int seg = 0;
+
+    /* Linear scan; the first segment whose id matches wins. */
+    for (; seg < NELEMENTS(ctx->mmaps); seg++)
+        if (id == ctx->mmaps[seg].id)
+            return ctx->mmaps[seg].base;
+    return NULL;  /* no segment carries this id */
+}
 static void vp8_finalize_mmaps(vpx_codec_alg_priv_t *ctx)
 {
    /* nothing to clean up */
@@ -136,7 +214,7 @@ static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx,
        mmap.align = vp8_mem_req_segs[0].align;
        mmap.flags = vp8_mem_req_segs[0].flags;

-        res = vpx_mmap_alloc(&mmap);
+        res = vp8_mmap_alloc(&mmap);
        if (res != VPX_CODEC_OK) return res;

        vp8_init_ctx(ctx, &mmap);
@@ -288,7 +366,8 @@ static void yuvconfig2image(vpx_image_t               *img,
      * the Y, U, and V planes, nor other alignment adjustments that
      * might be representable by a YV12_BUFFER_CONFIG, so we just
      * initialize all the fields.*/
-    img->fmt = VPX_IMG_FMT_I420;
+    img->fmt = yv12->clrtype == REG_YUV ?
+        VPX_IMG_FMT_I420 : VPX_IMG_FMT_VPXI420;
    img->w = yv12->y_stride;
    img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15;
    img->d_w = yv12->y_width;
@@ -409,7 +488,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
                ctx->mmaps[i].sz = vp8_mem_req_segs[i].calc_sz(&cfg,
                                   ctx->base.init_flags);

-            res = vpx_mmap_alloc(&ctx->mmaps[i]);
+            res = vp8_mmap_alloc(&ctx->mmaps[i]);
        }

        if (!res)
@@ -421,9 +500,7 @@ static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t  *ctx,
    /* Initialize the decoder instance on the first frame*/
    if (!res && !ctx->decoder_init)
    {
-        res = vpx_validate_mmaps(&ctx->si, ctx->mmaps,
-                                 vp8_mem_req_segs, NELEMENTS(vp8_mem_req_segs),
-                                 ctx->base.init_flags);
+        res = vp8_validate_mmaps(&ctx->si, ctx->mmaps, ctx->base.init_flags);

        if (!res)
        {
@@ -720,6 +797,8 @@ static vpx_codec_err_t image2yuvconfig(const vpx_image_t   *img,
    yv12->uv_stride = img->stride[VPX_PLANE_U];

    yv12->border  = (img->stride[VPX_PLANE_Y] - img->d_w) / 2;
+    yv12->clrtype = (img->fmt == VPX_IMG_FMT_VPXI420 || img->fmt == VPX_IMG_FMT_VPXYV12);
+
    return res;
 }

--- a/vp8/vp8dx.mk
+++ b/vp8/vp8dx.mk
@@ -35,5 +35,9 @@ VP8_DX_SRCS-yes += decoder/onyxd_int.h
 VP8_DX_SRCS-yes += decoder/treereader.h
 VP8_DX_SRCS-yes += decoder/onyxd_if.c
 VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c
+VP8_DX_SRCS-yes += decoder/vp8_asm_dec_offsets.c

 VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes))
+
+$(eval $(call asm_offsets_template,\
+         vp8_asm_dec_offsets.asm, $(VP8_PREFIX)decoder/vp8_asm_dec_offsets.c))
--- a/vp9/common/arm/neon/vp9_avg_neon.asm
+++ b/vp9/common/arm/neon/vp9_avg_neon.asm
@@ -1,116 +0,0 @@
-;
-;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-    EXPORT  |vp9_convolve_avg_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-|vp9_convolve_avg_neon| PROC
-    push                {r4-r6, lr}
-    ldrd                r4, r5, [sp, #32]
-    mov                 r6, r2
-
-    cmp                 r4, #32
-    bgt                 avg64
-    beq                 avg32
-    cmp                 r4, #8
-    bgt                 avg16
-    beq                 avg8
-    b                   avg4
-
-avg64
-    sub                 lr, r1, #32
-    sub                 r4, r3, #32
-avg64_h
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q0-q1}, [r0]!
-    vld1.8              {q2-q3}, [r0], lr
-    pld                 [r2, r3]
-    vld1.8              {q8-q9},   [r6@128]!
-    vld1.8              {q10-q11}, [r6@128], r4
-    vrhadd.u8           q0, q0, q8
-    vrhadd.u8           q1, q1, q9
-    vrhadd.u8           q2, q2, q10
-    vrhadd.u8           q3, q3, q11
-    vst1.8              {q0-q1}, [r2@128]!
-    vst1.8              {q2-q3}, [r2@128], r4
-    subs                r5, r5, #1
-    bgt                 avg64_h
-    pop                 {r4-r6, pc}
-
-avg32
-    vld1.8              {q0-q1}, [r0], r1
-    vld1.8              {q2-q3}, [r0], r1
-    vld1.8              {q8-q9},   [r6@128], r3
-    vld1.8              {q10-q11}, [r6@128], r3
-    pld                 [r0]
-    vrhadd.u8           q0, q0, q8
-    pld                 [r0, r1]
-    vrhadd.u8           q1, q1, q9
-    pld                 [r6]
-    vrhadd.u8           q2, q2, q10
-    pld                 [r6, r3]
-    vrhadd.u8           q3, q3, q11
-    vst1.8              {q0-q1}, [r2@128], r3
-    vst1.8              {q2-q3}, [r2@128], r3
-    subs                r5, r5, #2
-    bgt                 avg32
-    pop                 {r4-r6, pc}
-
-avg16
-    vld1.8              {q0}, [r0], r1
-    vld1.8              {q1}, [r0], r1
-    vld1.8              {q2}, [r6@128], r3
-    vld1.8              {q3}, [r6@128], r3
-    pld                 [r0]
-    pld                 [r0, r1]
-    vrhadd.u8           q0, q0, q2
-    pld                 [r6]
-    pld                 [r6, r3]
-    vrhadd.u8           q1, q1, q3
-    vst1.8              {q0}, [r2@128], r3
-    vst1.8              {q1}, [r2@128], r3
-    subs                r5, r5, #2
-    bgt                 avg16
-    pop                 {r4-r6, pc}
-
-avg8
-    vld1.8              {d0}, [r0], r1
-    vld1.8              {d1}, [r0], r1
-    vld1.8              {d2}, [r6@64], r3
-    vld1.8              {d3}, [r6@64], r3
-    pld                 [r0]
-    pld                 [r0, r1]
-    vrhadd.u8           q0, q0, q1
-    pld                 [r6]
-    pld                 [r6, r3]
-    vst1.8              {d0}, [r2@64], r3
-    vst1.8              {d1}, [r2@64], r3
-    subs                r5, r5, #2
-    bgt                 avg8
-    pop                 {r4-r6, pc}
-
-avg4
-    vld1.32             {d0[0]}, [r0], r1
-    vld1.32             {d0[1]}, [r0], r1
-    vld1.32             {d2[0]}, [r6@32], r3
-    vld1.32             {d2[1]}, [r6@32], r3
-    vrhadd.u8           d0, d0, d2
-    vst1.32             {d0[0]}, [r2@32], r3
-    vst1.32             {d0[1]}, [r2@32], r3
-    subs                r5, r5, #2
-    bgt                 avg4
-    pop                 {r4-r6, pc}
-    ENDP
-
-    END
--- a/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
+++ b/vp9/common/arm/neon/vp9_convolve8_avg_neon.asm
@@ -1,302 +0,0 @@
-;
-;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    ; These functions are only valid when:
-    ; x_step_q4 == 16
-    ; w%4 == 0
-    ; h%4 == 0
-    ; taps == 8
-    ; VP9_FILTER_WEIGHT == 128
-    ; VP9_FILTER_SHIFT == 7
-
-    EXPORT  |vp9_convolve8_avg_horiz_neon|
-    EXPORT  |vp9_convolve8_avg_vert_neon|
-    IMPORT  |vp9_convolve8_avg_horiz_c|
-    IMPORT  |vp9_convolve8_avg_vert_c|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-    ; Multiply and accumulate by q0
-    MACRO
-    MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
-    vmull.s16 $dst, $src0, d0[0]
-    vmlal.s16 $dst, $src1, d0[1]
-    vmlal.s16 $dst, $src2, d0[2]
-    vmlal.s16 $dst, $src3, d0[3]
-    vmlal.s16 $dst, $src4, d1[0]
-    vmlal.s16 $dst, $src5, d1[1]
-    vmlal.s16 $dst, $src6, d1[2]
-    vmlal.s16 $dst, $src7, d1[3]
-    MEND
-
-; r0    const uint8_t *src
-; r1    int src_stride
-; r2    uint8_t *dst
-; r3    int dst_stride
-; sp[]const int16_t *filter_x
-; sp[]int x_step_q4
-; sp[]const int16_t *filter_y ; unused
-; sp[]int y_step_q4           ; unused
-; sp[]int w
-; sp[]int h
-
-|vp9_convolve8_avg_horiz_neon| PROC
-    ldr             r12, [sp, #4]           ; x_step_q4
-    cmp             r12, #16
-    bne             vp9_convolve8_avg_horiz_c
-
-    push            {r4-r10, lr}
-
-    sub             r0, r0, #3              ; adjust for taps
-
-    ldr             r5, [sp, #32]           ; filter_x
-    ldr             r6, [sp, #48]           ; w
-    ldr             r7, [sp, #52]           ; h
-
-    vld1.s16        {q0}, [r5]              ; filter_x
-
-    sub             r8, r1, r1, lsl #2      ; -src_stride * 3
-    add             r8, r8, #4              ; -src_stride * 3 + 4
-
-    sub             r4, r3, r3, lsl #2      ; -dst_stride * 3
-    add             r4, r4, #4              ; -dst_stride * 3 + 4
-
-    rsb             r9, r6, r1, lsl #2      ; reset src for outer loop
-    sub             r9, r9, #7
-    rsb             r12, r6, r3, lsl #2     ; reset dst for outer loop
-
-    mov             r10, r6                 ; w loop counter
-
-loop_horiz_v
-    vld1.8          {d24}, [r0], r1
-    vld1.8          {d25}, [r0], r1
-    vld1.8          {d26}, [r0], r1
-    vld1.8          {d27}, [r0], r8
-
-    vtrn.16         q12, q13
-    vtrn.8          d24, d25
-    vtrn.8          d26, d27
-
-    pld             [r0, r1, lsl #2]
-
-    vmovl.u8        q8, d24
-    vmovl.u8        q9, d25
-    vmovl.u8        q10, d26
-    vmovl.u8        q11, d27
-
-    ; save a few instructions in the inner loop
-    vswp            d17, d18
-    vmov            d23, d21
-
-    add             r0, r0, #3
-
-loop_horiz
-    add             r5, r0, #64
-
-    vld1.32         {d28[]}, [r0], r1
-    vld1.32         {d29[]}, [r0], r1
-    vld1.32         {d31[]}, [r0], r1
-    vld1.32         {d30[]}, [r0], r8
-
-    pld             [r5]
-
-    vtrn.16         d28, d31
-    vtrn.16         d29, d30
-    vtrn.8          d28, d29
-    vtrn.8          d31, d30
-
-    pld             [r5, r1]
-
-    ; extract to s16
-    vtrn.32         q14, q15
-    vmovl.u8        q12, d28
-    vmovl.u8        q13, d29
-
-    pld             [r5, r1, lsl #1]
-
-    ; slightly out of order load to match the existing data
-    vld1.u32        {d6[0]}, [r2], r3
-    vld1.u32        {d7[0]}, [r2], r3
-    vld1.u32        {d6[1]}, [r2], r3
-    vld1.u32        {d7[1]}, [r2], r3
-
-    sub             r2, r2, r3, lsl #2      ; reset for store
-
-    ; src[] * filter_x
-    MULTIPLY_BY_Q0  q1,  d16, d17, d20, d22, d18, d19, d23, d24
-    MULTIPLY_BY_Q0  q2,  d17, d20, d22, d18, d19, d23, d24, d26
-    MULTIPLY_BY_Q0  q14, d20, d22, d18, d19, d23, d24, d26, d27
-    MULTIPLY_BY_Q0  q15, d22, d18, d19, d23, d24, d26, d27, d25
-
-    pld             [r5, -r8]
-
-    ; += 64 >> 7
-    vqrshrun.s32    d2, q1, #7
-    vqrshrun.s32    d3, q2, #7
-    vqrshrun.s32    d4, q14, #7
-    vqrshrun.s32    d5, q15, #7
-
-    ; saturate
-    vqmovn.u16      d2, q1
-    vqmovn.u16      d3, q2
-
-    ; transpose
-    vtrn.16         d2, d3
-    vtrn.32         d2, d3
-    vtrn.8          d2, d3
-
-    ; average the new value and the dst value
-    vrhadd.u8       q1, q1, q3
-
-    vst1.u32        {d2[0]}, [r2@32], r3
-    vst1.u32        {d3[0]}, [r2@32], r3
-    vst1.u32        {d2[1]}, [r2@32], r3
-    vst1.u32        {d3[1]}, [r2@32], r4
-
-    vmov            q8,  q9
-    vmov            d20, d23
-    vmov            q11, q12
-    vmov            q9,  q13
-
-    subs            r6, r6, #4              ; w -= 4
-    bgt             loop_horiz
-
-    ; outer loop
-    mov             r6, r10                 ; restore w counter
-    add             r0, r0, r9              ; src += src_stride * 4 - w
-    add             r2, r2, r12             ; dst += dst_stride * 4 - w
-    subs            r7, r7, #4              ; h -= 4
-    bgt loop_horiz_v
-
-    pop             {r4-r10, pc}
-
-    ENDP
-
-|vp9_convolve8_avg_vert_neon| PROC
-    ldr             r12, [sp, #12]
-    cmp             r12, #16
-    bne             vp9_convolve8_avg_vert_c
-
-    push            {r4-r8, lr}
-
-    ; adjust for taps
-    sub             r0, r0, r1
-    sub             r0, r0, r1, lsl #1
-
-    ldr             r4, [sp, #32]           ; filter_y
-    ldr             r6, [sp, #40]           ; w
-    ldr             lr, [sp, #44]           ; h
-
-    vld1.s16        {q0}, [r4]              ; filter_y
-
-    lsl             r1, r1, #1
-    lsl             r3, r3, #1
-
-loop_vert_h
-    mov             r4, r0
-    add             r7, r0, r1, asr #1
-    mov             r5, r2
-    add             r8, r2, r3, asr #1
-    mov             r12, lr                 ; h loop counter
-
-    vld1.u32        {d16[0]}, [r4], r1
-    vld1.u32        {d16[1]}, [r7], r1
-    vld1.u32        {d18[0]}, [r4], r1
-    vld1.u32        {d18[1]}, [r7], r1
-    vld1.u32        {d20[0]}, [r4], r1
-    vld1.u32        {d20[1]}, [r7], r1
-    vld1.u32        {d22[0]}, [r4], r1
-
-    vmovl.u8        q8, d16
-    vmovl.u8        q9, d18
-    vmovl.u8        q10, d20
-    vmovl.u8        q11, d22
-
-loop_vert
-    ; always process a 4x4 block at a time
-    vld1.u32        {d24[0]}, [r7], r1
-    vld1.u32        {d26[0]}, [r4], r1
-    vld1.u32        {d26[1]}, [r7], r1
-    vld1.u32        {d24[1]}, [r4], r1
-
-    ; extract to s16
-    vmovl.u8        q12, d24
-    vmovl.u8        q13, d26
-
-    vld1.u32        {d6[0]}, [r5@32], r3
-    vld1.u32        {d6[1]}, [r8@32], r3
-    vld1.u32        {d7[0]}, [r5@32], r3
-    vld1.u32        {d7[1]}, [r8@32], r3
-
-    pld             [r7]
-    pld             [r4]
-
-    ; src[] * filter_y
-    MULTIPLY_BY_Q0  q1,  d16, d17, d18, d19, d20, d21, d22, d24
-
-    pld             [r7, r1]
-    pld             [r4, r1]
-
-    MULTIPLY_BY_Q0  q2,  d17, d18, d19, d20, d21, d22, d24, d26
-
-    pld             [r5]
-    pld             [r8]
-
-    MULTIPLY_BY_Q0  q14, d18, d19, d20, d21, d22, d24, d26, d27
-
-    pld             [r5, r3]
-    pld             [r8, r3]
-
-    MULTIPLY_BY_Q0  q15, d19, d20, d21, d22, d24, d26, d27, d25
-
-    ; += 64 >> 7
-    vqrshrun.s32    d2, q1, #7
-    vqrshrun.s32    d3, q2, #7
-    vqrshrun.s32    d4, q14, #7
-    vqrshrun.s32    d5, q15, #7
-
-    ; saturate
-    vqmovn.u16      d2, q1
-    vqmovn.u16      d3, q2
-
-    ; average the new value and the dst value
-    vrhadd.u8       q1, q1, q3
-
-    sub             r5, r5, r3, lsl #1      ; reset for store
-    sub             r8, r8, r3, lsl #1
-
-    vst1.u32        {d2[0]}, [r5@32], r3
-    vst1.u32        {d2[1]}, [r8@32], r3
-    vst1.u32        {d3[0]}, [r5@32], r3
-    vst1.u32        {d3[1]}, [r8@32], r3
-
-    vmov            q8, q10
-    vmov            d18, d22
-    vmov            d19, d24
-    vmov            q10, q13
-    vmov            d22, d25
-
-    subs            r12, r12, #4            ; h -= 4
-    bgt             loop_vert
-
-    ; outer loop
-    add             r0, r0, #4
-    add             r2, r2, #4
-    subs            r6, r6, #4              ; w -= 4
-    bgt             loop_vert_h
-
-    pop             {r4-r8, pc}
-
-    ENDP
-    END
--- a/vp9/common/arm/neon/vp9_convolve8_neon.asm
+++ b/vp9/common/arm/neon/vp9_convolve8_neon.asm
@@ -1,280 +0,0 @@
-;
-;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-
-    ; These functions are only valid when:
-    ; x_step_q4 == 16
-    ; w%4 == 0
-    ; h%4 == 0
-    ; taps == 8
-    ; VP9_FILTER_WEIGHT == 128
-    ; VP9_FILTER_SHIFT == 7
-
-    EXPORT  |vp9_convolve8_horiz_neon|
-    EXPORT  |vp9_convolve8_vert_neon|
-    IMPORT  |vp9_convolve8_horiz_c|
-    IMPORT  |vp9_convolve8_vert_c|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-    ; Multiply and accumulate by q0
-    MACRO
-    MULTIPLY_BY_Q0 $dst, $src0, $src1, $src2, $src3, $src4, $src5, $src6, $src7
-    vmull.s16 $dst, $src0, d0[0]
-    vmlal.s16 $dst, $src1, d0[1]
-    vmlal.s16 $dst, $src2, d0[2]
-    vmlal.s16 $dst, $src3, d0[3]
-    vmlal.s16 $dst, $src4, d1[0]
-    vmlal.s16 $dst, $src5, d1[1]
-    vmlal.s16 $dst, $src6, d1[2]
-    vmlal.s16 $dst, $src7, d1[3]
-    MEND
-
-; r0    const uint8_t *src
-; r1    int src_stride
-; r2    uint8_t *dst
-; r3    int dst_stride
-; sp[]const int16_t *filter_x
-; sp[]int x_step_q4
-; sp[]const int16_t *filter_y ; unused
-; sp[]int y_step_q4           ; unused
-; sp[]int w
-; sp[]int h
-
-|vp9_convolve8_horiz_neon| PROC
-    ldr             r12, [sp, #4]           ; x_step_q4
-    cmp             r12, #16
-    bne             vp9_convolve8_horiz_c
-
-    push            {r4-r10, lr}
-
-    sub             r0, r0, #3              ; adjust for taps
-
-    ldr             r5, [sp, #32]           ; filter_x
-    ldr             r6, [sp, #48]           ; w
-    ldr             r7, [sp, #52]           ; h
-
-    vld1.s16        {q0}, [r5]              ; filter_x
-
-    sub             r8, r1, r1, lsl #2      ; -src_stride * 3
-    add             r8, r8, #4              ; -src_stride * 3 + 4
-
-    sub             r4, r3, r3, lsl #2      ; -dst_stride * 3
-    add             r4, r4, #4              ; -dst_stride * 3 + 4
-
-    rsb             r9, r6, r1, lsl #2      ; reset src for outer loop
-    sub             r9, r9, #7
-    rsb             r12, r6, r3, lsl #2     ; reset dst for outer loop
-
-    mov             r10, r6                 ; w loop counter
-
-loop_horiz_v
-    vld1.8          {d24}, [r0], r1
-    vld1.8          {d25}, [r0], r1
-    vld1.8          {d26}, [r0], r1
-    vld1.8          {d27}, [r0], r8
-
-    vtrn.16         q12, q13
-    vtrn.8          d24, d25
-    vtrn.8          d26, d27
-
-    pld             [r0, r1, lsl #2]
-
-    vmovl.u8        q8, d24
-    vmovl.u8        q9, d25
-    vmovl.u8        q10, d26
-    vmovl.u8        q11, d27
-
-    ; save a few instructions in the inner loop
-    vswp            d17, d18
-    vmov            d23, d21
-
-    add             r0, r0, #3
-
-loop_horiz
-    add             r5, r0, #64
-
-    vld1.32         {d28[]}, [r0], r1
-    vld1.32         {d29[]}, [r0], r1
-    vld1.32         {d31[]}, [r0], r1
-    vld1.32         {d30[]}, [r0], r8
-
-    pld             [r5]
-
-    vtrn.16         d28, d31
-    vtrn.16         d29, d30
-    vtrn.8          d28, d29
-    vtrn.8          d31, d30
-
-    pld             [r5, r1]
-
-    ; extract to s16
-    vtrn.32         q14, q15
-    vmovl.u8        q12, d28
-    vmovl.u8        q13, d29
-
-    pld             [r5, r1, lsl #1]
-
-    ; src[] * filter_x
-    MULTIPLY_BY_Q0  q1,  d16, d17, d20, d22, d18, d19, d23, d24
-    MULTIPLY_BY_Q0  q2,  d17, d20, d22, d18, d19, d23, d24, d26
-    MULTIPLY_BY_Q0  q14, d20, d22, d18, d19, d23, d24, d26, d27
-    MULTIPLY_BY_Q0  q15, d22, d18, d19, d23, d24, d26, d27, d25
-
-    pld             [r5, -r8]
-
-    ; += 64 >> 7
-    vqrshrun.s32    d2, q1, #7
-    vqrshrun.s32    d3, q2, #7
-    vqrshrun.s32    d4, q14, #7
-    vqrshrun.s32    d5, q15, #7
-
-    ; saturate
-    vqmovn.u16      d2, q1
-    vqmovn.u16      d3, q2
-
-    ; transpose
-    vtrn.16         d2, d3
-    vtrn.32         d2, d3
-    vtrn.8          d2, d3
-
-    vst1.u32        {d2[0]}, [r2@32], r3
-    vst1.u32        {d3[0]}, [r2@32], r3
-    vst1.u32        {d2[1]}, [r2@32], r3
-    vst1.u32        {d3[1]}, [r2@32], r4
-
-    vmov            q8,  q9
-    vmov            d20, d23
-    vmov            q11, q12
-    vmov            q9,  q13
-
-    subs            r6, r6, #4              ; w -= 4
-    bgt             loop_horiz
-
-    ; outer loop
-    mov             r6, r10                 ; restore w counter
-    add             r0, r0, r9              ; src += src_stride * 4 - w
-    add             r2, r2, r12             ; dst += dst_stride * 4 - w
-    subs            r7, r7, #4              ; h -= 4
-    bgt loop_horiz_v
-
-    pop             {r4-r10, pc}
-
-    ENDP
-
-|vp9_convolve8_vert_neon| PROC
-    ldr             r12, [sp, #12]
-    cmp             r12, #16
-    bne             vp9_convolve8_vert_c
-
-    push            {r4-r8, lr}
-
-    ; adjust for taps
-    sub             r0, r0, r1
-    sub             r0, r0, r1, lsl #1
-
-    ldr             r4, [sp, #32]           ; filter_y
-    ldr             r6, [sp, #40]           ; w
-    ldr             lr, [sp, #44]           ; h
-
-    vld1.s16        {q0}, [r4]              ; filter_y
-
-    lsl             r1, r1, #1
-    lsl             r3, r3, #1
-
-loop_vert_h
-    mov             r4, r0
-    add             r7, r0, r1, asr #1
-    mov             r5, r2
-    add             r8, r2, r3, asr #1
-    mov             r12, lr                 ; h loop counter
-
-    vld1.u32        {d16[0]}, [r4], r1
-    vld1.u32        {d16[1]}, [r7], r1
-    vld1.u32        {d18[0]}, [r4], r1
-    vld1.u32        {d18[1]}, [r7], r1
-    vld1.u32        {d20[0]}, [r4], r1
-    vld1.u32        {d20[1]}, [r7], r1
-    vld1.u32        {d22[0]}, [r4], r1
-
-    vmovl.u8        q8, d16
-    vmovl.u8        q9, d18
-    vmovl.u8        q10, d20
-    vmovl.u8        q11, d22
-
-loop_vert
-    ; always process a 4x4 block at a time
-    vld1.u32        {d24[0]}, [r7], r1
-    vld1.u32        {d26[0]}, [r4], r1
-    vld1.u32        {d26[1]}, [r7], r1
-    vld1.u32        {d24[1]}, [r4], r1
-
-    ; extract to s16
-    vmovl.u8        q12, d24
-    vmovl.u8        q13, d26
-
-    pld             [r5]
-    pld             [r8]
-
-    ; src[] * filter_y
-    MULTIPLY_BY_Q0  q1,  d16, d17, d18, d19, d20, d21, d22, d24
-
-    pld             [r5, r3]
-    pld             [r8, r3]
-
-    MULTIPLY_BY_Q0  q2,  d17, d18, d19, d20, d21, d22, d24, d26
-
-    pld             [r7]
-    pld             [r4]
-
-    MULTIPLY_BY_Q0  q14, d18, d19, d20, d21, d22, d24, d26, d27
-
-    pld             [r7, r1]
-    pld             [r4, r1]
-
-    MULTIPLY_BY_Q0  q15, d19, d20, d21, d22, d24, d26, d27, d25
-
-    ; += 64 >> 7
-    vqrshrun.s32    d2, q1, #7
-    vqrshrun.s32    d3, q2, #7
-    vqrshrun.s32    d4, q14, #7
-    vqrshrun.s32    d5, q15, #7
-
-    ; saturate
-    vqmovn.u16      d2, q1
-    vqmovn.u16      d3, q2
-
-    vst1.u32        {d2[0]}, [r5@32], r3
-    vst1.u32        {d2[1]}, [r8@32], r3
-    vst1.u32        {d3[0]}, [r5@32], r3
-    vst1.u32        {d3[1]}, [r8@32], r3
-
-    vmov            q8, q10
-    vmov            d18, d22
-    vmov            d19, d24
-    vmov            q10, q13
-    vmov            d22, d25
-
-    subs            r12, r12, #4            ; h -= 4
-    bgt             loop_vert
-
-    ; outer loop
-    add             r0, r0, #4
-    add             r2, r2, #4
-    subs            r6, r6, #4              ; w -= 4
-    bgt             loop_vert_h
-
-    pop             {r4-r8, pc}
-
-    ENDP
-    END
--- a/vp9/common/arm/neon/vp9_convolve_neon.c
+++ b/vp9/common/arm/neon/vp9_convolve_neon.c
@@ -1,78 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
-#include "vpx_ports/mem.h"
-
-void vp9_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride,
-                        uint8_t *dst, ptrdiff_t dst_stride,
-                        const int16_t *filter_x, int x_step_q4,
-                        const int16_t *filter_y, int y_step_q4,
-                        int w, int h) {
-  /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the
-   * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4).
-   */
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
-
-  // Account for the vertical phase needing 3 lines prior and 4 lines post
-  int intermediate_height = h + 7;
-
-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vp9_convolve8_c(src, src_stride,
-                           dst, dst_stride,
-                           filter_x, x_step_q4,
-                           filter_y, y_step_q4,
-                           w, h);
-
-  /* Filter starting 3 lines back. The neon implementation will ignore the
-   * given height and filter a multiple of 4 lines. Since this goes in to
-   * the temp buffer which has lots of extra room and is subsequently discarded
-   * this is safe if somewhat less than ideal.
-   */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
-                           temp, 64,
-                           filter_x, x_step_q4, filter_y, y_step_q4,
-                           w, intermediate_height);
-
-  /* Step into the temp buffer 3 lines to get the actual frame data */
-  vp9_convolve8_vert_neon(temp + 64 * 3, 64,
-                          dst, dst_stride,
-                          filter_x, x_step_q4, filter_y, y_step_q4,
-                          w, h);
-}
-
-void vp9_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *filter_x, int x_step_q4,
-                            const int16_t *filter_y, int y_step_q4,
-                            int w, int h) {
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, temp, 64 * 72);
-  int intermediate_height = h + 7;
-
-  if (x_step_q4 != 16 || y_step_q4 != 16)
-    return vp9_convolve8_avg_c(src, src_stride,
-                               dst, dst_stride,
-                               filter_x, x_step_q4,
-                               filter_y, y_step_q4,
-                               w, h);
-
-  /* This implementation has the same issues as above. In addition, we only want
-   * to average the values after both passes.
-   */
-  vp9_convolve8_horiz_neon(src - src_stride * 3, src_stride,
-                           temp, 64,
-                           filter_x, x_step_q4, filter_y, y_step_q4,
-                           w, intermediate_height);
-  vp9_convolve8_avg_vert_neon(temp + 64 * 3,
-                              64, dst, dst_stride,
-                              filter_x, x_step_q4, filter_y, y_step_q4,
-                              w, h);
-}
--- a/vp9/common/arm/neon/vp9_copy_neon.asm
+++ b/vp9/common/arm/neon/vp9_copy_neon.asm
@@ -1,84 +0,0 @@
-;
-;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license
-;  that can be found in the LICENSE file in the root of the source
-;  tree. An additional intellectual property rights grant can be found
-;  in the file PATENTS.  All contributing project authors may
-;  be found in the AUTHORS file in the root of the source tree.
-;
-
-    EXPORT  |vp9_convolve_copy_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-|vp9_convolve_copy_neon| PROC
-    push                {r4-r5, lr}
-    ldrd                r4, r5, [sp, #28]
-
-    cmp                 r4, #32
-    bgt                 copy64
-    beq                 copy32
-    cmp                 r4, #8
-    bgt                 copy16
-    beq                 copy8
-    b                   copy4
-
-copy64
-    sub                 lr, r1, #32
-    sub                 r3, r3, #32
-copy64_h
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q0-q1}, [r0]!
-    vld1.8              {q2-q3}, [r0], lr
-    vst1.8              {q0-q1}, [r2@128]!
-    vst1.8              {q2-q3}, [r2@128], r3
-    subs                r5, r5, #1
-    bgt                 copy64_h
-    pop                 {r4-r5, pc}
-
-copy32
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q0-q1}, [r0], r1
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q2-q3}, [r0], r1
-    vst1.8              {q0-q1}, [r2@128], r3
-    vst1.8              {q2-q3}, [r2@128], r3
-    subs                r5, r5, #2
-    bgt                 copy32
-    pop                 {r4-r5, pc}
-
-copy16
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q0}, [r0], r1
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {q1}, [r0], r1
-    vst1.8              {q0}, [r2@128], r3
-    vst1.8              {q1}, [r2@128], r3
-    subs                r5, r5, #2
-    bgt                 copy16
-    pop                 {r4-r5, pc}
-
-copy8
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {d0}, [r0], r1
-    pld                 [r0, r1, lsl #1]
-    vld1.8              {d2}, [r0], r1
-    vst1.8              {d0}, [r2@64], r3
-    vst1.8              {d2}, [r2@64], r3
-    subs                r5, r5, #2
-    bgt                 copy8
-    pop                 {r4-r5, pc}
-
-copy4
-    ldr                 r12, [r0], r1
-    str                 r12, [r2], r3
-    subs                r5, r5, #1
-    bgt                 copy4
-    pop                 {r4-r5, pc}
-    ENDP
-
-    END
--- a/vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm
+++ b/vp9/common/arm/neon/vp9_dc_only_idct_add_neon.asm
@@ -1,69 +0,0 @@
-;
-;  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-;
-;  Use of this source code is governed by a BSD-style license and patent
-;  grant that can be found in the LICENSE file in the root of the source
-;  tree. All contributing project authors may be found in the AUTHORS
-;  file in the root of the source tree.
-;
-
-
-    EXPORT  |vp9_dc_only_idct_add_neon|
-    ARM
-    REQUIRE8
-    PRESERVE8
-
-    AREA ||.text||, CODE, READONLY, ALIGN=2
-
-;void vp9_dc_only_idct_add_neon(int input_dc, uint8_t *pred_ptr,
-;                            uint8_t *dst_ptr, int pitch, int stride)
-;
-; r0  int input_dc
-; r1  uint8_t *pred_ptr
-; r2  uint8_t *dst_ptr
-; r3  int pitch
-; sp  int stride
-
-|vp9_dc_only_idct_add_neon| PROC
-
-    ; generate cospi_16_64 = 11585
-    mov              r12, #0x2d00
-    add              r12, #0x41
-
-    ; dct_const_round_shift(input_dc * cospi_16_64)
-    mul              r0, r0, r12               ; input_dc * cospi_16_64
-    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
-    asr              r0, r0, #14               ; >> DCT_CONST_BITS
-
-    ; dct_const_round_shift(out * cospi_16_64)
-    mul              r0, r0, r12               ; out * cospi_16_64
-    add              r0, r0, #0x2000           ; +(1 << ((DCT_CONST_BITS) - 1))
-    asr              r0, r0, #14               ; >> DCT_CONST_BITS
-
-    ; ROUND_POWER_OF_TWO(out, 4)
-    add              r0, r0, #8                ; + (1 <<((4) - 1))
-    asr              r0, r0, #4                ; >> 4
-
-    vdup.16         q0, r0;                   ; duplicate a1
-    ldr              r12, [sp]                 ; load stride
-
-    vld1.32         {d2[0]}, [r1], r3
-    vld1.32         {d2[1]}, [r1], r3
-    vld1.32         {d4[0]}, [r1], r3
-    vld1.32         {d4[1]}, [r1]
-
-    vaddw.u8        q1, q0, d2                ; a1 + pred_ptr[c]
-    vaddw.u8        q2, q0, d4
-
-    vqmovun.s16     d2, q1                    ; clip_pixel
-    vqmovun.s16     d4, q2
-
-    vst1.32         {d2[0]}, [r2], r12
-    vst1.32         {d2[1]}, [r2], r12
-    vst1.32         {d4[0]}, [r2], r12
-    vst1.32         {d4[1]}, [r2]
-
-    bx               lr
-    ENDP             ; |vp9_dc_only_idct_add_neon|
-
-    END
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Ami Fischman	28147a449a	libvpx: enable building for iOS devices (armv7) Allow output of gas syntax assembly directly from obj_int_extract Change-Id: I33a747e87ef1c590a8766dea17f8cb2497e54591	2013-07-19 14:05:59 -07:00
Ronald S. Bultje	33149cbb4c	Replace generated quant tables with static lookup tables. This prevents possible float rounding issues between architectures. Change-Id: I6ed260aebd49feb4cfb5596a5370c44be5f72167	2013-07-16 14:04:41 -07:00
John Koleszar	3f454060bb	Fix above context pointers In the prior code, the above context pointers used for entropy decoding were initialized on the first frame, and not updated when the frame size changed. The per-frame code which initializes the contexts assumes that the contexts are contiguous, leading to an incomplete initialization when the frame is smaller. This commit updates the pointers so that the context is contigous whenever the frame size changes. Conflicts: vp9/common/vp9_alloccommon.c Change-Id: I08b53e3a30c8289491212311682ff1b8028cff6c	2013-07-16 14:04:41 -07:00
Yaowu Xu	d19ed5f249	Change to extend full border only when needed This is a short term optimization till we work out a decoder implementation requiring no frame border extension. Change-Id: I02d15bfde4d926b50a4e58b393d8c4062d1be70f	2013-07-16 14:04:39 -07:00
Ronald S. Bultje	a801f7a295	Increase border size from 96 to 160. This is required because upon downscaling, if a motion vector points partially into the UMV (e.g. all minus 1 of 64+7 pixels, i.e. 70), then we can point up to 140 pixels into the larger-resolution (2x) reference buffer UMV, which means the UMV for reference buffers in downscaling needs to be 140 rounded up to the nearest multiple of 32, i.e. 160. Longer-term, we should probably handle the UMV differently by detecting edge coverage on-the-fly and using a temporary buffer for edge extensions instead of adding 160 pixels on all sides of the image (which means a CIF image uses 3x its own area size for borders). Change-Id: I5184443e6731cd6721fc6a5d430a53e7d91b4f7e	2013-07-16 12:41:10 -07:00
Dmitry Kovalev	e39bd6407f	Fixing vp9_get_pred_context_comp_ref_p function. Adding missed parenthesis around boolean expressions. Bitstream is changed. Regenerating test vectors. Conflicts: vp9/common/vp9_pred_common.c Change-Id: I4cc00b761e9473f92f180a9fc3a0c607f0aaae56	2013-07-16 12:40:48 -07:00