Corrected optimization of 8x8 DCT code

The 8x8 DCT uses a fast version whenever possible. There was a mistake in the checking code which meant sometimes the fast version was used when it was not safe to do so. Change-Id: I154c84c9e2d836764768a11082947ca30f4b5ab7
Merge "Added tests for high bitdepth variance sse2 functions" into highbitdepth
2014-12-11 15:54:23 +00:00 · 2014-11-10 14:42:26 -08:00 · 2014-11-10 20:42:24 +00:00 · 2014-11-10 10:47:39 -08:00 · 2014-11-10 16:17:49 +00:00 · 2014-11-06 13:57:04 -08:00
386 changed files with 27025 additions and 40405 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -44,6 +44,7 @@
 /ivfenc.dox
 /libvpx.so*
 /libvpx.ver
+/obj_int_extract
 /samples.dox
 /test_libvpx
 /vp8_api1_migration.dox
--- a/4
+++ b/4
@@ -47,6 +47,10 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

+    armv5te-android-gcc
+    armv5te-linux-rvct
+    armv5te-linux-gcc
+    armv5te-none-rvct
    armv6-darwin-gcc
    armv6-linux-rvct
    armv6-linux-gcc
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -0,0 +1,18 @@
+REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+REM
+REM   Use of this source code is governed by a BSD-style license
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
+echo on
+
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
+REM        source directories.
+REM   %2 - Path to obj_int_extract.exe.
+cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
+cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
+%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -43,7 +43,7 @@
 # will remove any NEON dependency.

 # To change to building armeabi, run ./libvpx/configure again, but with
-# --target=armv6-android-gcc and modify the Application.mk file to
+# --target=armv5te-android-gcc and modify the Application.mk file to
 # set APP_ABI := armeabi
 #
 # Running ndk-build will build libvpx and include it in your project.
@@ -60,7 +60,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),armeabi)
-  include $(CONFIG_DIR)libs-armv6-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
@@ -91,8 +91,51 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

+# -----------------------------------------------------------------------------
+# Template  : asm_offsets_template
+# Arguments : 1: assembly offsets file to be created
+#             2: c file to base assembly offsets on
+# Returns   : None
+# Usage     : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>))
+# Rationale : Create offsets at compile time for structures that are
+#             defined in c, but used in assembly functions.
+# -----------------------------------------------------------------------------
+define asm_offsets_template
+
+_SRC:=$(2)
+_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
+
+_FLAGS = $$($$(my)CFLAGS) \
+          $$(call get-src-file-target-cflags,$(2)) \
+          $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
+          $$(LOCAL_CFLAGS) \
+          $$(NDK_APP_CFLAGS) \
+          $$(call host-c-includes,$$($(my)C_INCLUDES)) \
+          -DINLINE_ASM \
+          -S \
+
+_TEXT = "Compile $$(call get-src-file-text,$(2))"
+_CC   = $$(TARGET_CC)
+
+$$(eval $$(call ev-build-file))
+
+$(1) : $$(_OBJ) $(2)
+	@mkdir -p $$(dir $$@)
+	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
+endef
+
+# Use ads2gas script to convert from RVCT format to GAS format.  This
+#  puts the processed file under $(ASM_CNV_PATH).  Local clean rule
+#  to handle removing these
+ifeq ($(CONFIG_VP8_ENCODER), yes)
+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
+endif
+ifeq ($(HAVE_NEON_ASM), yes)
+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
+endif
+
 .PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
 	@mkdir -p $(dir $@)
 	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

@@ -181,11 +224,24 @@ endif
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
 	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	@$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
 	@$(RM) -r $(ASM_CNV_PATH)
 	@$(RM) $(CLEAN-OBJS)

 include $(BUILD_SHARED_LIBRARY)

+ifeq ($(HAVE_NEON_ASM), yes)
+  $(eval $(call asm_offsets_template,\
+    $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
+    $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
+endif
+
+ifeq ($(CONFIG_VP8_ENCODER), yes)
+  $(eval $(call asm_offsets_template,\
+    $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
+    $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
+endif
+
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
 $(call import-module,cpufeatures)
 endif
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -146,7 +146,6 @@ $(BUILD_PFX)%.c.d: %.c

 $(BUILD_PFX)%.c.o: %.c
 	$(if $(quiet),@echo "    [CC] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cc.d: %.cc
@@ -156,7 +155,6 @@ $(BUILD_PFX)%.cc.d: %.cc

 $(BUILD_PFX)%.cc.o: %.cc
 	$(if $(quiet),@echo "    [CXX] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cpp.d: %.cpp
@@ -166,7 +164,6 @@ $(BUILD_PFX)%.cpp.d: %.cpp

 $(BUILD_PFX)%.cpp.o: %.cpp
 	$(if $(quiet),@echo "    [CXX] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.asm.d: %.asm
@@ -177,7 +174,6 @@ $(BUILD_PFX)%.asm.d: %.asm

 $(BUILD_PFX)%.asm.o: %.asm
 	$(if $(quiet),@echo "    [AS] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 $(BUILD_PFX)%.s.d: %.s
@@ -188,14 +184,12 @@ $(BUILD_PFX)%.s.d: %.s

 $(BUILD_PFX)%.s.o: %.s
 	$(if $(quiet),@echo "    [AS] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 .PRECIOUS: %.c.S
 %.c.S: CFLAGS += -DINLINE_ASM
 $(BUILD_PFX)%.c.S: %.c
 	$(if $(quiet),@echo "    [GEN] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

 .PRECIOUS: %.asm.s
@@ -222,6 +216,14 @@ else
 	$(qexec)cp $< $@
 endif

+#
+# Rule to extract assembly constants from C sources
+#
+obj_int_extract: build/make/obj_int_extract.c
+	$(if $(quiet),@echo "    [HOSTCC] $@")
+	$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
+CLEAN-OBJS += obj_int_extract
+
 #
 # Utility functions
 #
@@ -338,11 +340,9 @@ endif
 skip_deps := $(filter %clean,$(MAKECMDGOALS))
 skip_deps += $(findstring testdata,$(MAKECMDGOALS))
 ifeq ($(strip $(skip_deps)),)
-  ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
-    # Older versions of make don't like -include directives with no arguments
-    ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
-      -include $(filter %.d,$(OBJS-yes:.o=.d))
-    endif
+  # Older versions of make don't like -include directives with no arguments
+  ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
+    -include $(filter %.d,$(OBJS-yes:.o=.d))
  endif
 endif

@@ -424,7 +424,11 @@ ifneq ($(call enabled,DIST-SRCS),)
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat
+    DIST-SRCS-$(CONFIG_MSVS)  += build/arm-msvs/obj_int_extract.bat
    DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
+    # Include obj_int_extract if we use offsets from *_asm_*_offsets
+    DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64)    += build/make/obj_int_extract.c
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2armasm_ms.pl
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -295,7 +295,22 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
+                    obj_int_extract)
+                        tag Tool \
+                            Name="VCCLCompilerTool" \
+                            Optimization="0" \
+                            AdditionalIncludeDirectories="$incs" \
+                            PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
+                            RuntimeLibrary="$debug_runtime" \
+                            WarningLevel="3" \
+                            DebugInformationFormat="1" \
+                            $warn_64bit \
+                    ;;
                    vpx)
+                        tag Tool \
+                            Name="VCPreBuildEventTool" \
+                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
+
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="0" \
@@ -332,6 +347,11 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
+                            obj_int_extract)
+                                tag Tool \
+                                    Name="VCLinkerTool" \
+                                    GenerateDebugInformation="true" \
+                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
@@ -380,7 +400,24 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
+                    obj_int_extract)
+                        tag Tool \
+                            Name="VCCLCompilerTool" \
+                            Optimization="2" \
+                            FavorSizeorSpeed="1" \
+                            AdditionalIncludeDirectories="$incs" \
+                            PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
+                            RuntimeLibrary="$release_runtime" \
+                            UsePrecompiledHeader="0" \
+                            WarningLevel="3" \
+                            DebugInformationFormat="0" \
+                            $warn_64bit \
+                    ;;
                    vpx)
+                        tag Tool \
+                            Name="VCPreBuildEventTool" \
+                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
+
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="2" \
@@ -419,6 +456,11 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
+                            obj_int_extract)
+                                tag Tool \
+                                    Name="VCLinkerTool" \
+                                    GenerateDebugInformation="true" \
+                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -262,9 +262,15 @@ case "$target" in
        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"
    ;;
    arm*)
-        platforms[0]="ARM"
        asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
        asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
+        if [ "$name" = "obj_int_extract" ]; then
+            # We don't want to build this tool for the target architecture,
+            # but for an architecture we can run locally during the build.
+            platforms[0]="Win32"
+        else
+            platforms[0]="ARM"
+        fi
    ;;
    *) die "Unsupported target $target!"
    ;;
@@ -394,13 +400,23 @@ generate_vcxproj() {
                if [ "$hostplat" == "ARM" ]; then
                    hostplat=Win32
                fi
+                open_tag PreBuildEvent
+                tag_content Command "call obj_int_extract.bat &quot;$src_path_bare&quot; $hostplat\\\$(Configuration)"
+                close_tag PreBuildEvent
            fi
            open_tag ClCompile
            if [ "$config" = "Debug" ]; then
                opt=Disabled
                runtime=$debug_runtime
                curlibs=$debug_libs
-                debug=_DEBUG
+                case "$name" in
+                obj_int_extract)
+                    debug=DEBUG
+                    ;;
+                *)
+                    debug=_DEBUG
+                    ;;
+                esac
            else
                opt=MaxSpeed
                runtime=$release_runtime
@@ -408,7 +424,14 @@ generate_vcxproj() {
                tag_content FavorSizeOrSpeed Speed
                debug=NDEBUG
            fi
-            extradefines=";$defines"
+            case "$name" in
+            obj_int_extract)
+                extradefines=";_CONSOLE"
+                ;;
+            *)
+                extradefines=";$defines"
+                ;;
+            esac
            tag_content Optimization $opt
            tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
            tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
@@ -428,6 +451,10 @@ generate_vcxproj() {
            case "$proj_kind" in
            exe)
                open_tag Link
+                if [ "$name" != "obj_int_extract" ]; then
+                    tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
+                    tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
+                fi
                tag_content GenerateDebugInformation true
                # Console is the default normally, but if
                # AppContainerApplication is set, we need to override it.
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -18,19 +18,15 @@ set -e
 devnull='> /dev/null 2>&1'

 BUILD_ROOT="_iosbuild"
-CONFIGURE_ARGS="--disable-docs
-                --disable-examples
-                --disable-libyuv
-                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
 MAKE_JOBS=1
-SCRIPT_DIR=$(dirname "$0")
-LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
+LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
 TARGETS="arm64-darwin-gcc
+         armv6-darwin-gcc
         armv7-darwin-gcc
         armv7s-darwin-gcc
         x86-iphonesimulator-gcc
@@ -46,8 +42,8 @@ build_target() {

  mkdir "${target}"
  cd "${target}"
-  eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull}
+  eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
+      --disable-docs ${EXTRA_CONFIGURE_ARGS} ${devnull}
  export DIST_DIR
  eval make -j ${MAKE_JOBS} dist ${devnull}
  cd "${old_pwd}"
@@ -62,6 +58,9 @@ target_to_preproc_symbol() {
    arm64-*)
      echo "__aarch64__"
      ;;
+    armv6-*)
+      echo "__ARM_ARCH_6__"
+      ;;
    armv7-*)
      echo "__ARM_ARCH_7A__"
      ;;
@@ -177,13 +176,8 @@ build_framework() {
 # Trap function. Cleans up the subtree used to build all targets contained in
 # $TARGETS.
 cleanup() {
-  local readonly res=$?
  cd "${ORIG_PWD}"

-  if [ $res -ne 0 ]; then
-    elog "build exited with error ($res)"
-  fi
-
  if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
    rm -rf "${BUILD_ROOT}"
  fi
@@ -193,21 +187,14 @@ iosbuild_usage() {
 cat << EOF
  Usage: ${0##*/} [arguments]
    --help: Display this message and exit.
-    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
    --jobs: Number of make jobs.
    --preserve-build-output: Do not delete the build directory.
    --show-build-output: Show output from each library build.
-    --targets <targets>: Override default target list. Defaults:
-         ${TARGETS}
    --verbose: Output information about the environment and each stage of the
               build.
 EOF
 }

-elog() {
-  echo "${0##*/} failed because: $@" 1>&2
-}
-
 vlog() {
  if [ "${VERBOSE}" = "yes" ]; then
    echo "$@"
@@ -237,10 +224,6 @@ while [ -n "$1" ]; do
    --show-build-output)
      devnull=
      ;;
-    --targets)
-      TARGETS="$2"
-      shift
-      ;;
    --verbose)
      VERBOSE=yes
      ;;
@@ -256,7 +239,6 @@ if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
  BUILD_ROOT=${BUILD_ROOT}
  DIST_DIR=${DIST_DIR}
-  CONFIGURE_ARGS=${CONFIGURE_ARGS}
  EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
  FRAMEWORK_DIR=${FRAMEWORK_DIR}
  HEADER_DIR=${HEADER_DIR}
@@ -270,5 +252,3 @@ EOF
 fi

 build_framework "${TARGETS}"
-echo "Successfully built '${FRAMEWORK_DIR}' for:"
-echo "         ${TARGETS}"
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -0,0 +1,857 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+typedef enum {
+  OUTPUT_FMT_PLAIN,
+  OUTPUT_FMT_RVDS,
+  OUTPUT_FMT_GAS,
+  OUTPUT_FMT_C_HEADER,
+} output_fmt_t;
+
+int log_msg(const char *fmt, ...) {
+  int res;
+  va_list ap;
+  va_start(ap, fmt);
+  res = vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  return res;
+}
+
+#if defined(__GNUC__) && __GNUC__
+
+#if defined(FORCE_PARSE_ELF)
+
+#if defined(__MACH__)
+#undef __MACH__
+#endif
+
+#if !defined(__ELF__)
+#define __ELF__
+#endif
+#endif
+
+#if defined(__MACH__)
+
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+
+int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
+  switch (mode) {
+    case OUTPUT_FMT_RVDS:
+      printf("%-40s EQU %5d\n", name, val);
+      return 0;
+    case OUTPUT_FMT_GAS:
+      printf(".set %-40s, %5d\n", name, val);
+      return 0;
+    case OUTPUT_FMT_C_HEADER:
+      printf("#define %-40s %5d\n", name, val);
+      return 0;
+    default:
+      log_msg("Unsupported mode: %d", mode);
+      return 1;
+  }
+}
+
+int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
+  int i, j;
+  struct mach_header header;
+  uint8_t *buf = base_buf;
+  int base_data_section = 0;
+  int bits = 0;
+
+  /* We can read in mach_header for 32 and 64 bit architectures
+   * because it's identical to mach_header_64 except for the last
+   * element (uint32_t reserved), which we don't use. Then, when
+   * we know which architecture we're looking at, increment buf
+   * appropriately.
+   */
+  memcpy(&header, buf, sizeof(struct mach_header));
+
+  if (header.magic == MH_MAGIC) {
+    if (header.cputype == CPU_TYPE_ARM
+        || header.cputype == CPU_TYPE_X86) {
+      bits = 32;
+      buf += sizeof(struct mach_header);
+    } else {
+      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
+      goto bail;
+    }
+  } else if (header.magic == MH_MAGIC_64) {
+    if (header.cputype == CPU_TYPE_X86_64) {
+      bits = 64;
+      buf += sizeof(struct mach_header_64);
+    } else {
+      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
+      goto bail;
+    }
+  } else {
+    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
+            MH_MAGIC, MH_MAGIC_64, header.magic);
+    goto bail;
+  }
+
+  if (header.filetype != MH_OBJECT) {
+    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
+    goto bail;
+  }
+
+  for (i = 0; i < header.ncmds; i++) {
+    struct load_command lc;
+
+    memcpy(&lc, buf, sizeof(struct load_command));
+
+    if (lc.cmd == LC_SEGMENT) {
+      uint8_t *seg_buf = buf;
+      struct section s;
+      struct segment_command seg_c;
+
+      memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
+      seg_buf += sizeof(struct segment_command);
+
+      /* Although each section is given its own offset, nlist.n_value
+       * references the offset of the first section. This isn't
+       * apparent without debug information because the offset of the
+       * data section is the same as the first section. However, with
+       * debug sections mixed in, the offset of the debug section
+       * increases but n_value still references the first section.
+       */
+      if (seg_c.nsects < 1) {
+        log_msg("Not enough sections\n");
+        goto bail;
+      }
+
+      memcpy(&s, seg_buf, sizeof(struct section));
+      base_data_section = s.offset;
+    } else if (lc.cmd == LC_SEGMENT_64) {
+      uint8_t *seg_buf = buf;
+      struct section_64 s;
+      struct segment_command_64 seg_c;
+
+      memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
+      seg_buf += sizeof(struct segment_command_64);
+
+      /* Explanation in LC_SEGMENT */
+      if (seg_c.nsects < 1) {
+        log_msg("Not enough sections\n");
+        goto bail;
+      }
+
+      memcpy(&s, seg_buf, sizeof(struct section_64));
+      base_data_section = s.offset;
+    } else if (lc.cmd == LC_SYMTAB) {
+      if (base_data_section != 0) {
+        struct symtab_command sc;
+        uint8_t *sym_buf = base_buf;
+        uint8_t *str_buf = base_buf;
+
+        memcpy(&sc, buf, sizeof(struct symtab_command));
+
+        if (sc.cmdsize != sizeof(struct symtab_command)) {
+          log_msg("Can't find symbol table!\n");
+          goto bail;
+        }
+
+        sym_buf += sc.symoff;
+        str_buf += sc.stroff;
+
+        for (j = 0; j < sc.nsyms; j++) {
+          /* Location of string is calculated each time from the
+           * start of the string buffer.  On darwin the symbols
+           * are prefixed by "_", so we bump the pointer by 1.
+           * The target value is defined as an int in *_asm_*_offsets.c,
+           * which is 4 bytes on all targets we currently use.
+           */
+          if (bits == 32) {
+            struct nlist nl;
+            int val;
+
+            memcpy(&nl, sym_buf, sizeof(struct nlist));
+            sym_buf += sizeof(struct nlist);
+
+            memcpy(&val, base_buf + base_data_section + nl.n_value,
+                   sizeof(val));
+            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
+          } else { /* if (bits == 64) */
+            struct nlist_64 nl;
+            int val;
+
+            memcpy(&nl, sym_buf, sizeof(struct nlist_64));
+            sym_buf += sizeof(struct nlist_64);
+
+            memcpy(&val, base_buf + base_data_section + nl.n_value,
+                   sizeof(val));
+            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
+          }
+        }
+      }
+    }
+
+    buf += lc.cmdsize;
+  }
+
+  return 0;
+bail:
+  return 1;
+
+}
+
+#elif defined(__ELF__)
+#include "elf.h"
+
+#define COPY_STRUCT(dst, buf, ofst, sz) do {\
+    if(ofst + sizeof((*(dst))) > sz) goto bail;\
+    memcpy(dst, buf+ofst, sizeof((*(dst))));\
+  } while(0)
+
+#define ENDIAN_ASSIGN(val, memb) do {\
+    if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
+    (val) = (memb);\
+  } while(0)
+
+#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
+    ENDIAN_ASSIGN(memb, memb);\
+  } while(0)
+
+typedef struct {
+  uint8_t      *buf; /* Buffer containing ELF data */
+  size_t        sz;  /* Buffer size */
+  int           le_data; /* Data is little-endian */
+  unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+  int           bits; /* 32 or 64 */
+  Elf32_Ehdr    hdr32;
+  Elf64_Ehdr    hdr64;
+} elf_obj_t;
+
+int parse_elf_header(elf_obj_t *elf) {
+  int res;
+  /* Verify ELF Magic numbers */
+  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
+  res = elf->e_ident[EI_MAG0] == ELFMAG0;
+  res &= elf->e_ident[EI_MAG1] == ELFMAG1;
+  res &= elf->e_ident[EI_MAG2] == ELFMAG2;
+  res &= elf->e_ident[EI_MAG3] == ELFMAG3;
+  res &= elf->e_ident[EI_CLASS] == ELFCLASS32
+         || elf->e_ident[EI_CLASS] == ELFCLASS64;
+  res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
+
+  if (!res) goto bail;
+
+  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
+
+  /* Read in relevant values */
+  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
+    elf->bits = 32;
+    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
+
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
+  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
+    elf->bits = 64;
+    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
+
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
+  }
+
+  return 0;
+bail:
+  log_msg("Failed to parse ELF file header");
+  return 1;
+}
+
+int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
+  if (hdr32) {
+    if (idx >= elf->hdr32.e_shnum)
+      goto bail;
+
+    COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
+                elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
+  } else { /* if (hdr64) */
+    if (idx >= elf->hdr64.e_shnum)
+      goto bail;
+
+    COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
+                elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
+  }
+
+  return 0;
+bail:
+  return 1;
+}
+
+const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
+  if (elf->bits == 32) {
+    Elf32_Shdr shdr;
+
+    if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
+      log_msg("Failed to parse ELF string table: section %d, index %d\n",
+              s_idx, idx);
+      return "";
+    }
+
+    return (char *)(elf->buf + shdr.sh_offset + idx);
+  } else { /* if (elf->bits == 64) */
+    Elf64_Shdr shdr;
+
+    if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
+      log_msg("Failed to parse ELF string table: section %d, index %d\n",
+              s_idx, idx);
+      return "";
+    }
+
+    return (char *)(elf->buf + shdr.sh_offset + idx);
+  }
+}
+
+int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
+  if (sym32) {
+    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
+  } else { /* if (sym64) */
+    COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
+  }
+  return 0;
+bail:
+  return 1;
+}
+
+/* Parses an ELF object held in memory and prints every STT_OBJECT symbol of
+ * size 4 as a name/value pair in the requested output format.
+ *
+ * buf  - contents of the object file
+ * sz   - size of buf in bytes
+ * mode - output format; OUTPUT_FMT_RVDS output is terminated with "END"
+ *
+ * Symbols whose section is named ".bss" are reported as 0; for every other
+ * section the value is copied out of the section data.
+ *
+ * Returns 0 on success. Returns 1 if the header cannot be parsed, the data
+ * is big endian, or a symbol has an unexpected size.
+ */
+int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
+  elf_obj_t    elf;
+  unsigned int ofst;
+  int          i;
+  /* Offset of the ".shstrtab" section, saved for section-name lookups.
+   * Zero-initialized so a file without that section does not leave the
+   * offsets indeterminate.
+   */
+  Elf32_Off    strtab_off32 = 0;
+  Elf64_Off    strtab_off64 = 0;
+
+  memset(&elf, 0, sizeof(elf));
+  elf.buf = buf;
+  elf.sz = sz;
+
+  /* Parse Header */
+  if (parse_elf_header(&elf))
+    goto bail;
+
+  /* Locate the section-name string table. The name is compared in place
+   * instead of being copied into a fixed-size stack buffer, which could
+   * overflow on a long name.
+   */
+  if (elf.bits == 32) {
+    Elf32_Shdr shdr;
+    for (i = 0; i < elf.hdr32.e_shnum; i++) {
+      parse_elf_section(&elf, i, &shdr, NULL);
+
+      if (shdr.sh_type == SHT_STRTAB) {
+        const char *strtab_name =
+            (const char *)(elf.buf + shdr.sh_offset + shdr.sh_name);
+
+        if (!(strcmp(strtab_name, ".shstrtab"))) {
+          /* log_msg("found section: %s\n", strtab_name); */
+          strtab_off32 = shdr.sh_offset;
+          break;
+        }
+      }
+    }
+  } else { /* if (elf.bits == 64) */
+    Elf64_Shdr shdr;
+    for (i = 0; i < elf.hdr64.e_shnum; i++) {
+      parse_elf_section(&elf, i, NULL, &shdr);
+
+      if (shdr.sh_type == SHT_STRTAB) {
+        const char *strtab_name =
+            (const char *)(elf.buf + shdr.sh_offset + shdr.sh_name);
+
+        if (!(strcmp(strtab_name, ".shstrtab"))) {
+          /* log_msg("found section: %s\n", strtab_name); */
+          strtab_off64 = shdr.sh_offset;
+          break;
+        }
+      }
+    }
+  }
+
+  /* Parse all Symbol Tables */
+  if (elf.bits == 32) {
+    Elf32_Shdr shdr;
+    for (i = 0; i < elf.hdr32.e_shnum; i++) {
+      parse_elf_section(&elf, i, &shdr, NULL);
+
+      if (shdr.sh_type == SHT_SYMTAB) {
+        for (ofst = shdr.sh_offset;
+             ofst < shdr.sh_offset + shdr.sh_size;
+             ofst += shdr.sh_entsize) {
+          Elf32_Sym sym;
+
+          parse_elf_symbol(&elf, ofst, &sym, NULL);
+
+          /* For all OBJECTS (data objects), extract the value from the
+           * proper data segment.
+           */
+          /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+              log_msg("found data object %s\n",
+                      parse_elf_string_table(&elf,
+                                             shdr.sh_link,
+                                             sym.st_name));
+           */
+
+          if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
+              && sym.st_size == 4) {
+            Elf32_Shdr dhdr;
+            int val = 0;
+            const char *section_name;
+
+            parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
+
+            /* For explanation - refer to _MSC_VER version of code */
+            section_name =
+                (const char *)(elf.buf + strtab_off32 + dhdr.sh_name);
+            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+            /* Anything outside .bss has its value read from the file;
+             * .bss symbols keep val == 0.
+             */
+            if (strcmp(section_name, ".bss")) {
+              if (sizeof(val) != sym.st_size) {
+                /* The target value is declared as an int in
+                 * *_asm_*_offsets.c, which is 4 bytes on all
+                 * targets we currently use. Complain loudly if
+                 * this is not true.
+                 */
+                log_msg("Symbol size is wrong\n");
+                goto bail;
+              }
+
+              memcpy(&val,
+                     elf.buf + dhdr.sh_offset + sym.st_value,
+                     sym.st_size);
+            }
+
+            if (!elf.le_data) {
+              log_msg("Big Endian data not supported yet!\n");
+              goto bail;
+            }
+
+            switch (mode) {
+              case OUTPUT_FMT_RVDS:
+                printf("%-40s EQU %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_GAS:
+                printf(".equ %-40s, %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_C_HEADER:
+                printf("#define %-40s %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              default:
+                printf("%s = %d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+            }
+          }
+        }
+      }
+    }
+  } else { /* if (elf.bits == 64) */
+    Elf64_Shdr shdr;
+    for (i = 0; i < elf.hdr64.e_shnum; i++) {
+      parse_elf_section(&elf, i, NULL, &shdr);
+
+      if (shdr.sh_type == SHT_SYMTAB) {
+        for (ofst = shdr.sh_offset;
+             ofst < shdr.sh_offset + shdr.sh_size;
+             ofst += shdr.sh_entsize) {
+          Elf64_Sym sym;
+
+          parse_elf_symbol(&elf, ofst, NULL, &sym);
+
+          /* For all OBJECTS (data objects), extract the value from the
+           * proper data segment.
+           */
+          /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+              log_msg("found data object %s\n",
+                      parse_elf_string_table(&elf,
+                                             shdr.sh_link,
+                                             sym.st_name));
+           */
+
+          if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
+              && sym.st_size == 4) {
+            Elf64_Shdr dhdr;
+            int val = 0;
+            const char *section_name;
+
+            parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
+
+            /* For explanation - refer to _MSC_VER version of code */
+            section_name =
+                (const char *)(elf.buf + strtab_off64 + dhdr.sh_name);
+            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+            /* Anything outside .bss has its value read from the file;
+             * .bss symbols keep val == 0.
+             */
+            if ((strcmp(section_name, ".bss"))) {
+              if (sizeof(val) != sym.st_size) {
+                /* The target value is declared as an int in
+                 * *_asm_*_offsets.c, which is 4 bytes on all
+                 * targets we currently use. Complain loudly if
+                 * this is not true.
+                 */
+                log_msg("Symbol size is wrong\n");
+                goto bail;
+              }
+
+              memcpy(&val,
+                     elf.buf + dhdr.sh_offset + sym.st_value,
+                     sym.st_size);
+            }
+
+            if (!elf.le_data) {
+              log_msg("Big Endian data not supported yet!\n");
+              goto bail;
+            }
+
+            switch (mode) {
+              case OUTPUT_FMT_RVDS:
+                printf("%-40s EQU %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_GAS:
+                printf(".equ %-40s, %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              /* Mirrors the 32-bit branch so "cheader" output is the same
+               * for both ELF classes.
+               */
+              case OUTPUT_FMT_C_HEADER:
+                printf("#define %-40s %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              default:
+                printf("%s = %d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (mode == OUTPUT_FMT_RVDS)
+    printf("    END\n");
+
+  return 0;
+bail:
+  log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
+  return 1;
+}
+
+#endif
+#endif /* defined(__GNUC__) && __GNUC__ */
+
+
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
+/*  See "Microsoft Portable Executable and Common Object File Format Specification"
+    for reference.
+*/
+/* Little-endian readers for a byte pointer x. The argument is parenthesized
+ * so any pointer expression is safe, and get_le32 widens each byte to
+ * uint32_t first so the top byte is never shifted into the sign bit of int.
+ */
+#define get_le32(x) ((uint32_t)(x)[0] | (uint32_t)(x)[1] << 8 | \
+                     (uint32_t)(x)[2] << 16 | (uint32_t)(x)[3] << 24)
+#define get_le16(x) ((x)[0] | (x)[1] << 8)
+
+/* Parses a COFF object held in memory and prints every external symbol
+ * (storage class 2) that lives in a section as "<name> EQU <value>",
+ * followed by a final "END" line. A leading '_' on the symbol name is
+ * dropped.
+ *
+ * buf - contents of the object file
+ * sz  - size of buf in bytes (currently not consulted)
+ *
+ * Symbols in the ".bss" section are reported as 0; all others are read
+ * from the raw data of the non-COMDAT ".rdata" section.
+ *
+ * Returns 0 on success, 1 on failure.
+ */
+int parse_coff(uint8_t *buf, size_t sz) {
+  unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
+  /* Stays 0 when the object has no non-COMDAT .rdata section, instead of
+   * being read uninitialized.
+   */
+  unsigned int sectionrawdata_ptr = 0;
+  unsigned int i;
+  uint8_t *ptr;
+  uint32_t symoffset;
+  int res = 1;
+
+  char **sectionlist;  // this array holds all section names in their correct order.
+  // it is used to check if the symbol is in .bss or .rdata section.
+
+  nsections = get_le16(buf + 2);
+  symtab_ptr = get_le32(buf + 8);
+  symtab_sz = get_le32(buf + 12);
+  strtab_ptr = symtab_ptr + symtab_sz * 18;
+
+  if (nsections > 96) {
+    log_msg("Too many sections\n");
+    return 1;
+  }
+
+  // calloc so that every slot is NULL until it is filled in; the cleanup
+  // code below can then free() all slots safely after a partial failure.
+  sectionlist = calloc(nsections, sizeof(*sectionlist));
+
+  if (sectionlist == NULL) {
+    log_msg("Allocating first level of section list failed\n");
+    return 1;
+  }
+
+  // log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
+
+  /*
+  The size of optional header is always zero for an obj file. So, the section header
+  follows the file header immediately.
+  */
+
+  ptr = buf + 20;     // section header
+
+  for (i = 0; i < nsections; i++) {
+    char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+    strncpy(sectionname, (const char *)ptr, 8);
+    // log_msg("COFF: Parsing section %s\n",sectionname);
+
+    sectionlist[i] = malloc(strlen(sectionname) + 1);
+
+    if (sectionlist[i] == NULL) {
+      log_msg("Allocating storage for %s failed\n", sectionname);
+      goto bail;
+    }
+    strcpy(sectionlist[i], sectionname);
+
+    // check if it's .rdata and is not a COMDAT section.
+    if (!strcmp(sectionname, ".rdata") &&
+        (get_le32(ptr + 36) & 0x1000) == 0) {
+      sectionrawdata_ptr = get_le32(ptr + 20);
+    }
+
+    ptr += 40;
+  }
+
+  // log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
+  // log_msg("COFF: raw data pointer offset for section .rdata is %u\n", sectionrawdata_ptr);
+
+  /*  The compiler puts the data with non-zero offset in .rdata section, but puts the data with
+      zero offset in .bss section. So, if the data is in .bss section, set offset=0.
+      Note from Wiki: In an object module compiled from C, the bss section contains
+      the local variables (but not functions) that were declared with the static keyword,
+      except for those with non-zero initial values. (In C, static variables are initialized
+      to zero by default.) It also contains the non-local (both extern and static) variables
+      that are also initialized to zero (either explicitly or by default).
+      */
+  // move to symbol table
+  /* COFF symbol table:
+      offset      field
+      0           Name(*)
+      8           Value
+      12          SectionNumber
+      14          Type
+      16          StorageClass
+      17          NumberOfAuxSymbols
+      */
+  ptr = buf + symtab_ptr;
+
+  for (i = 0; i < symtab_sz; i++) {
+    int16_t section = get_le16(ptr + 12); // section number
+
+    if (section > 0 && ptr[16] == 2) {
+      // if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
+      char short_name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+      const char *name;
+
+      // The section number indexes sectionlist below; reject values that
+      // point past the sections read from the header.
+      if ((unsigned int)section > nsections) {
+        log_msg("Symbol refers to nonexistent section %d\n", section);
+        goto bail;
+      }
+
+      if (get_le32(ptr)) {
+        // Short name: stored inline in the first 8 bytes of the record.
+        strncpy(short_name, (const char *)ptr, 8);
+        name = short_name;
+        // log_msg("COFF: Parsing symbol %s\n",name);
+      } else {
+        // Long name: bytes 4..7 hold an offset into the string table.
+        name = (const char *)(buf + strtab_ptr + get_le32(ptr + 4));
+        // log_msg("COFF: Parsing symbol %s\n", name);
+      }
+
+      /* The 64bit Windows compiler doesn't prefix with an _.
+       * Check what's there, and bump if necessary
+       */
+      if (name[0] == '_')
+        name++;
+
+      printf("%-40s EQU ", name);
+
+      if (!(strcmp(sectionlist[section - 1], ".bss"))) {
+        symoffset = 0;
+      } else {
+        symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
+      }
+
+      // log_msg("      Section: %d\n",section);
+      // log_msg("      Class:   %d\n",ptr[16]);
+      // log_msg("      Address: %u\n",get_le32(ptr+8));
+      // log_msg("      Offset: %u\n", symoffset);
+
+      printf("%5d\n", symoffset);
+    }
+
+    ptr += 18;
+  }
+
+  printf("    END\n");
+
+  res = 0;
+
+bail:
+  // Shared cleanup for success and failure; unfilled slots are NULL.
+  for (i = 0; i < nsections; i++) {
+    free(sectionlist[i]);
+  }
+
+  free(sectionlist);
+
+  return res;
+}
+#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
+
+/* Entry point: obj_int_extract [output format] <obj file>
+ *
+ * Reads the whole object file into memory and hands it to the parser that
+ * was compiled in for this platform (Mach-O, ELF or COFF). The optional
+ * first argument selects the output format ("rvds", "gas" or "cheader");
+ * any other first argument is taken as the object file itself.
+ *
+ * Returns EXIT_SUCCESS if the file was read and parsed, EXIT_FAILURE
+ * otherwise.
+ */
+int main(int argc, char **argv) {
+  output_fmt_t mode = OUTPUT_FMT_PLAIN;
+  const char *f = NULL;
+  uint8_t *file_buf = NULL;
+  int res = 1;  /* stays nonzero unless a parser runs and succeeds */
+  int bad_args;
+  FILE *fp = NULL;
+  long int file_size;
+
+  bad_args = (argc < 2 || argc > 3);
+
+  if (!bad_args) {
+    /* argv[argc] is NULL, so this is NULL when only one argument is given. */
+    f = argv[2];
+
+    if (!strcmp(argv[1], "rvds"))
+      mode = OUTPUT_FMT_RVDS;
+    else if (!strcmp(argv[1], "gas"))
+      mode = OUTPUT_FMT_GAS;
+    else if (!strcmp(argv[1], "cheader"))
+      mode = OUTPUT_FMT_C_HEADER;
+    else
+      f = argv[1];
+
+    /* A format keyword with no object file after it leaves f NULL. */
+    bad_args = (f == NULL);
+  }
+
+  if (bad_args) {
+    fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
+    fprintf(stderr, "  <obj file>\tobject file to parse\n");
+    fprintf(stderr, "Output Formats:\n");
+    fprintf(stderr, "  gas  - compatible with GNU assembler\n");
+    fprintf(stderr, "  rvds - compatible with armasm\n");
+    fprintf(stderr, "  cheader - c/c++ header file\n");
+    goto bail;
+  }
+
+  fp = fopen(f, "rb");
+
+  if (!fp) {
+    perror("Unable to open file");
+    goto bail;
+  }
+
+  if (fseek(fp, 0, SEEK_END)) {
+    perror("stat");
+    goto bail;
+  }
+
+  file_size = ftell(fp);
+
+  /* ftell reports failure as -1; do not pass that on to malloc/fread. */
+  if (file_size < 0) {
+    perror("stat");
+    goto bail;
+  }
+
+  file_buf = malloc(file_size);
+
+  if (!file_buf) {
+    perror("malloc");
+    goto bail;
+  }
+
+  rewind(fp);
+
+  if (fread(file_buf, sizeof(char), file_size, fp) != (size_t)file_size) {
+    perror("read");
+    goto bail;
+  }
+
+  if (fclose(fp)) {
+    fp = NULL;  /* the stream is gone even when fclose fails */
+    perror("close");
+    goto bail;
+  }
+
+  fp = NULL;
+
+#if defined(__GNUC__) && __GNUC__
+#if defined(__MACH__)
+  res = parse_macho(file_buf, file_size, mode);
+#elif defined(__ELF__)
+  res = parse_elf(file_buf, file_size, mode);
+#endif
+#endif
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
+  res = parse_coff(file_buf, file_size);
+#endif
+
+bail:
+  /* Single exit: release whatever was acquired before leaving. */
+  if (fp)
+    fclose(fp);
+
+  free(file_buf);
+
+  return res ? EXIT_FAILURE : EXIT_SUCCESS;
+}
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -376,18 +376,17 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
      last;
    }
-    if (/HAVE_MSA=yes/) {
-      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
-      last;
-    }
  }
  close CONFIG_FILE;
  mips;
-} elsif ($opts{arch} eq 'armv6') {
-  @ALL_ARCHS = filter(qw/media/);
+} elsif ($opts{arch} eq 'armv5te') {
+  @ALL_ARCHS = filter(qw/edsp/);
  arm;
-} elsif ($opts{arch} =~ /armv7\w?/) {
-  @ALL_ARCHS = filter(qw/media neon_asm neon/);
+} elsif ($opts{arch} eq 'armv6') {
+  @ALL_ARCHS = filter(qw/edsp media/);
+  arm;
+} elsif ($opts{arch} eq 'armv7') {
+  @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
  @REQUIRES = filter(keys %required ? keys %required : qw/media/);
  &require(@REQUIRES);
  arm;
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -0,0 +1,15 @@
+REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+REM
+REM   Use of this source code is governed by a BSD-style license
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
+echo on
+
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 source directory.
+REM   %2 - Path to obj_int_extract.exe.
+cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
--- a/21
+++ b/21
@@ -26,7 +26,6 @@ Advanced options:
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
  ${toggle_encode_perf_tests}     build encoder perf tests with unit tests
-  --cpu=CPU                       tune for the specified CPU (ARM: cortex-a8, X86: sse3)
  --libc=PATH                     path to alternate libc
  --size-limit=WxH                max size to allow in the decoder
  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
@@ -36,7 +35,6 @@ Advanced options:
  ${toggle_codec_srcs}            in/exclude codec library source code
  ${toggle_debug_libs}            in/exclude debug version of libraries
  ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
-  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
@@ -58,8 +56,6 @@ Advanced options:
  ${toggle_postproc_visualizer}   macro block / block level visualizers
  ${toggle_multi_res_encoding}    enable multiple-resolution encoding
  ${toggle_temporal_denoising}    enable temporal denoising and disable the spatial denoiser
-  ${toggle_vp9_temporal_denoising}
-                                  enable vp9 temporal denoising
  ${toggle_webm_io}               enable input from and output to WebM container
  ${toggle_libyuv}                enable libyuv

@@ -97,6 +93,10 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
+all_platforms="${all_platforms} armv5te-android-gcc"
+all_platforms="${all_platforms} armv5te-linux-rvct"
+all_platforms="${all_platforms} armv5te-linux-gcc"
+all_platforms="${all_platforms} armv5te-none-rvct"
 all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
@@ -128,7 +128,6 @@ all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
-all_platforms="${all_platforms} x86-darwin14-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -146,7 +145,6 @@ all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
-all_platforms="${all_platforms} x86_64-darwin14-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -163,7 +161,6 @@ all_platforms="${all_platforms} universal-darwin10-gcc"
 all_platforms="${all_platforms} universal-darwin11-gcc"
 all_platforms="${all_platforms} universal-darwin12-gcc"
 all_platforms="${all_platforms} universal-darwin13-gcc"
-all_platforms="${all_platforms} universal-darwin14-gcc"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -209,7 +206,6 @@ enable_feature install_libs

 enable_feature static
 enable_feature optimizations
-enable_feature dependency_tracking
 enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
 enable_feature spatial_resampling
 enable_feature multithread
@@ -258,7 +254,7 @@ ARCH_EXT_LIST="

    mips32
    dspr2
-    msa
+
    mips64

    mmx
@@ -283,11 +279,11 @@ HAVE_LIST="
 "
 EXPERIMENT_LIST="
    spatial_svc
+    vp9_temporal_denoising
    fp_mb_stats
    emulate_hardware
 "
 CONFIG_LIST="
-    dependency_tracking
    external_build
    install_docs
    install_bins
@@ -338,7 +334,6 @@ CONFIG_LIST="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
-    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -346,7 +341,6 @@ CONFIG_LIST="
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
-    dependency_tracking
    external_build
    extra_warnings
    werror
@@ -399,7 +393,6 @@ CMDLINE_SELECT="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
-    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -458,6 +451,8 @@ process_targets() {
    enabled child || write_common_config_banner
    enabled universal || write_common_target_config_h  ${BUILD_PFX}vpx_config.h

+    # TODO: add host tools target (obj_int_extract, etc)
+
    # For fat binaries, call configure recursively to configure for each
    # binary architecture to be included.
    if enabled universal; then
--- a/examples.mk
+++ b/examples.mk
@@ -338,7 +338,6 @@ $(foreach proj,$(call enabled,PROJECTS),\
 #
 %.dox: %.c
 	@echo "    [DOXY] $@"
-	@mkdir -p $(dir $@)
 	@echo "/*!\page example_$(@F:.dox=) $(@F:.dox=)" > $@
 	@echo "   \includelineno $(<F)" >> $@
 	@echo "*/" >> $@
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -36,9 +36,9 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../md5_utils.h"
-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./md5_utils.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -59,8 +59,8 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -46,8 +46,8 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;
--- a/examples/resize_util.c
+++ b/examples/resize_util.c
@@ -15,22 +15,15 @@
 #include <stdlib.h>
 #include <string.h>

-#include "../vp9/encoder/vp9_resize.h"
+#include "./vp9/encoder/vp9_resize.h"

-static const char *exec_name = NULL;
-
-static void usage() {
+static void usage(char *progname) {
  printf("Usage:\n");
  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
-         exec_name);
+         progname);
  printf("<output_yuv> [<frames>]\n");
 }

-void usage_exit() {
-  usage();
-  exit(EXIT_FAILURE);
-}
-
 static int parse_dim(char *v, int *width, int *height) {
  char *x = strchr(v, 'x');
  if (x == NULL)
@@ -54,11 +47,9 @@ int main(int argc, char *argv[]) {
  int f, frames;
  int width, height, target_width, target_height;

-  exec_name = argv[0];
-
  if (argc < 5) {
    printf("Incorrect parameters:\n");
-    usage();
+    usage(argv[0]);
    return 1;
  }

@@ -66,25 +57,25 @@ int main(int argc, char *argv[]) {
  fout = argv[4];
  if (!parse_dim(argv[2], &width, &height)) {
    printf("Incorrect parameters: %s\n", argv[2]);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  if (!parse_dim(argv[3], &target_width, &target_height)) {
    printf("Incorrect parameters: %s\n", argv[3]);
-    usage();
+    usage(argv[0]);
    return 1;
  }

  fpin = fopen(fin, "rb");
  if (fpin == NULL) {
    printf("Can't open file %s to read\n", fin);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  fpout = fopen(fout, "wb");
  if (fpout == NULL) {
    printf("Can't open file %s to write\n", fout);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  if (argc >= 6)
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -50,8 +50,8 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -82,8 +82,8 @@

 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -101,8 +101,8 @@

 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -53,8 +53,8 @@

 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -8,730 +8,292 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-/*
- * This is an example demonstrating multi-resolution encoding in VP8.
- * High-resolution input video is down-sampled to lower-resolutions. The
- * encoder then encodes the video and outputs multiple bitstreams with
- * different resolutions.
- *
- * This test also allows for settings temporal layers for each spatial layer.
- * Different number of temporal layers per spatial stream may be used.
- * Currently up to 3 temporal layers per spatial stream (encoder) are supported
- * in this test.
- */

-#include "./vpx_config.h"
+// This is an example demonstrating multi-resolution encoding in VP8.
+// High-resolution input video is down-sampled to lower-resolutions. The
+// encoder then encodes the video and outputs multiple bitstreams with
+// different resolutions.
+//
+// Configure with --enable-multi-res-encoding flag to enable this example.

 #include <stdio.h>
 #include <stdlib.h>
-#include <stdarg.h>
 #include <string.h>
-#include <math.h>
-#include <assert.h>
-#include <sys/time.h>
-#if USE_POSIX_MMAP
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/mman.h>
-#include <fcntl.h>
-#include <unistd.h>
-#endif
-#include "vpx_ports/vpx_timer.h"
-#define VPX_CODEC_DISABLE_COMPAT 1
-#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
-#include "vpx_ports/mem_ops.h"
-#include "./tools_common.h"
-#define interface (vpx_codec_vp8_cx())
-#define fourcc    0x30385056

-void usage_exit() {
-  exit(EXIT_FAILURE);
-}
-
-/*
- * The input video frame is downsampled several times to generate a multi-level
- * hierarchical structure. NUM_ENCODERS is defined as the number of encoding
- * levels required. For example, if the size of input video is 1280x720,
- * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3
- * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and
- * 320x180(level 2) respectively.
- */
-
-/* Number of encoders (spatial resolutions) used in this test. */
-#define NUM_ENCODERS 3
-
-/* Maximum number of temporal layers allowed for this test. */
-#define MAX_NUM_TEMPORAL_LAYERS 3
-
-/* This example uses the scaler function in libyuv. */
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 #include "third_party/libyuv/include/libyuv/scale.h"
 #include "third_party/libyuv/include/libyuv/cpu_id.h"

-int (*read_frame_p)(FILE *f, vpx_image_t *img);
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"

-static int read_frame(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
+#include "./tools_common.h"
+#include "./video_writer.h"

-    to_read = img->w*img->h*3/2;
-    nbytes = fread(img->planes[0], 1, to_read, f);
-    if(nbytes != to_read) {
-        res = 0;
-        if(nbytes > 0)
-            printf("Warning: Read partial frame. Check your width & height!\n");
-    }
-    return res;
+// The input video frame is downsampled several times to generate a
+// multi-level hierarchical structure. kNumEncoders is defined as the number
+// of encoding levels required. For example, if the size of input video is
+// 1280x720, kNumEncoders is 3, and down-sampling factor is 2, the encoder
+// outputs 3 bitstreams with resolution of 1280x720(level 0),
+// 640x360(level 1), and 320x180(level 2) respectively.
+#define kNumEncoders 3
+
+static const char *exec_name;
+
+void usage_exit() {
+  fprintf(stderr,
+          "Usage: %s <width> <height> <infile> <outfile(s)> <output psnr?>\n",
+          exec_name);
+  exit(EXIT_FAILURE);
 }

-static int read_frame_by_row(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
-    int plane;
+int main(int argc, char *argv[]) {
+  int frame_cnt = 0;
+  FILE *infile = NULL;
+  VpxVideoWriter *writers[kNumEncoders];
+  vpx_codec_ctx_t codec[kNumEncoders];
+  vpx_codec_enc_cfg_t cfg[kNumEncoders];
+  vpx_image_t raw[kNumEncoders];
+  const VpxInterface *const encoder = get_vpx_encoder_by_name("vp8");
+  // Currently, only realtime mode is supported in multi-resolution encoding.
+  const int arg_deadline = VPX_DL_REALTIME;
+  int i;
+  int width = 0;
+  int height = 0;
+  int frame_avail = 0;
+  int got_data = 0;

-    for (plane = 0; plane < 3; plane++)
+  // Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
+  // don't need to know PSNR, which will skip PSNR calculation and save
+  // encoding time.
+  int show_psnr = 0;
+  uint64_t psnr_sse_total[kNumEncoders] = {0};
+  uint64_t psnr_samples_total[kNumEncoders] = {0};
+  double psnr_totals[kNumEncoders][4] = {{0, 0}};
+  int psnr_count[kNumEncoders] = {0};
+
+  // Set the required target bitrates for each resolution level.
+  // If target bitrate for highest-resolution level is set to 0,
+  // (i.e. target_bitrate[0]=0), we skip encoding at that level.
+  unsigned int target_bitrate[kNumEncoders] = {1000, 500, 100};
+
+  // Enter the frame rate of the input video.
+  const int framerate = 30;
+  // Set down-sampling factor for each resolution level.
+  //   dsf[0] controls down sampling from level 0 to level 1;
+  //   dsf[1] controls down sampling from level 1 to level 2;
+  //   dsf[2] is not used.
+  vpx_rational_t dsf[kNumEncoders] = {{2, 1}, {2, 1}, {1, 1}};
+
+  exec_name = argv[0];
+
+  if (!encoder)
+    die("Unsupported codec.");
+
+  // exe_name, input width, input height, input file,
+  // output file 1, output file 2, output file 3, psnr on/off
+  if (argc != (5 + kNumEncoders))
+    die("Invalid number of input options.");
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+
+  width = strtol(argv[1], NULL, 0);
+  height = strtol(argv[2], NULL, 0);
+
+  if (width < 16 || width % 2 || height < 16 || height % 2)
+    die("Invalid resolution: %ldx%ld", width, height);
+
+  // Open input video file for encoding
+  if (!(infile = fopen(argv[3], "rb")))
+    die("Failed to open %s for reading", argv[3]);
+
+  show_psnr = strtol(argv[kNumEncoders + 4], NULL, 0);
+
+  // Populate default encoder configuration
+  for (i = 0; i < kNumEncoders; ++i) {
+    vpx_codec_err_t res =
+        vpx_codec_enc_config_default(encoder->codec_interface(), &cfg[i], 0);
+    if (res != VPX_CODEC_OK) {
+      printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+      return EXIT_FAILURE;
+    }
+  }
+
+  // Update the default configuration according to needs of the application.
+  // Highest-resolution encoder settings
+  cfg[0].g_w = width;
+  cfg[0].g_h = height;
+  cfg[0].g_threads = 1;
+  cfg[0].rc_dropframe_thresh = 30;
+  cfg[0].rc_end_usage = VPX_CBR;
+  cfg[0].rc_resize_allowed = 0;
+  cfg[0].rc_min_quantizer = 4;
+  cfg[0].rc_max_quantizer = 56;
+  cfg[0].rc_undershoot_pct = 98;
+  cfg[0].rc_overshoot_pct = 100;
+  cfg[0].rc_buf_initial_sz = 500;
+  cfg[0].rc_buf_optimal_sz = 600;
+  cfg[0].rc_buf_sz = 1000;
+  cfg[0].g_error_resilient = 1;
+  cfg[0].g_lag_in_frames = 0;
+  cfg[0].kf_mode = VPX_KF_AUTO;  // VPX_KF_DISABLED
+  cfg[0].kf_min_dist = 3000;
+  cfg[0].kf_max_dist = 3000;
+  cfg[0].rc_target_bitrate = target_bitrate[0];
+  cfg[0].g_timebase.num = 1;
+  cfg[0].g_timebase.den = framerate;
+
+  // Other-resolution encoder settings
+  for (i = 1; i < kNumEncoders; ++i) {
+    cfg[i] = cfg[0];
+    cfg[i].g_threads = 1;
+    cfg[i].rc_target_bitrate = target_bitrate[i];
+
+    // Note: Width & height of other-resolution encoders are calculated
+    // from the highest-resolution encoder's size and the corresponding
+    // down_sampling_factor.
    {
-        unsigned char *ptr;
-        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
-        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
-        int r;
+      unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
+      unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
+      cfg[i].g_w = iw / dsf[i - 1].num;
+      cfg[i].g_h = ih / dsf[i - 1].num;
+    }

-        /* Determine the correct plane based on the image format. The for-loop
-         * always counts in Y,U,V order, but this may not match the order of
-         * the data on disk.
-         */
-        switch (plane)
-        {
-        case 1:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
+    // Round width & height up to a multiple of 2.
+    if ((cfg[i].g_w) % 2)
+      cfg[i].g_w++;
+
+    if ((cfg[i].g_h) % 2)
+      cfg[i].g_h++;
+  }
+
+  // Open output file for each encoder to output bitstreams
+  for (i = 0; i < kNumEncoders; ++i) {
+    VpxVideoInfo info = {
+      encoder->fourcc,
+      cfg[i].g_w,
+      cfg[i].g_h,
+      {cfg[i].g_timebase.num, cfg[i].g_timebase.den}
+    };
+
+    if (!(writers[i] = vpx_video_writer_open(argv[i+4], kContainerIVF, &info)))
+      die("Failed to open %s for writing", argv[i+4]);
+  }
+
+  // Allocate an I420 image per encoder, sized to that encoder's g_w x g_h.
+  for (i = 0; i < kNumEncoders; ++i)
+    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
+      die("Failed to allocate image (%ux%u)", cfg[i].g_w, cfg[i].g_h);
+
+  // Initialize multi-encoder
+  if (vpx_codec_enc_init_multi(&codec[0], encoder->codec_interface(), &cfg[0],
+                               kNumEncoders,
+                               show_psnr ? VPX_CODEC_USE_PSNR : 0, &dsf[0]))
+    die_codec(&codec[0], "Failed to initialize encoder");
+
+  // The extra encoding configuration parameters can be set as follows.
+  for (i = 0; i < kNumEncoders; i++) {
+    // Set encoding speed
+    if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, -6))
+      die_codec(&codec[i], "Failed to set cpu_used");
+
+    // Set static threshold.
+    if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
+      die_codec(&codec[i], "Failed to set static threshold");
+
+    // NOISE_SENSITIVITY (temporal denoising) is set on each encoder in turn:
+    // 1 for the highest-resolution encoder (i == 0), 0 for all the others.
+    if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, i == 0))
+      die_codec(&codec[i], "Failed to set noise_sensitivity");
+  }
+
+  frame_avail = 1;
+  got_data = 0;
+
+  while (frame_avail || got_data) {
+    vpx_codec_iter_t iter[kNumEncoders] = {NULL};
+    const vpx_codec_cx_pkt_t *pkt[kNumEncoders];
+
+    frame_avail = vpx_img_read(&raw[0], infile);
+
+    if (frame_avail) {
+      for (i = 1; i < kNumEncoders; ++i) {
+        vpx_image_t *const prev = &raw[i - 1];
+
+        // Scale the image down a number of times by downsampling factor
+        // FilterMode 1 or 2 give better psnr than FilterMode 0.
+        I420Scale(prev->planes[VPX_PLANE_Y], prev->stride[VPX_PLANE_Y],
+                  prev->planes[VPX_PLANE_U], prev->stride[VPX_PLANE_U],
+                  prev->planes[VPX_PLANE_V], prev->stride[VPX_PLANE_V],
+                  prev->d_w, prev->d_h,
+                  raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
+                  raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
+                  raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
+                  raw[i].d_w, raw[i].d_h, 1);
+      }
+    }
+
+    // Encode frame.
+    if (vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
+                         frame_cnt, 1, 0, arg_deadline)) {
+      die_codec(&codec[0], "Failed to encode frame");
+    }
+
+    for (i = kNumEncoders - 1; i >= 0; i--) {
+      got_data = 0;
+
+      while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
+        got_data = 1;
+        switch (pkt[i]->kind) {
+          case VPX_CODEC_CX_FRAME_PKT:  // Write using the packet's own pts.
+            vpx_video_writer_write_frame(writers[i], pkt[i]->data.frame.buf,
+                pkt[i]->data.frame.sz, pkt[i]->data.frame.pts);
+          break;
+          case VPX_CODEC_PSNR_PKT:
+            if (show_psnr) {
+              int j;
+              psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
+              psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
+              for (j = 0; j < 4; j++)
+                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
+              psnr_count[i]++;
+            }
            break;
-        case 2:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
+          default:
            break;
-        default:
-            ptr = img->planes[plane];
        }
+        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
+               (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
+        fflush(stdout);
+      }
+    }
+    frame_cnt++;
+  }
+  printf("\n");

-        for (r = 0; r < h; r++)
-        {
-            to_read = w;
+  fclose(infile);

-            nbytes = fread(ptr, 1, to_read, f);
-            if(nbytes != to_read) {
-                res = 0;
-                if(nbytes > 0)
-                    printf("Warning: Read partial frame. Check your width & height!\n");
-                break;
-            }
+  printf("Processed %d frames.\n", frame_cnt - 1);
+  for (i = 0; i < kNumEncoders; ++i) {
+    // Calculate PSNR and print it out
+    if (show_psnr && psnr_count[i] > 0) {
+      int j;
+      double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
+                                  psnr_sse_total[i]);

-            ptr += img->stride[plane];
-        }
-        if (!res)
-            break;
+      fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
+      fprintf(stderr, " %.3lf", ovpsnr);
+      for (j = 0; j < 4; j++)
+        fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
    }

-    return res;
-}
-
-static void write_ivf_file_header(FILE *outfile,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  int frame_cnt) {
-    char header[32];
-
-    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
-        return;
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header+4,  0);                   /* version */
-    mem_put_le16(header+6,  32);                  /* headersize */
-    mem_put_le32(header+8,  fourcc);              /* headersize */
-    mem_put_le16(header+12, cfg->g_w);            /* width */
-    mem_put_le16(header+14, cfg->g_h);            /* height */
-    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
-    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
-    mem_put_le32(header+24, frame_cnt);           /* length */
-    mem_put_le32(header+28, 0);                   /* unused */
-
-    (void) fwrite(header, 1, 32, outfile);
-}
-
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt)
-{
-    char             header[12];
-    vpx_codec_pts_t  pts;
-
-    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-        return;
-
-    pts = pkt->data.frame.pts;
-    mem_put_le32(header, pkt->data.frame.sz);
-    mem_put_le32(header+4, pts&0xFFFFFFFF);
-    mem_put_le32(header+8, pts >> 32);
-
-    (void) fwrite(header, 1, 12, outfile);
-}
-
-/* Temporal scaling parameters */
-/* This sets all the temporal layer parameters given |num_temporal_layers|,
- * including the target bit allocation across temporal layers. Bit allocation
- * parameters will be passed in as user parameters in another version.
- */
-static void set_temporal_layer_pattern(int num_temporal_layers,
-                                       vpx_codec_enc_cfg_t *cfg,
-                                       int bitrate,
-                                       int *layer_flags)
-{
-    assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS);
-    switch (num_temporal_layers)
-    {
-    case 1:
-    {
-        /* 1-layer */
-        cfg->ts_number_layers     = 1;
-        cfg->ts_periodicity       = 1;
-        cfg->ts_rate_decimator[0] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_target_bitrate[0] = bitrate;
-
-        // Update L only.
-        layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-        break;
-    }
-
-    case 2:
-    {
-        /* 2-layers, with sync point at first frame of layer 1. */
-        cfg->ts_number_layers     = 2;
-        cfg->ts_periodicity       = 2;
-        cfg->ts_rate_decimator[0] = 2;
-        cfg->ts_rate_decimator[1] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_layer_id[1] = 1;
-        // Use 60/40 bit allocation as example.
-        cfg->ts_target_bitrate[0] = 0.6f * bitrate;
-        cfg->ts_target_bitrate[1] = bitrate;
-
-        /* 0=L, 1=GF */
-        // ARF is used as predictor for all frames, and is only updated on
-        // key frame. Sync point every 8 frames.
-
-        // Layer 0: predict from L and ARF, update L and G.
-        layer_flags[0] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 1: sync point: predict from L and ARF, and update G.
-        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 0, predict from L and ARF, update L.
-        layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
-                         VP8_EFLAG_NO_UPD_GF  |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 1: predict from L, G and ARF, and update G.
-        layer_flags[3] = VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 0
-        layer_flags[4] = layer_flags[2];
-
-        // Layer 1
-        layer_flags[5] = layer_flags[3];
-
-        // Layer 0
-        layer_flags[6] = layer_flags[4];
-
-        // Layer 1
-        layer_flags[7] = layer_flags[5];
-        break;
-    }
-
-    case 3:
-    default:
-    {
-        // 3-layers structure where ARF is used as predictor for all frames,
-        // and is only updated on key frame.
-        // Sync points for layer 1 and 2 every 8 frames.
-        cfg->ts_number_layers     = 3;
-        cfg->ts_periodicity       = 4;
-        cfg->ts_rate_decimator[0] = 4;
-        cfg->ts_rate_decimator[1] = 2;
-        cfg->ts_rate_decimator[2] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_layer_id[1] = 2;
-        cfg->ts_layer_id[2] = 1;
-        cfg->ts_layer_id[3] = 2;
-        // Use 40/20/40 bit allocation as example.
-        cfg->ts_target_bitrate[0] = 0.4f * bitrate;
-        cfg->ts_target_bitrate[1] = 0.6f * bitrate;
-        cfg->ts_target_bitrate[2] = bitrate;
-
-        /* 0=L, 1=GF, 2=ARF */
-
-        // Layer 0: predict from L and ARF; update L and G.
-        layer_flags[0] =  VP8_EFLAG_NO_UPD_ARF |
-                          VP8_EFLAG_NO_REF_GF;
-
-        // Layer 2: sync point: predict from L and ARF; update none.
-        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 1: sync point: predict from L and ARF; update G.
-        layer_flags[2] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[3] = VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 0: predict from L and ARF; update L.
-        layer_flags[4] = VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_REF_GF;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[5] = layer_flags[3];
-
-        // Layer 1: predict from L, G, ARF; update G.
-        layer_flags[6] = VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[7] = layer_flags[3];
-        break;
-    }
-    }
-}
-
-/* The periodicity of the pattern given the number of temporal layers. */
-static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = {1, 8, 8};
-
-int main(int argc, char **argv)
-{
-    FILE                 *infile, *outfile[NUM_ENCODERS];
-    FILE                 *downsampled_input[NUM_ENCODERS - 1];
-    char                 filename[50];
-    vpx_codec_ctx_t      codec[NUM_ENCODERS];
-    vpx_codec_enc_cfg_t  cfg[NUM_ENCODERS];
-    int                  frame_cnt = 0;
-    vpx_image_t          raw[NUM_ENCODERS];
-    vpx_codec_err_t      res[NUM_ENCODERS];
-
-    int                  i;
-    long                 width;
-    long                 height;
-    int                  length_frame;
-    int                  frame_avail;
-    int                  got_data;
-    int                  flags = 0;
-    int                  layer_id = 0;
-
-    int                  layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS]
-                                     = {0};
-    int                  flag_periodicity;
-
-    /*Currently, only realtime mode is supported in multi-resolution encoding.*/
-    int                  arg_deadline = VPX_DL_REALTIME;
-
-    /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
-       don't need to know PSNR, which will skip PSNR calculation and save
-       encoding time. */
-    int                  show_psnr = 0;
-    int                  key_frame_insert = 0;
-    uint64_t             psnr_sse_total[NUM_ENCODERS] = {0};
-    uint64_t             psnr_samples_total[NUM_ENCODERS] = {0};
-    double               psnr_totals[NUM_ENCODERS][4] = {{0,0}};
-    int                  psnr_count[NUM_ENCODERS] = {0};
-
-    double               cx_time = 0;
-    struct  timeval      tv1, tv2, difftv;
-
-    /* Set the required target bitrates for each resolution level.
-     * If target bitrate for highest-resolution level is set to 0,
-     * (i.e. target_bitrate[0]=0), we skip encoding at that level.
-     */
-    unsigned int         target_bitrate[NUM_ENCODERS]={1000, 500, 100};
-
-    /* Enter the frame rate of the input video */
-    int                  framerate = 30;
-
-    /* Set down-sampling factor for each resolution level.
-       dsf[0] controls down sampling from level 0 to level 1;
-       dsf[1] controls down sampling from level 1 to level 2;
-       dsf[2] is not used. */
-    vpx_rational_t dsf[NUM_ENCODERS] = {{2, 1}, {2, 1}, {1, 1}};
-
-    /* Set the number of temporal layers for each encoder/resolution level,
-     * starting from highest resoln down to lowest resoln. */
-    unsigned int         num_temporal_layers[NUM_ENCODERS] = {3, 3, 3};
-
-    if(argc!= (7 + 3 * NUM_ENCODERS))
-        die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
-            "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output psnr?> \n",
-            argv[0]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-
-    width = strtol(argv[1], NULL, 0);
-    height = strtol(argv[2], NULL, 0);
-    framerate = strtol(argv[3], NULL, 0);
-
-    if(width < 16 || width%2 || height <16 || height%2)
-        die("Invalid resolution: %ldx%ld", width, height);
-
-    /* Open input video file for encoding */
-    if(!(infile = fopen(argv[4], "rb")))
-        die("Failed to open %s for reading", argv[4]);
-
-    /* Open output file for each encoder to output bitstreams */
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        if(!target_bitrate[i])
-        {
-            outfile[i] = NULL;
-            continue;
-        }
-
-        if(!(outfile[i] = fopen(argv[i+5], "wb")))
-            die("Failed to open %s for writing", argv[i+4]);
-    }
-
-    // Bitrates per spatial layer: overwrite default rates above.
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
-    }
-
-    // Temporal layers per spatial layers: overwrite default settings above.
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
-        if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
-          die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
-              num_temporal_layers);
-    }
-
-    /* Open file to write out each spatially downsampled input stream. */
-    for (i=0; i< NUM_ENCODERS - 1; i++)
-    {
-       // Highest resoln is encoder 0.
-        if (sprintf(filename,"ds%d.yuv",NUM_ENCODERS - i) < 0)
-        {
-            return EXIT_FAILURE;
-        }
-        downsampled_input[i] = fopen(filename,"wb");
-    }
-
-    key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
-
-    show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
-
-
-    /* Populate default encoder configuration */
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
-        if(res[i]) {
-            printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
-            return EXIT_FAILURE;
-        }
-    }
-
-    /*
-     * Update the default configuration according to needs of the application.
-     */
-    /* Highest-resolution encoder settings */
-    cfg[0].g_w = width;
-    cfg[0].g_h = height;
-    cfg[0].rc_dropframe_thresh = 0;
-    cfg[0].rc_end_usage = VPX_CBR;
-    cfg[0].rc_resize_allowed = 0;
-    cfg[0].rc_min_quantizer = 2;
-    cfg[0].rc_max_quantizer = 56;
-    cfg[0].rc_undershoot_pct = 100;
-    cfg[0].rc_overshoot_pct = 15;
-    cfg[0].rc_buf_initial_sz = 500;
-    cfg[0].rc_buf_optimal_sz = 600;
-    cfg[0].rc_buf_sz = 1000;
-    cfg[0].g_error_resilient = 1;              /* Enable error resilient mode */
-    cfg[0].g_lag_in_frames   = 0;
-
-    /* Disable automatic keyframe placement */
-    /* Note: These 3 settings are copied to all levels. But, except the lowest
-     * resolution level, all other levels are set to VPX_KF_DISABLED internally.
-     */
-    cfg[0].kf_mode           = VPX_KF_AUTO;
-    cfg[0].kf_min_dist = 3000;
-    cfg[0].kf_max_dist = 3000;
-
-    cfg[0].rc_target_bitrate = target_bitrate[0];       /* Set target bitrate */
-    cfg[0].g_timebase.num = 1;                          /* Set fps */
-    cfg[0].g_timebase.den = framerate;
-
-    /* Other-resolution encoder settings */
-    for (i=1; i< NUM_ENCODERS; i++)
-    {
-        memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));
-
-        cfg[i].rc_target_bitrate = target_bitrate[i];
-
-        /* Note: Width & height of other-resolution encoders are calculated
-         * from the highest-resolution encoder's size and the corresponding
-         * down_sampling_factor.
-         */
-        {
-            unsigned int iw = cfg[i-1].g_w*dsf[i-1].den + dsf[i-1].num - 1;
-            unsigned int ih = cfg[i-1].g_h*dsf[i-1].den + dsf[i-1].num - 1;
-            cfg[i].g_w = iw/dsf[i-1].num;
-            cfg[i].g_h = ih/dsf[i-1].num;
-        }
-
-        /* Make width & height to be multiplier of 2. */
-        // Should support odd size ???
-        if((cfg[i].g_w)%2)cfg[i].g_w++;
-        if((cfg[i].g_h)%2)cfg[i].g_h++;
-    }
-
-
-    // Set the number of threads per encode/spatial layer.
-    // (1, 1, 1) means no encoder threading.
-    cfg[0].g_threads = 2;
-    cfg[1].g_threads = 1;
-    cfg[2].g_threads = 1;
-
-    /* Allocate image for each encoder */
-    for (i=0; i< NUM_ENCODERS; i++)
-        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
-            die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
-
-    if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
-        read_frame_p = read_frame;
-    else
-        read_frame_p = read_frame_by_row;
-
-    for (i=0; i< NUM_ENCODERS; i++)
-        if(outfile[i])
-            write_ivf_file_header(outfile[i], &cfg[i], 0);
-
-    /* Temporal layers settings */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        set_temporal_layer_pattern(num_temporal_layers[i],
-                                   &cfg[i],
-                                   cfg[i].rc_target_bitrate,
-                                   &layer_flags[i * VPX_TS_MAX_PERIODICITY]);
-    }
-
-    /* Initialize multi-encoder */
-    if(vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS,
-                                (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0]))
-        die_codec(&codec[0], "Failed to initialize encoder");
-
-    /* The extra encoding configuration parameters can be set as follows. */
-    /* Set encoding speed */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        int speed = -6;
-        /* Lower speed for the lowest resolution. */
-        if (i == NUM_ENCODERS - 1) speed = -4;
-        if(vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed))
-            die_codec(&codec[i], "Failed to set cpu_used");
-    }
-
-    /* Set static threshold = 1 for all encoders */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
-            die_codec(&codec[i], "Failed to set static threshold");
-    }
-
-    /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
-    /* Enable denoising for the highest-resolution encoder. */
-    if(vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
-        die_codec(&codec[0], "Failed to set noise_sensitivity");
-    for ( i=1; i< NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
-            die_codec(&codec[i], "Failed to set noise_sensitivity");
-    }
-
-    /* Set the number of token partitions */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1))
-            die_codec(&codec[i], "Failed to set static threshold");
-    }
-
-    /* Set the max intra target bitrate */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        unsigned int max_intra_size_pct =
-            (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10);
-        if(vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT,
-                             max_intra_size_pct))
-            die_codec(&codec[i], "Failed to set static threshold");
-       //printf("%d %d \n",i,max_intra_size_pct);
-    }
-
-    frame_avail = 1;
-    got_data = 0;
-
-    while(frame_avail || got_data)
-    {
-        vpx_codec_iter_t iter[NUM_ENCODERS]={NULL};
-        const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
-
-        flags = 0;
-        frame_avail = read_frame_p(infile, &raw[0]);
-
-        if(frame_avail)
-        {
-            for ( i=1; i<NUM_ENCODERS; i++)
-            {
-                /*Scale the image down a number of times by downsampling factor*/
-                /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
-                I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
-                          raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
-                          raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
-                          raw[i-1].d_w, raw[i-1].d_h,
-                          raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
-                          raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
-                          raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
-                          raw[i].d_w, raw[i].d_h, 1);
-                /* Write out down-sampled input. */
-                length_frame = cfg[i].g_w *  cfg[i].g_h *3/2;
-                if (fwrite(raw[i].planes[0], 1, length_frame,
-                           downsampled_input[NUM_ENCODERS - i - 1]) !=
-                               length_frame)
-                {
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-
-        /* Set the flags (reference and update) for all the encoders.*/
-        for ( i=0; i<NUM_ENCODERS; i++)
-        {
-            layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity];
-            flags = 0;
-            flag_periodicity = periodicity_to_num_layers
-                [num_temporal_layers[i] - 1];
-            flags = layer_flags[i * VPX_TS_MAX_PERIODICITY +
-                                frame_cnt % flag_periodicity];
-            // Key frame flag for first frame.
-            if (frame_cnt == 0)
-            {
-                flags |= VPX_EFLAG_FORCE_KF;
-            }
-            if (frame_cnt > 0 && frame_cnt == key_frame_insert)
-            {
-                flags = VPX_EFLAG_FORCE_KF;
-            }
-
-            vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags);
-            vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
-        }
-
-        gettimeofday(&tv1, NULL);
-        /* Encode each frame at multi-levels */
-        /* Note the flags must be set to 0 in the encode call if they are set
-           for each frame with the vpx_codec_control(), as done above. */
-        if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
-            frame_cnt, 1, 0, arg_deadline))
-        {
-            die_codec(&codec[0], "Failed to encode frame");
-        }
-        gettimeofday(&tv2, NULL);
-        timersub(&tv2, &tv1, &difftv);
-        cx_time += (double)(difftv.tv_sec * 1000000 + difftv.tv_usec);
-        for (i=NUM_ENCODERS-1; i>=0 ; i--)
-        {
-            got_data = 0;
-            while( (pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i])) )
-            {
-                got_data = 1;
-                switch(pkt[i]->kind) {
-                    case VPX_CODEC_CX_FRAME_PKT:
-                        write_ivf_frame_header(outfile[i], pkt[i]);
-                        (void) fwrite(pkt[i]->data.frame.buf, 1,
-                                      pkt[i]->data.frame.sz, outfile[i]);
-                    break;
-                    case VPX_CODEC_PSNR_PKT:
-                        if (show_psnr)
-                        {
-                            int j;
-
-                            psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
-                            psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
-                            for (j = 0; j < 4; j++)
-                            {
-                                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
-                            }
-                            psnr_count[i]++;
-                        }
-
-                        break;
-                    default:
-                        break;
-                }
-                printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT
-                       && (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"");
-                fflush(stdout);
-            }
-        }
-        frame_cnt++;
-    }
-    printf("\n");
-    printf("FPS for encoding %d %f %f \n", frame_cnt, (float)cx_time / 1000000,
-           1000000 * (double)frame_cnt / (double)cx_time);
-
-    fclose(infile);
-
-    printf("Processed %ld frames.\n",(long int)frame_cnt-1);
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        /* Calculate PSNR and print it out */
-        if ( (show_psnr) && (psnr_count[i]>0) )
-        {
-            int j;
-            double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
-                                        psnr_sse_total[i]);
-
-            fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
-
-            fprintf(stderr, " %.3lf", ovpsnr);
-            for (j = 0; j < 4; j++)
-            {
-                fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
-            }
-        }
-
-        if(vpx_codec_destroy(&codec[i]))
-            die_codec(&codec[i], "Failed to destroy codec");
-
-        vpx_img_free(&raw[i]);
-
-        if(!outfile[i])
-            continue;
-
-        /* Try to rewrite the file header with the actual frame count */
-        if(!fseek(outfile[i], 0, SEEK_SET))
-            write_ivf_file_header(outfile[i], &cfg[i], frame_cnt-1);
-        fclose(outfile[i]);
-    }
-    printf("\n");
-
-    return EXIT_SUCCESS;
+    if (vpx_codec_destroy(&codec[i]))
+      die_codec(&codec[i], "Failed to destroy codec");
+
+    vpx_img_free(&raw[i]);
+    vpx_video_writer_close(writers[i]);
+  }
+  printf("\n");
+
+  return EXIT_SUCCESS;
 }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -53,8 +53,8 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -15,8 +15,8 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -19,14 +19,14 @@
 #include <string.h>
 #include <time.h>

-#include "../args.h"
-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./args.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
-#include "../vpxstats.h"
+#include "./vpxstats.h"

 static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
@@ -60,11 +60,6 @@ static const arg_def_t min_bitrate_arg =
    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
 static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
-static const arg_def_t lag_in_frame_arg =
-    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
-        "generating any outputs");
-static const arg_def_t rc_end_usage_arg =
-    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -85,11 +80,11 @@ static const arg_def_t *svc_args[] = {
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
-  &max_bitrate_arg,   &temporal_layers_arg,                 &lag_in_frame_arg,
+  &max_bitrate_arg,   &temporal_layers_arg,
 #if CONFIG_VP9_HIGHBITDEPTH
  &bitdepth_arg,
 #endif
-  &rc_end_usage_arg,  NULL
+  NULL
 };

 static const uint32_t default_frames_to_skip = 0;
@@ -212,10 +207,6 @@ static void parse_command_line(int argc, const char **argv_,
      min_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
      max_bitrate = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
-      enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
-      enc_cfg->rc_end_usage = arg_parse_uint(&arg);
 #if CONFIG_VP9_HIGHBITDEPTH
    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
      enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -19,12 +19,12 @@
 #include <string.h>

 #include "./vpx_config.h"
-#include "../vpx_ports/vpx_timer.h"
+#include "vpx_ports/vpx_timer.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

@@ -61,15 +61,6 @@ struct RateControlMetrics {
  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative).
  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
-  // Average of the short-time encoder actual bitrate.
-  // TODO(marpan): Should we add these short-time stats for each layer?
-  double avg_st_encoding_bitrate;
-  // Variance of the short-time encoder actual bitrate.
-  double variance_st_encoding_bitrate;
-  // Window (number of frames) for computing short-timee encoding bitrate.
-  int window_size;
-  // Number of window measurements.
-  int window_count;
 };

 // Note: these rate control metrics assume only 1 key frame in the
@@ -101,10 +92,6 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
    rc->layer_avg_frame_size[i] = 0.0;
    rc->layer_avg_rate_mismatch[i] = 0.0;
  }
-  rc->window_count = 0;
-  rc->window_size = 15;
-  rc->avg_st_encoding_bitrate = 0.0;
-  rc->variance_st_encoding_bitrate = 0.0;
 }

 static void printout_rate_control_summary(struct RateControlMetrics *rc,
@@ -112,7 +99,6 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
                                          int frame_cnt) {
  unsigned int i = 0;
  int tot_num_frames = 0;
-  double perc_fluctuation = 0.0;
  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
  printf("Rate control layer stats for %d layer(s):\n\n",
      cfg->ts_number_layers);
@@ -139,17 +125,6 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
        100.0 * num_dropped / rc->layer_input_frames[i]);
    printf("\n");
  }
-  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
-  rc->variance_st_encoding_bitrate =
-      rc->variance_st_encoding_bitrate / rc->window_count -
-      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
-  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
-      rc->avg_st_encoding_bitrate;
-  printf("Short-time stats, for window of %d frames: \n",rc->window_size);
-  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
-         rc->avg_st_encoding_bitrate,
-         sqrt(rc->variance_st_encoding_bitrate),
-         perc_fluctuation);
  if ((frame_cnt - 1) != tot_num_frames)
    die("Error: Number of input frames not equal to output! \n");
 }
@@ -481,11 +456,7 @@ int main(int argc, char **argv) {
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
  int flag_periodicity = 1;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
  vpx_svc_layer_id_t layer_id = {0, 0};
-#else
-  vpx_svc_layer_id_t layer_id = {0};
-#endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
@@ -498,9 +469,6 @@ int main(int argc, char **argv) {
 #else
  const int min_args = min_args_base;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
-  double sum_bitrate = 0.0;
-  double sum_bitrate2 = 0.0;
-  double framerate  = 30.0;

  exec_name = argv[0];
  // Check usage and arguments.
@@ -606,17 +574,12 @@ int main(int argc, char **argv) {
  cfg.rc_resize_allowed = 0;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
-  if (strncmp(encoder->name, "vp9", 3) == 0)
-    cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
  cfg.rc_buf_initial_sz = 500;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;

-  // Use 1 thread as default.
-  cfg.g_threads = 1;
-
  // Enable error resilient mode.
  cfg.g_error_resilient = 1;
  cfg.g_lag_in_frames   = 0;
@@ -641,7 +604,6 @@ int main(int argc, char **argv) {
    die("Failed to open %s for reading", argv[1]);
  }

-  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
  // Open an output file for each stream.
  for (i = 0; i < cfg.ts_number_layers; ++i) {
    char file_name[PATH_MAX];
@@ -675,27 +637,22 @@ int main(int argc, char **argv) {
  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
      vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
      vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
      vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
      vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
-      vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-      vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
-      if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) {
+      if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
        die_codec(&codec, "Failed to set SVC");
    }
  }
-  if (strncmp(encoder->name, "vp8", 3) == 0) {
-    vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
-  }
+  vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
  // This controls the maximum target size of the key frame.
  // For generating smaller key frames, use a smaller max_intra_size_pct
  // value, like 100 or 200.
  {
-    const int max_intra_size_pct = 900;
+    const int max_intra_size_pct = 200;
    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
  }
@@ -705,21 +662,14 @@ int main(int argc, char **argv) {
    struct vpx_usec_timer timer;
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
    // Update the temporal layer_id. No spatial layers in this test.
    layer_id.spatial_layer_id = 0;
-#endif
    layer_id.temporal_layer_id =
        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
    if (strncmp(encoder->name, "vp9", 3) == 0) {
      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
-    } else if (strncmp(encoder->name, "vp8", 3) == 0) {
-      vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID,
-                        layer_id.temporal_layer_id);
    }
    flags = layer_flags[frame_cnt % flag_periodicity];
-    if (layering_mode == 0)
-      flags = 0;
    frame_avail = vpx_img_read(&raw, infile);
    if (frame_avail)
      ++rc.layer_input_frames[layer_id.temporal_layer_id];
@@ -755,33 +705,6 @@ int main(int argc, char **argv) {
              ++rc.layer_enc_frames[i];
            }
          }
-          // Update for short-time encoding bitrate states, for moving window
-          // of size rc->window, shifted by rc->window / 2.
-          // Ignore first window segment, due to key frame.
-          if (frame_cnt > rc.window_size) {
-            sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
-            if (frame_cnt % rc.window_size == 0) {
-              rc.window_count += 1;
-              rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
-              rc.variance_st_encoding_bitrate +=
-                  (sum_bitrate / rc.window_size) *
-                  (sum_bitrate / rc.window_size);
-              sum_bitrate = 0.0;
-            }
-          }
-          // Second shifted window.
-          if (frame_cnt > rc.window_size + rc.window_size / 2) {
-            sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
-            if (frame_cnt > 2 * rc.window_size &&
-                frame_cnt % rc.window_size == 0) {
-              rc.window_count += 1;
-              rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
-              rc.variance_st_encoding_bitrate +=
-                  (sum_bitrate2 / rc.window_size) *
-                  (sum_bitrate2 / rc.window_size);
-              sum_bitrate2 = 0.0;
-            }
-          }
          break;
          default:
            break;
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -36,7 +36,7 @@ DOXYFILE_ENCODING      = UTF-8
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
 # by quotes) that should identify the project.

-PROJECT_NAME           = "WebM Codec SDK"
+PROJECT_NAME           = "WebM VP8 Codec SDK"

 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
@@ -415,6 +415,12 @@ MAX_INITIALIZER_LINES  = 30

 SHOW_USED_FILES        = YES

+# If the sources in your project are distributed over multiple directories
+# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
+# in the documentation. The default is NO.
+
+SHOW_DIRECTORIES       = NO
+
 # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 # doxygen should invoke to get the current version for each file (typically from the
 # version control system). Doxygen will invoke the program by executing (via
@@ -709,6 +715,12 @@ HTML_FOOTER            =

 HTML_STYLESHEET        =

+# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
+# files or namespaces will be aligned in HTML using tables. If set to
+# NO a bullet list will be used.
+
+HTML_ALIGN_MEMBERS     = YES
+
 # If the GENERATE_HTMLHELP tag is set to YES, additional index files
 # will be generated that can be used as input for tools like the
 # Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
--- a/libs.mk
+++ b/libs.mk
@@ -17,6 +17,32 @@ else
  ASM:=.asm
 endif

+#
+# Calculate platform- and compiler-specific offsets for hand coded assembly
+#
+ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
+OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
+define asm_offsets_template
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
+	@echo "    [CREATE] $$@"
+	$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
+$$(BUILD_PFX)$(2).S: $(2)
+CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
+endef
+else
+  ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
+define asm_offsets_template
+$$(BUILD_PFX)$(1): obj_int_extract
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
+	@echo "    [CREATE] $$@"
+	$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
+OBJS-yes += $$(BUILD_PFX)$(2).o
+CLEAN-OBJS += $$(BUILD_PFX)$(1)
+$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
+endef
+endif # rvct
+endif # !gcc
+
 #
 # Rule to generate runtime cpu detection files
 #
@@ -179,13 +205,33 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
 # based build systems.
 libvpx_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
+	@echo $(CODEC_SRCS) | xargs -n1 echo | sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt


 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)

+obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
+	@cp $^ $@
+
+obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
+obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
+	@echo "    [CREATE] $@"
+	$(qexec)$(GEN_VCPROJ) \
+    --exe \
+    --target=$(TOOLCHAIN) \
+    --name=obj_int_extract \
+    --ver=$(CONFIG_VS_VERSION) \
+    --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
+    --src-path-bare="$(SRC_PATH_BARE)" \
+    $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+    --out=$@ $^ \
+    -I. \
+    -I"$(SRC_PATH_BARE)" \
+
+PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
+
 vpx.def: $(call enabled,CODEC_EXPORTS)
 	@echo "    [CREATE] $@"
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
@@ -200,7 +246,7 @@ ASM_INCLUDES := \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \

-vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
+vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
            $(if $(CONFIG_SHARED),--dll,--lib) \
@@ -331,7 +377,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm
 endif

 #
-# Add assembler dependencies for configuration.
+# Add assembler dependencies for configuration and offsets
 #
 $(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
@@ -356,7 +402,7 @@ libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

 libvpx_test_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
+	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
 CLEAN-OBJS += libvpx_test_srcs.txt

 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
@@ -489,11 +535,7 @@ libs.doxy: $(CODEC_DOC_SRCS)
 	@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@

 ## Generate rtcd.h for all objects
-ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
 $(OBJS-yes:.o=.d): $(RTCD)
-else
-$(OBJS-yes): $(RTCD)
-endif

 ## Update the global src list
 SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -1,4 +1,4 @@
-/*!\mainpage WebM Codec SDK
+/*!\mainpage WebM VP8 Codec SDK

  \section main_contents Page Contents
  - \ref main_intro
@@ -6,11 +6,11 @@
  - \ref main_support

  \section main_intro Introduction
-  Welcome to the WebM Codec SDK. This SDK allows you to integrate your
-  applications with the VP8 and VP9 video codecs, high quality, royalty free,
-  open source codecs deployed on billions of computers and devices worldwide.
+  Welcome to the WebM VP8 Codec SDK. This SDK allows you to integrate your
+  applications with the VP8 video codec, a high-quality, royalty-free,
+  open-source codec deployed on millions of computers and devices worldwide.

-  This distribution of the WebM Codec SDK includes the following support:
+  This distribution of the WebM VP8 Codec SDK includes the following support:

  \if vp8_encoder
  - \ref vp8_encoder
@@ -28,12 +28,12 @@
  - Read the \ref samples "sample code" for examples of how to interact with the
    codec.
  - \ref codec reference
-  \if encoder
-  - \ref encoder reference
-  \endif
-  \if decoder
-  - \ref decoder reference
-  \endif
+    \if encoder
+    - \ref encoder reference
+    \endif
+    \if decoder
+    - \ref decoder reference
+    \endif

  \section main_support Support Options & FAQ
  The WebM project is an open source project supported by its community. For
--- a/solution.mk
+++ b/solution.mk
@@ -9,7 +9,7 @@
 ##

 # libvpx reverse dependencies (targets that depend on libvpx)
-VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
+VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
 VPX_RDEPS=$(foreach vcp,\
              $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)

@@ -17,6 +17,7 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX))
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
            $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
+            --dep=vpx:obj_int_extract \
            --dep=test_libvpx:gtest \
            --ver=$(CONFIG_VS_VERSION)\
            --out=$@ $^
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -29,14 +29,14 @@ class ACMRandom {
  uint16_t Rand16(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
-    return (value >> 15) & 0xffff;
+    return (value >> 16) & 0xffff;
  }

  uint8_t Rand8(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
    // There's a bit more entropy in the upper bits of this implementation.
-    return (value >> 23) & 0xff;
+    return (value >> 24) & 0xff;
  }

  uint8_t Rand8Extremes(void) {
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -1,189 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-
-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-
-namespace {
-
-//const int kLegacyByteAlignment = 0;
-//const int kLegacyYPlaneByteAlignment = 32;
-//const int kNumPlanesToCheck = 3;
-//const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
-//const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
-
-#if CONFIG_WEBM_IO && 0
-
-struct ByteAlignmentTestParam {
-  int byte_alignment;
-  vpx_codec_err_t expected_value;
-  bool decode_remaining;
-};
-
-const ByteAlignmentTestParam kBaTestParams[] = {
-  {kLegacyByteAlignment, VPX_CODEC_OK, true},
-  {32, VPX_CODEC_OK, true},
-  {64, VPX_CODEC_OK, true},
-  {128, VPX_CODEC_OK, true},
-  {256, VPX_CODEC_OK, true},
-  {512, VPX_CODEC_OK, true},
-  {1024, VPX_CODEC_OK, true},
-  {1, VPX_CODEC_INVALID_PARAM, false},
-  {-2, VPX_CODEC_INVALID_PARAM, false},
-  {4, VPX_CODEC_INVALID_PARAM, false},
-  {16, VPX_CODEC_INVALID_PARAM, false},
-  {255, VPX_CODEC_INVALID_PARAM, false},
-  {2048, VPX_CODEC_INVALID_PARAM, false},
-};
-
-// Class for testing byte alignment of reference buffers.
-class ByteAlignmentTest
-    : public ::testing::TestWithParam<ByteAlignmentTestParam> {
- protected:
-  ByteAlignmentTest()
-      : video_(NULL),
-        decoder_(NULL),
-        md5_file_(NULL) {}
-
-  virtual void SetUp() {
-    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
-    ASSERT_TRUE(video_ != NULL);
-    video_->Init();
-    video_->Begin();
-
-    const vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
-    ASSERT_TRUE(decoder_ != NULL);
-
-    OpenMd5File(kVP9Md5File);
-  }
-
-  virtual void TearDown() {
-    if (md5_file_ != NULL)
-      fclose(md5_file_);
-
-    delete decoder_;
-    delete video_;
-  }
-
-  void SetByteAlignment(int byte_alignment, vpx_codec_err_t expected_value) {
-    decoder_->Control(VP9_SET_BYTE_ALIGNMENT, byte_alignment, expected_value);
-  }
-
-  vpx_codec_err_t DecodeOneFrame(int byte_alignment_to_check) {
-    const vpx_codec_err_t res =
-        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-    CheckDecodedFrames(byte_alignment_to_check);
-    if (res == VPX_CODEC_OK)
-      video_->Next();
-    return res;
-  }
-
-  vpx_codec_err_t DecodeRemainingFrames(int byte_alignment_to_check) {
-    for (; video_->cxdata() != NULL; video_->Next()) {
-      const vpx_codec_err_t res =
-          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-      if (res != VPX_CODEC_OK)
-        return res;
-      CheckDecodedFrames(byte_alignment_to_check);
-    }
-    return VPX_CODEC_OK;
-  }
-
- private:
-  // Check if |data| is aligned to |byte_alignment_to_check|.
-  // |byte_alignment_to_check| must be a power of 2.
-  void CheckByteAlignment(const uint8_t *data, int byte_alignment_to_check) {
-    ASSERT_EQ(0u, reinterpret_cast<size_t>(data) % byte_alignment_to_check);
-  }
-
-  // Iterate through the planes of the decoded frames and check for
-  // alignment based off |byte_alignment_to_check|.
-  void CheckDecodedFrames(int byte_alignment_to_check) {
-    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next()) != NULL) {
-      if (byte_alignment_to_check == kLegacyByteAlignment) {
-        CheckByteAlignment(img->planes[0], kLegacyYPlaneByteAlignment);
-      } else {
-        for (int i = 0; i < kNumPlanesToCheck; ++i) {
-          CheckByteAlignment(img->planes[i], byte_alignment_to_check);
-        }
-      }
-      CheckMd5(*img);
-    }
-  }
-
-  // TODO(fgalligan): Move the MD5 testing code into another class.
-  void OpenMd5File(const std::string &md5_file_name_) {
-    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
-        << md5_file_name_;
-  }
-
-  void CheckMd5(const vpx_image_t &img) {
-    ASSERT_TRUE(md5_file_ != NULL);
-    char expected_md5[33];
-    char junk[128];
-
-    // Read correct md5 checksums.
-    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
-    ASSERT_NE(EOF, res) << "Read md5 data failed";
-    expected_md5[32] = '\0';
-
-    ::libvpx_test::MD5 md5_res;
-    md5_res.Add(&img);
-    const char *const actual_md5 = md5_res.Get();
-
-    // Check md5 match.
-    ASSERT_STREQ(expected_md5, actual_md5) << "MD5 checksums don't match";
-  }
-
-  libvpx_test::WebMVideoSource *video_;
-  libvpx_test::VP9Decoder *decoder_;
-  FILE *md5_file_;
-};
-
-TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
-  const int num_elements = 14;
-  const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024,
-                                  0, 1024, 32, 512, 64, 256, 128 };
-
-  for (int i = 0; i < num_elements; ++i) {
-    SetByteAlignment(byte_alignments[i], VPX_CODEC_OK);
-    ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame(byte_alignments[i]));
-  }
-  SetByteAlignment(byte_alignments[0], VPX_CODEC_OK);
-  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(byte_alignments[0]));
-}
-
-TEST_P(ByteAlignmentTest, TestAlignment) {
-  const ByteAlignmentTestParam t = GetParam();
-  SetByteAlignment(t.byte_alignment, t.expected_value);
-  if (t.decode_remaining)
-    ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment));
-}
-
-INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest,
-                        ::testing::ValuesIn(kBaTestParams));
-
-#endif  // CONFIG_WEBM_IO
-
-}  // namespace
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -35,11 +35,6 @@ class CodecFactory {
  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const = 0;

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline)  // NOLINT(runtime/int)
-                                 const = 0;
-
  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
@@ -77,10 +72,6 @@ class VP8Decoder : public Decoder {
  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
@@ -113,14 +104,8 @@ class VP8CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, flags, deadline);
+    return new VP8Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
@@ -169,10 +154,6 @@ class VP9Decoder : public Decoder {
  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
@@ -205,14 +186,8 @@ class VP9CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, flags, deadline);
+    return new VP9Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -10,14 +10,12 @@

 #include <string.h>
 #include "test/acm_random.h"
-#include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
-#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
@@ -33,16 +31,13 @@ typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             int w, int h);

 struct ConvolveFunctions {
-  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
-                    ConvolveFunc h8, ConvolveFunc h8_avg,
+  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
                    ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
                    int bd)
-      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
-        v8_avg_(v8_avg), hv8_avg_(hv8_avg), use_highbd_(bd) {}
+      : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
+        hv8_avg_(hv8_avg), use_highbd_(bd) {}

-  ConvolveFunc copy_;
-  ConvolveFunc avg_;
  ConvolveFunc h8_;
  ConvolveFunc v8_;
  ConvolveFunc hv8_;
@@ -303,35 +298,25 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
    output_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
-    output_ref_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kOutputBufferSize));
 #if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment,
                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
    output16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
-    output16_ref_ = reinterpret_cast<uint16_t*>(
-        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 #endif
  }

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
  static void TearDownTestCase() {
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
-    vpx_free(output_ref_);
-    output_ref_ = NULL;
 #if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
-    vpx_free(output16_ref_);
-    output16_ref_ = NULL;
 #endif
  }

@@ -397,13 +382,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
  }

-  void CopyOutputToRef() {
-    vpx_memcpy(output_ref_, output_, kOutputBufferSize);
-#if CONFIG_VP9_HIGHBITDEPTH
-    vpx_memcpy(output16_ref_, output16_, kOutputBufferSize);
-#endif
-  }
-
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
@@ -437,19 +415,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
  }

-  uint8_t *output_ref() const {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (UUT_->use_highbd_ == 0) {
-      return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
-    } else {
-      return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
-                                BorderLeft());
-    }
-#else
-    return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
-#endif
-  }
-
  uint16_t lookup(uint8_t *list, int index) const {
 #if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
@@ -528,65 +493,24 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
  const ConvolveFunctions* UUT_;
  static uint8_t* input_;
  static uint8_t* output_;
-  static uint8_t* output_ref_;
 #if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t* input16_;
  static uint16_t* output16_;
-  static uint16_t* output16_ref_;
  int mask_;
 #endif
 };

 uint8_t* ConvolveTest::input_ = NULL;
 uint8_t* ConvolveTest::output_ = NULL;
-uint8_t* ConvolveTest::output_ref_ = NULL;
 #if CONFIG_VP9_HIGHBITDEPTH
 uint16_t* ConvolveTest::input16_ = NULL;
 uint16_t* ConvolveTest::output16_ = NULL;
-uint16_t* ConvolveTest::output16_ref_ = NULL;
 #endif

 TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
 }

-TEST_P(ConvolveTest, Copy) {
-  uint8_t* const in = input();
-  uint8_t* const out = output();
-
-  ASM_REGISTER_STATE_CHECK(
-      UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
-                  Width(), Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                lookup(in, y * kInputStride + x))
-          << "(" << x << "," << y << ")";
-}
-
-TEST_P(ConvolveTest, Avg) {
-  uint8_t* const in = input();
-  uint8_t* const out = output();
-  uint8_t* const out_ref = output_ref();
-  CopyOutputToRef();
-
-  ASM_REGISTER_STATE_CHECK(
-      UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
-                Width(), Height()));
-
-  CheckGuardBlocks();
-
-  for (int y = 0; y < Height(); ++y)
-    for (int x = 0; x < Width(); ++x)
-      ASSERT_EQ(lookup(out, y * kOutputStride + x),
-                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
-                                   lookup(out_ref, y * kOutputStride + x), 1))
-          << "(" << x << "," << y << ")";
-}
-
 TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t* const in = input();
  uint8_t* const out = output();
@@ -1264,30 +1188,6 @@ void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
 }
 #endif  // HAVE_SSE2 && ARCH_X86_64

-void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *filter_x,
-                            int filter_x_stride,
-                            const int16_t *filter_y,
-                            int filter_y_stride,
-                            int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
-                             filter_x, filter_x_stride,
-                             filter_y, filter_y_stride, w, h, 8);
-}
-
-void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
-                           uint8_t *dst, ptrdiff_t dst_stride,
-                           const int16_t *filter_x,
-                           int filter_x_stride,
-                           const int16_t *filter_y,
-                           int filter_y_stride,
-                           int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
-                            filter_x, filter_x_stride,
-                            filter_y, filter_y_stride, w, h, 8);
-}
-
 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x,
@@ -1360,30 +1260,6 @@ void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }

-void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
-                             uint8_t *dst, ptrdiff_t dst_stride,
-                             const int16_t *filter_x,
-                             int filter_x_stride,
-                             const int16_t *filter_y,
-                             int filter_y_stride,
-                             int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
-                             filter_x, filter_x_stride,
-                             filter_y, filter_y_stride, w, h, 10);
-}
-
-void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *filter_x,
-                            int filter_x_stride,
-                            const int16_t *filter_y,
-                            int filter_y_stride,
-                            int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
-                            filter_x, filter_x_stride,
-                            filter_y, filter_y_stride, w, h, 10);
-}
-
 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x,
@@ -1456,30 +1332,6 @@ void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }

-void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
-                             uint8_t *dst, ptrdiff_t dst_stride,
-                             const int16_t *filter_x,
-                             int filter_x_stride,
-                             const int16_t *filter_y,
-                             int filter_y_stride,
-                             int w, int h) {
-  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
-                             filter_x, filter_x_stride,
-                             filter_y, filter_y_stride, w, h, 12);
-}
-
-void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
-                            uint8_t *dst, ptrdiff_t dst_stride,
-                            const int16_t *filter_x,
-                            int filter_x_stride,
-                            const int16_t *filter_y,
-                            int filter_y_stride,
-                            int w, int h) {
-  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
-                            filter_x, filter_x_stride,
-                            filter_y, filter_y_stride, w, h, 12);
-}
-
 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x,
@@ -1553,7 +1405,6 @@ void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
 }

 const ConvolveFunctions convolve8_c(
-    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
@@ -1572,7 +1423,6 @@ INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
 const ConvolveFunctions convolve10_c(
-    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
@@ -1591,7 +1441,6 @@ INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve10_c),
    make_tuple(64, 64, &convolve10_c)));
 const ConvolveFunctions convolve12_c(
-    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
@@ -1613,7 +1462,6 @@ INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
 #else

 const ConvolveFunctions convolve8_c(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
@@ -1637,21 +1485,10 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
 #if HAVE_SSE2 && ARCH_X86_64
 #if CONFIG_VP9_HIGHBITDEPTH
 const ConvolveFunctions convolve8_sse2(
-    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
-const ConvolveFunctions convolve10_sse2(
-    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
-    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
-    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
-    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
-const ConvolveFunctions convolve12_sse2(
-    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
-    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
-    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
-    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
+INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
@@ -1664,7 +1501,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
-    make_tuple(64, 64, &convolve8_sse2),
+    make_tuple(64, 64, &convolve8_sse2)));
+const ConvolveFunctions convolve10_sse2(
+    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
+INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve10_sse2),
    make_tuple(8, 4, &convolve10_sse2),
    make_tuple(4, 8, &convolve10_sse2),
@@ -1677,7 +1519,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(32, 32, &convolve10_sse2),
    make_tuple(64, 32, &convolve10_sse2),
    make_tuple(32, 64, &convolve10_sse2),
-    make_tuple(64, 64, &convolve10_sse2),
+    make_tuple(64, 64, &convolve10_sse2)));
+const ConvolveFunctions convolve12_sse2(
+    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
+INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve12_sse2),
    make_tuple(8, 4, &convolve12_sse2),
    make_tuple(4, 8, &convolve12_sse2),
@@ -1693,7 +1540,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(64, 64, &convolve12_sse2)));
 #else
 const ConvolveFunctions convolve8_sse2(
-    vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
@@ -1717,7 +1563,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(

 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
@@ -1740,7 +1585,6 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(

 #if HAVE_AVX2 && HAVE_SSSE3
 const ConvolveFunctions convolve8_avx2(
-    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
@@ -1761,20 +1605,11 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
    make_tuple(64, 64, &convolve8_avx2)));
 #endif  // HAVE_AVX2 && HAVE_SSSE3

-#if HAVE_NEON
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
-#else  // HAVE_NEON
-const ConvolveFunctions convolve8_neon(
-    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
-    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
-    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
-    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
-#endif  // HAVE_NEON_ASM

 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_neon),
@@ -1790,11 +1625,10 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
    make_tuple(64, 32, &convolve8_neon),
    make_tuple(32, 64, &convolve8_neon),
    make_tuple(64, 64, &convolve8_neon)));
-#endif  // HAVE_NEON
+#endif

 #if HAVE_DSPR2
 const ConvolveFunctions convolve8_dspr2(
-    vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -38,25 +38,13 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
-    denoiser_offon_test_ = 0;
-    denoiser_offon_period_ = -1;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0)
-      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
-
-    if (denoiser_offon_test_) {
-      ASSERT_GT(denoiser_offon_period_, 0)
-          << "denoiser_offon_period_ is not positive.";
-      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
-        // Flip denoiser_on_ periodically
-        denoiser_on_ ^= 1;
-      }
+    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
-
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
@@ -136,8 +124,6 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
  double effective_datarate_;
  size_t bits_in_last_frame_;
  int denoiser_on_;
-  int denoiser_offon_test_;
-  int denoiser_offon_period_;
 };

 #if CONFIG_TEMPORAL_DENOISING
@@ -169,29 +155,6 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
        << " The datarate for the file missed the target!";
  }
 }
-
-// Check basic datarate targeting, for a single bitrate, when denoiser is off
-// and on.
-TEST_P(DatarateTestLarge, DenoiserOffOn) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
-  cfg_.rc_target_bitrate = 300;
-  ResetModel();
-  // The denoiser is off by default.
-  denoiser_on_ = 0;
-  // Set the offon test flag.
-  denoiser_offon_test_ = 1;
-  denoiser_offon_period_ = 100;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
-      << " The datarate for the file exceeds the target!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
-      << " The datarate for the file missed the target!";
-}
 #endif  // CONFIG_TEMPORAL_DENOISING

 TEST_P(DatarateTestLarge, BasicBufferModel) {
@@ -283,8 +246,6 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
    for (int i = 0; i < 3; ++i) {
      bits_total_[i] = 0;
    }
-    denoiser_offon_test_ = 0;
-    denoiser_offon_period_ = -1;
  }

  //
@@ -352,32 +313,22 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0)
+    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-
-    if (denoiser_offon_test_) {
-      ASSERT_GT(denoiser_offon_period_, 0)
-          << "denoiser_offon_period_ is not positive.";
-      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
-        // Flip denoiser_on_ periodically
-        denoiser_on_ ^= 1;
-      }
+      encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
-
-    encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
-
    if (cfg_.ts_number_layers > 1) {
-      if (video->frame() == 0) {
+      if (video->frame() == 1) {
        encoder->Control(VP9E_SET_SVC, 1);
      }
-      vpx_svc_layer_id_t layer_id;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
+      vpx_svc_layer_id_t layer_id = {0, 0};
      layer_id.spatial_layer_id = 0;
-#endif
      frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
      layer_id.temporal_layer_id = SetLayerId(video->frame(),
                                              cfg_.ts_number_layers);
-      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
+      if (video->frame() > 0) {
+       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
+      }
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -447,8 +398,6 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
  vpx_codec_pts_t first_drop_;
  int num_drops_;
  int denoiser_on_;
-  int denoiser_offon_test_;
-  int denoiser_offon_period_;
 };

 // Check basic rate targeting,
@@ -539,7 +488,7 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
        << " The first dropped frame for drop_thresh " << i
        << " > first dropped frame for drop_thresh "
        << i - kDropFrameThreshTestStep;
-    ASSERT_GE(num_drops_, last_num_drops * 0.90)
+    ASSERT_GE(num_drops_, last_num_drops)
        << " The number of dropped frames for drop_thresh " << i
        << " < number of dropped frames for drop_thresh "
        << i - kDropFrameThreshTestStep;
@@ -565,9 +514,6 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
  cfg_.ts_rate_decimator[0] = 2;
  cfg_.ts_rate_decimator[1] = 1;

-  if (deadline_ == VPX_DL_REALTIME)
-    cfg_.g_error_resilient = 1;
-
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  for (int i = 200; i <= 800; i += 200) {
@@ -703,43 +649,11 @@ TEST_P(DatarateTestVP9Large, DenoiserLevels) {
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
-
-// Check basic datarate targeting, for a single bitrate, when denoiser is off
-// and on.
-TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 2;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
-
-  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
-  // there is only one denoiser mode: denoiserYonly(which is 1),
-  // but may add more modes in the future.
-  cfg_.rc_target_bitrate = 300;
-  ResetModel();
-  // The denoiser is off by default.
-  denoiser_on_ = 0;
-  // Set the offon test flag.
-  denoiser_offon_test_ = 1;
-  denoiser_offon_period_ = 100;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-      << " The datarate for the file is lower than target by too much!";
-  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-      << " The datarate for the file is greater than target by too much!";
-}
 #endif  // CONFIG_VP9_TEMPORAL_DENOISING

 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kRealTime),
+                          ::libvpx_test::kRealTime),
                          ::testing::Range(2, 7));
 }  // namespace
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -338,8 +338,8 @@ void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class Trans16x16TestBase {
 public:
@@ -546,7 +546,7 @@ class Trans16x16TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -562,7 +562,7 @@ class Trans16x16TestBase {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        }
      }

@@ -576,7 +576,7 @@ class Trans16x16TestBase {
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -585,7 +585,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        const uint32_t diff = dst[j] - src[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
            << "Error: 16x16 IDCT has error " << error
@@ -593,7 +593,6 @@ class Trans16x16TestBase {
      }
    }
  }
-
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
@@ -605,13 +604,13 @@ class Trans16x16TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
+          coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
        } else {
          coeff[scan[j]] = 0;
        }
@@ -622,7 +621,7 @@ class Trans16x16TestBase {
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
@@ -633,7 +632,7 @@ class Trans16x16TestBase {
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -642,7 +641,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 #else
        const uint32_t diff = dst[j] - ref[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
@@ -650,7 +649,6 @@ class Trans16x16TestBase {
      }
    }
  }
-
  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
@@ -798,9 +796,9 @@ class InvTrans16x16DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
+    thresh_  = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
-    pitch_ = 16;
+    pitch_    = 16;
    mask_ = (1 << bit_depth_) - 1;
 }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -834,7 +832,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -860,7 +858,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -887,7 +885,7 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -932,7 +930,7 @@ INSTANTIATE_TEST_CASE_P(
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -940,5 +938,5 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
                   VPX_BITS_8)));
-#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -90,7 +90,7 @@ void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
 public:
@@ -335,7 +335,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -345,7 +345,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -355,7 +355,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_sse2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -371,7 +371,7 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -381,5 +381,5 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_avx2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -57,21 +57,6 @@ TEST(DecodeAPI, InvalidParams) {
  }
 }

-#if CONFIG_VP8_DECODER
-TEST(DecodeAPI, OptionalParams) {
-  vpx_codec_ctx_t dec;
-
-#if CONFIG_ERROR_CONCEALMENT
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
-                                             VPX_CODEC_USE_ERROR_CONCEALMENT));
-#else
-  EXPECT_EQ(VPX_CODEC_INCAPABLE,
-            vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
-                               VPX_CODEC_USE_ERROR_CONCEALMENT));
-#endif  // CONFIG_ERROR_CONCEALMENT
-}
-#endif  // CONFIG_VP8_DECODER
-
 #if CONFIG_VP9_DECODER
 // Test VP9 codec controls after a decode error to ensure the code doesn't
 // misbehave.
@@ -80,7 +65,6 @@ void TestVp9Controls(vpx_codec_ctx_t *dec) {
    VP8D_GET_LAST_REF_UPDATES,
    VP8D_GET_FRAME_CORRUPTED,
    VP9D_GET_DISPLAY_SIZE,
-    VP9D_GET_FRAME_SIZE
  };
  int val[2];

--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -8,17 +8,13 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <string>
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
 #include "test/ivf_video_source.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/webm_video_source.h"
 #include "vpx_ports/vpx_timer.h"
-#include "./ivfenc.h"
 #include "./vpx_version.h"

 using std::tr1::make_tuple;
@@ -28,9 +24,7 @@ namespace {
 #define VIDEO_NAME 0
 #define THREADS 1

-const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
-const char kNewEncodeOutputFile[] = "new_encode.ivf";

 /*
 DecodePerfTest takes a tuple of filename + number of threads to decode with
@@ -111,163 +105,4 @@ TEST_P(DecodePerfTest, PerfTest) {
 INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
                        ::testing::ValuesIn(kVP9DecodePerfVectors));

-class VP9NewEncodeDecodePerfTest :
-    public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  VP9NewEncodeDecodePerfTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        outfile_(0),
-        out_frames_(0) {
-  }
-
-  virtual ~VP9NewEncodeDecodePerfTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-
-    cfg_.g_lag_in_frames = 25;
-    cfg_.rc_min_quantizer = 2;
-    cfg_.rc_max_quantizer = 56;
-    cfg_.rc_dropframe_thresh = 0;
-    cfg_.rc_undershoot_pct = 50;
-    cfg_.rc_overshoot_pct = 50;
-    cfg_.rc_buf_sz = 1000;
-    cfg_.rc_buf_initial_sz = 500;
-    cfg_.rc_buf_optimal_sz = 600;
-    cfg_.rc_resize_allowed = 0;
-    cfg_.rc_end_usage = VPX_VBR;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, speed_);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, 2);
-    }
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    const std::string data_path = getenv("LIBVPX_TEST_DATA_PATH");
-    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
-    outfile_ = fopen(path_to_source.c_str(), "wb");
-    ASSERT_TRUE(outfile_ != NULL);
-  }
-
-  virtual void EndPassHook() {
-    if (outfile_ != NULL) {
-      if (!fseek(outfile_, 0, SEEK_SET))
-        ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
-      fclose(outfile_);
-      outfile_ = NULL;
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    ++out_frames_;
-
-    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
-      ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
-
-    // Write frame header and data.
-    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
-    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
-              pkt->data.frame.sz);
-  }
-
-  virtual bool DoDecode() { return false; }
-
-  void set_speed(unsigned int speed) {
-    speed_ = speed;
-  }
-
- private:
-  libvpx_test::TestMode encoding_mode_;
-  uint32_t speed_;
-  FILE *outfile_;
-  uint32_t out_frames_;
-};
-
-struct EncodePerfTestVideo {
-  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
-                      uint32_t bitrate_, int frames_)
-      : name(name_),
-        width(width_),
-        height(height_),
-        bitrate(bitrate_),
-        frames(frames_) {}
-  const char *name;
-  uint32_t width;
-  uint32_t height;
-  uint32_t bitrate;
-  int frames;
-};
-
-const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
-  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
-};
-
-TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {
-  SetUp();
-
-  // TODO(JBB): Make this work by going through the set of given files.
-  const int i = 0;
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  const char *video_name = kVP9EncodePerfTestVectors[i].name;
-  libvpx_test::I420VideoSource video(
-      video_name,
-      kVP9EncodePerfTestVectors[i].width,
-      kVP9EncodePerfTestVectors[i].height,
-      timebase.den, timebase.num, 0,
-      kVP9EncodePerfTestVectors[i].frames);
-  set_speed(2);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  const uint32_t threads = 4;
-
-  libvpx_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
-  decode_video.Init();
-
-  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-  cfg.threads = threads;
-  libvpx_test::VP9Decoder decoder(cfg, 0);
-
-  vpx_usec_timer t;
-  vpx_usec_timer_start(&t);
-
-  for (decode_video.Begin(); decode_video.cxdata() != NULL;
-       decode_video.Next()) {
-    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
-  }
-
-  vpx_usec_timer_mark(&t);
-  const double elapsed_secs =
-      static_cast<double>(vpx_usec_timer_elapsed(&t)) / kUsecsInSec;
-  const unsigned decode_frames = decode_video.frame_number();
-  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
-
-  printf("{\n");
-  printf("\t\"type\" : \"decode_perf_test\",\n");
-  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
-  printf("\t\"threadCount\" : %u,\n", threads);
-  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
-  printf("\t\"totalFrames\" : %u,\n", decode_frames);
-  printf("\t\"framesPerSecond\" : %f\n", fps);
-  printf("}\n");
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-  VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -65,7 +65,7 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,

 void DecoderTest::RunLoop(CompressedVideoSource *video,
                          const vpx_codec_dec_cfg_t &dec_cfg) {
-  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, flags_, 0);
+  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
  ASSERT_TRUE(decoder != NULL);
  bool end_of_file = false;

@@ -110,12 +110,4 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {
  RunLoop(video, dec_cfg);
 }

-void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
-  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
-}
-
-void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
-  flags_ = flags;
-}
-
 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -41,13 +41,7 @@ class DxDataIterator {
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
-    memset(&decoder_, 0, sizeof(decoder_));
-  }
-
-  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-          unsigned long deadline)  // NOLINT
-      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
+      : cfg_(cfg), deadline_(deadline), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

@@ -72,7 +66,9 @@ class Decoder {
  }

  void Control(int ctrl_id, int arg) {
-    Control(ctrl_id, arg, VPX_CODEC_OK);
+    InitOnce();
+    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

  void Control(int ctrl_id, const void *arg) {
@@ -81,12 +77,6 @@ class Decoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

-  void Control(int ctrl_id, int arg, vpx_codec_err_t expected_value) {
-    InitOnce();
-    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
-    ASSERT_EQ(expected_value, res) << DecodeError();
-  }
-
  const char* DecodeError() {
    const char *detail = vpx_codec_error_detail(&decoder_);
    return detail ? detail : vpx_codec_error(&decoder_);
@@ -107,10 +97,6 @@ class Decoder {

  bool IsVP8() const;

-  vpx_codec_ctx_t * GetDecoder() {
-    return &decoder_;
-  }
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const = 0;

@@ -118,7 +104,7 @@ class Decoder {
    if (!init_done_) {
      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
                                                     CodecInterface(),
-                                                     &cfg_, flags_);
+                                                     &cfg_, 0);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
@@ -126,7 +112,6 @@ class Decoder {

  vpx_codec_ctx_t     decoder_;
  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
  unsigned int        deadline_;
  bool                init_done_;
 };
@@ -139,9 +124,6 @@ class DecoderTest {
  virtual void RunLoop(CompressedVideoSource *video,
                       const vpx_codec_dec_cfg_t &dec_cfg);

-  virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg);
-  virtual void set_flags(const vpx_codec_flags_t flags);
-
  // Hook to be called before decompressing every frame.
  virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
                                  Decoder* /*decoder*/) {}
@@ -164,16 +146,11 @@ class DecoderTest {
                                const vpx_codec_err_t res_peek);

 protected:
-  explicit DecoderTest(const CodecFactory *codec)
-      : codec_(codec),
-        cfg_(),
-        flags_(0) {}
+  explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
-  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
 };

 }  // namespace libvpx_test
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -7,7 +7,6 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-#include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "./vpx_version.h"
@@ -51,8 +50,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };

-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
-const int kEncodePerfTestThreads[] = { 1, 2, 4 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 12 };

 #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))

@@ -65,8 +63,7 @@ class VP9EncodePerfTest
        min_psnr_(kMaxPsnr),
        nframes_(0),
        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        threads_(1) {}
+        speed_(0) {}

  virtual ~VP9EncodePerfTest() {}

@@ -85,18 +82,12 @@ class VP9EncodePerfTest
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_resize_allowed = 0;
    cfg_.rc_end_usage = VPX_CBR;
-    cfg_.g_error_resilient = 1;
-    cfg_.g_threads = threads_;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      const int log2_tile_columns = 3;
+    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, speed_);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, log2_tile_columns);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
    }
  }

@@ -122,77 +113,54 @@ class VP9EncodePerfTest
    speed_ = speed;
  }

-  void set_threads(unsigned int threads) {
-    threads_ = threads;
-  }
-
 private:
  double min_psnr_;
  unsigned int nframes_;
  libvpx_test::TestMode encoding_mode_;
  unsigned speed_;
-  unsigned int threads_;
 };

 TEST_P(VP9EncodePerfTest, PerfTest) {
  for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) {
    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
-      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
-        if (kVP9EncodePerfTestVectors[i].width < 512 &&
-            kEncodePerfTestThreads[k] > 1)
-          continue;
-        else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
-                 kEncodePerfTestThreads[k] > 2)
-          continue;
+      SetUp();

-        set_threads(kEncodePerfTestThreads[k]);
-        SetUp();
+      const vpx_rational timebase = { 33333333, 1000000000 };
+      cfg_.g_timebase = timebase;
+      cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;

-        const vpx_rational timebase = { 33333333, 1000000000 };
-        cfg_.g_timebase = timebase;
-        cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+      init_flags_ = VPX_CODEC_USE_PSNR;

-        init_flags_ = VPX_CODEC_USE_PSNR;
+      const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
+      const char *video_name = kVP9EncodePerfTestVectors[i].name;
+      libvpx_test::I420VideoSource video(
+          video_name,
+          kVP9EncodePerfTestVectors[i].width,
+          kVP9EncodePerfTestVectors[i].height,
+          timebase.den, timebase.num, 0,
+          kVP9EncodePerfTestVectors[i].frames);
+      set_speed(kEncodePerfTestSpeeds[j]);

-        const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
-        const char *video_name = kVP9EncodePerfTestVectors[i].name;
-        libvpx_test::I420VideoSource video(
-            video_name,
-            kVP9EncodePerfTestVectors[i].width,
-            kVP9EncodePerfTestVectors[i].height,
-            timebase.den, timebase.num, 0,
-            kVP9EncodePerfTestVectors[i].frames);
-        set_speed(kEncodePerfTestSpeeds[j]);
+      vpx_usec_timer t;
+      vpx_usec_timer_start(&t);

-        vpx_usec_timer t;
-        vpx_usec_timer_start(&t);
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      vpx_usec_timer_mark(&t);
+      const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
+      const double fps = frames / elapsed_secs;
+      const double minimum_psnr = min_psnr();

-        vpx_usec_timer_mark(&t);
-        const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
-        const double fps = frames / elapsed_secs;
-        const double minimum_psnr = min_psnr();
-        std::string display_name(video_name);
-        if (kEncodePerfTestThreads[k] > 1) {
-          char thread_count[32];
-          snprintf(thread_count, sizeof(thread_count), "_t-%d",
-                   kEncodePerfTestThreads[k]);
-          display_name += thread_count;
-        }
-
-        printf("{\n");
-        printf("\t\"type\" : \"encode_perf_test\",\n");
-        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
-        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
-        printf("\t\"totalFrames\" : %u,\n", frames);
-        printf("\t\"framesPerSecond\" : %f,\n", fps);
-        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
-        printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
-        printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
-        printf("}\n");
-      }
+      printf("{\n");
+      printf("\t\"type\" : \"encode_perf_test\",\n");
+      printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+      printf("\t\"videoName\" : \"%s\",\n", video_name);
+      printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+      printf("\t\"totalFrames\" : %u,\n", frames);
+      printf("\t\"framesPerSecond\" : %f,\n", fps);
+      printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+      printf("\t\"speed\" : %d\n", kEncodePerfTestSpeeds[j]);
+      printf("}\n");
    }
  }
 }
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -8,8 +8,6 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <string>
-
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
@@ -19,40 +17,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 namespace libvpx_test {
-void Encoder::InitEncoder(VideoSource *video) {
-  vpx_codec_err_t res;
-  const vpx_image_t *img = video->img();
-
-  if (video->img() && !encoder_.priv) {
-    cfg_.g_w = img->d_w;
-    cfg_.g_h = img->d_h;
-    cfg_.g_timebase = video->timebase();
-    cfg_.rc_twopass_stats_in = stats_->buf();
-
-    // Default to 1 thread.
-    cfg_.g_threads = 1;
-    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
-                             init_flags_);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-
-#if CONFIG_VP9_ENCODER
-    if (CodecInterface() == &vpx_codec_vp9_cx_algo) {
-      // Default to 1 tile column for VP9.
-      const int log2_tile_columns = 0;
-      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
-                               log2_tile_columns);
-      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-    } else
-#endif
-    {
-#if CONFIG_VP8_ENCODER
-      ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
-          << "Unknown Codec Interface";
-#endif
-    }
-  }
-}
-
 void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
  if (video->img())
    EncodeFrameInternal(*video, frame_flags);
@@ -75,6 +39,17 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
  vpx_codec_err_t res;
  const vpx_image_t *img = video.img();

+  // Handle first frame initialization
+  if (!encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video.timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
+                             init_flags_);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
  // Handle frame resizing
  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
    cfg_.g_w = img->d_w;
@@ -85,7 +60,8 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,

  // Encode the frame
  API_REGISTER_STATE_CHECK(
-      res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(),
+      res = vpx_codec_encode(&encoder_,
+                             video.img(), video.pts(), video.duration(),
                             frame_flags, deadline_));
  ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
 }
@@ -101,7 +77,6 @@ void Encoder::Flush() {

 void EncoderTest::InitializeConfig() {
  const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
-  dec_cfg_ = vpx_codec_dec_cfg_t();
  ASSERT_EQ(VPX_CODEC_OK, res);
 }

@@ -135,7 +110,6 @@ void EncoderTest::SetMode(TestMode mode) {
 static bool compare_img(const vpx_image_t *img1,
                        const vpx_image_t *img2) {
  bool match = (img1->fmt == img2->fmt) &&
-               (img1->cs == img2->cs) &&
               (img1->d_w == img2->d_w) &&
               (img1->d_h == img2->d_h);

@@ -184,18 +158,9 @@ void EncoderTest::RunLoop(VideoSource *video) {
    Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
                                                   &stats_);
    ASSERT_TRUE(encoder != NULL);
-
-    video->Begin();
-    encoder->InitEncoder(video);
-
-    unsigned long dec_init_flags = 0;  // NOLINT
-    // Use fragment decoder if encoder outputs partitions.
-    // NOTE: fragment decoder and partition encoder are only supported by VP8.
-    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
-      dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
-    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
+    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
    bool again;
-    for (again = true; again; video->Next()) {
+    for (again = true, video->Begin(); again; video->Next()) {
      again = (video->img() != NULL);

      PreEncodeFrameHook(video);
@@ -235,13 +200,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
        }
      }

-      // Flush the decoder when there are no more fragments.
-      if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
-        const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
-        if (!HandleDecodeResult(res_dec, *video, decoder))
-          break;
-      }
-
      if (has_dxdata && has_cxdata) {
        const vpx_image_t *img_enc = encoder->GetPreviewFrame();
        DxDataIterator dec_iter = decoder->GetDxData();
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -104,8 +104,6 @@ class Encoder {
    return CxDataIterator(&encoder_);
  }

-  void InitEncoder(VideoSource *video);
-
  const vpx_image_t *GetPreviewFrame() {
    return vpx_codec_get_preview_frame(&encoder_);
  }
@@ -140,12 +138,6 @@ class Encoder {
  }
 #endif

-  void Config(const vpx_codec_enc_cfg_t *cfg) {
-    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-    cfg_ = *cfg;
-  }
-
  void set_deadline(unsigned long deadline) {
    deadline_ = deadline;
  }
@@ -193,11 +185,6 @@ class EncoderTest {
  // Map the TestMode enum to the deadline_ and passes_ variables.
  void SetMode(TestMode mode);

-  // Set encoder flag.
-  void set_init_flags(unsigned long flag) {  // NOLINT(runtime/int)
-    init_flags_ = flag;
-  }
-
  // Main loop
  virtual void RunLoop(VideoSource *video);

@@ -251,7 +238,6 @@ class EncoderTest {

  bool                 abort_;
  vpx_codec_enc_cfg_t  cfg_;
-  vpx_codec_dec_cfg_t  dec_cfg_;
  unsigned int         passes_;
  unsigned long        deadline_;
  TwopassStatsStore    stats_;
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -13,11 +13,11 @@
 #include <string>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
@@ -27,16 +27,13 @@ using libvpx_test::ACMRandom;

 namespace {
 #if CONFIG_VP9_HIGHBITDEPTH
-const int kNumIterations = 1000;
+const int number_of_iterations = 1000;

 typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
-                                  const tran_low_t *dqcoeff,
-                                  intptr_t block_size,
-                                  int64_t *ssz, int bps);
-
+                               const tran_low_t *dqcoeff, intptr_t block_size,
+                               int64_t *ssz, int bps);
 typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
                        ErrorBlockParam;
-
 class ErrorBlockTest
  : public ::testing::TestWithParam<ErrorBlockParam> {
 public:
@@ -66,12 +63,12 @@ TEST_P(ErrorBlockTest, OperationCheck) {
  int64_t ret;
  int64_t ref_ssz;
  int64_t ref_ret;
-  for (int i = 0; i < kNumIterations; ++i) {
+  for (int i = 0; i < number_of_iterations; ++i) {
    int err_count = 0;
    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
    for (int j = 0; j < block_size; j++) {
-      coeff[j]   = rnd(2 << 20) - (1 << 20);
-      dqcoeff[j] = rnd(2 << 20) - (1 << 20);
+      coeff[j]   = rnd(2<<20)-(1<<20);
+      dqcoeff[j] = rnd(2<<20)-(1<<20);
    }
    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
                                  bit_depth_);
@@ -84,8 +81,8 @@ TEST_P(ErrorBlockTest, OperationCheck) {
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
-      << "Error: Error Block Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
+    << "Error: Error Block Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
 }

 TEST_P(ErrorBlockTest, ExtremeValues) {
@@ -99,8 +96,8 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
  int64_t ret;
  int64_t ref_ssz;
  int64_t ref_ret;
-  int max_val = ((1 << 20) - 1);
-  for (int i = 0; i < kNumIterations; ++i) {
+  int max_val = ((1<<20)-1);
+  for (int i = 0; i < number_of_iterations; ++i) {
    int err_count = 0;
    int k = (i / 9) % 5;

@@ -129,22 +126,21 @@ TEST_P(ErrorBlockTest, ExtremeValues) {
    err_count_total += err_count;
  }
  EXPECT_EQ(0, err_count_total)
-      << "Error: Error Block Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
+    << "Error: Error Block Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
 }

 using std::tr1::make_tuple;
-
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, ErrorBlockTest,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_block_error_sse2,
-                   &vp9_highbd_block_error_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_block_error_sse2,
-                   &vp9_highbd_block_error_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_block_error_sse2,
-                   &vp9_highbd_block_error_c, VPX_BITS_8)));
+  SSE2_C_COMPARE, ErrorBlockTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_8)));
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -37,7 +37,6 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  void Reset() {
    error_nframes_ = 0;
    droppable_nframes_ = 0;
-    pattern_switch_ = 0;
  }

  virtual void SetUp() {
@@ -57,77 +56,22 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    nframes_++;
  }

-  //
-  // Frame flags and layer id for temporal layers.
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // LAST is updated on base/layer 0, GOLDEN  updated on layer 1.
-  // Non-zero pattern_switch parameter means pattern will switch to
-  // not using LAST for frame_num >= pattern_switch.
-  int SetFrameFlags(int frame_num,
-                    int num_temp_layers,
-                    int pattern_switch) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-        if (frame_num % 2 == 0) {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 0: predict from LAST and ARF, update LAST.
-            frame_flags = VP8_EFLAG_NO_REF_GF |
-                          VP8_EFLAG_NO_UPD_GF |
-                          VP8_EFLAG_NO_UPD_ARF;
-          } else {
-            // Layer 0: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
-        } else {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 1: predict from L, GF, and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_UPD_ARF |
-                          VP8_EFLAG_NO_UPD_LAST;
-          } else {
-            // Layer 1: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
-        }
-    }
-    return frame_flags;
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
    frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
                      VP8_EFLAG_NO_UPD_GF |
                      VP8_EFLAG_NO_UPD_ARF);
-    // For temporal layer case.
-    if (cfg_.ts_number_layers > 1) {
-      frame_flags_ = SetFrameFlags(video->frame(),
-                                   cfg_.ts_number_layers,
-                                   pattern_switch_);
+    if (droppable_nframes_ > 0 &&
+        (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
        if (droppable_frames_[i] == video->frame()) {
-          std::cout << "Encoding droppable frame: "
+          std::cout << "             Encoding droppable frame: "
                    << droppable_frames_[i] << "\n";
+          frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
+                           VP8_EFLAG_NO_UPD_GF |
+                           VP8_EFLAG_NO_UPD_ARF);
+          return;
        }
      }
-    } else {
-       if (droppable_nframes_ > 0 &&
-         (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
-         for (unsigned int i = 0; i < droppable_nframes_; ++i) {
-           if (droppable_frames_[i] == video->frame()) {
-             std::cout << "Encoding droppable frame: "
-                       << droppable_frames_[i] << "\n";
-             frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
-                              VP8_EFLAG_NO_UPD_GF |
-                              VP8_EFLAG_NO_UPD_ARF);
-             return;
-           }
-         }
-       }
    }
  }

@@ -189,16 +133,11 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    return mismatch_nframes_;
  }

-  void SetPatternSwitch(int frame_switch) {
-     pattern_switch_ = frame_switch;
-   }
-
 private:
  double psnr_;
  unsigned int nframes_;
  unsigned int error_nframes_;
  unsigned int droppable_nframes_;
-  unsigned int pattern_switch_;
  double mismatch_psnr_;
  unsigned int mismatch_nframes_;
  unsigned int error_frames_[kMaxErrorFrames];
@@ -297,290 +236,7 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 #endif
 }

-// Check for successful decoding and no encoder/decoder mismatch
-// if we lose (i.e., drop before decoding) the enhancement layer frames for a
-// two layer temporal pattern. The base layer does not predict from the top
-// layer, so successful decoding is expected.
-TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 500;
-  cfg_.g_lag_in_frames = 0;
-
-  cfg_.rc_end_usage = VPX_CBR;
-  // 2 Temporal layers, no spatial layers, CBR mode.
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-  cfg_.ts_periodicity = 2;
-  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 40);
-
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  SetPatternSwitch(0);
-
-  // The odd frames are the enhancement layer for 2 layer pattern, so set
-  // those frames as droppable. Drop the last 7 frames.
-  unsigned int num_droppable_frames = 7;
-  unsigned int droppable_frame_list[] = {27, 29, 31, 33, 35, 37, 39};
-  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
-  SetErrorFrames(num_droppable_frames, droppable_frame_list);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
-
-  // Reset previously set of error/droppable frames.
-  Reset();
-}
-
-// Check for successful decoding and no encoder/decoder mismatch
-// for a two layer temporal pattern, where at some point in the
-// sequence, the LAST ref is not used anymore.
-TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 500;
-  cfg_.g_lag_in_frames = 0;
-
-  cfg_.rc_end_usage = VPX_CBR;
-  // 2 Temporal layers, no spatial layers, CBR mode.
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-  cfg_.ts_periodicity = 2;
-  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 100);
-
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  SetPatternSwitch(60);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
-
-  // Reset previously set of error/droppable frames.
-  Reset();
-}
-
-class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  ErrorResilienceTestLargeCodecControls()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)) {
-    Reset();
-  }
-
-  virtual ~ErrorResilienceTestLargeCodecControls() {}
-
-  void Reset() {
-    last_pts_ = 0;
-    tot_frame_number_ = 0;
-    // For testing up to 3 layers.
-    for (int i = 0; i < 3; ++i) {
-      bits_total_[i] = 0;
-    }
-    duration_ = 0.0;
-  }
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-  }
-
-  //
-  // Frame flags and layer id for temporal layers.
-  //
-
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // For three layers, test pattern is:
-  //   1      3    5      7
-  //      2           6
-  // 0          4            ....
-  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1,
-  // and ALTREF is updated on top layer for 3 layer pattern.
-  int SetFrameFlags(int frame_num, int num_temp_layers) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        // Layer 0: predict from L and ARF, update L.
-        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        // Layer 1: predict from L, G and ARF, and update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_UPD_ENTROPY;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        // Layer 0: predict from L, update L.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      } else if ((frame_num - 2) % 4 == 0) {
-        // Layer 1: predict from L, G,  update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_REF_ARF;
-      }  else if ((frame_num - 1) % 2 == 0) {
-        // Layer 2: predict from L, G, ARF; update ARG.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
-      }
-    }
-    return frame_flags;
-  }
-
-  int SetLayerId(int frame_num, int num_temp_layers) {
-    int layer_id = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        layer_id = 0;
-      } else {
-         layer_id = 1;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        layer_id = 0;
-      } else if ((frame_num - 2) % 4 == 0) {
-        layer_id = 1;
-      } else if ((frame_num - 1) % 2 == 0) {
-        layer_id = 2;
-      }
-    }
-    return layer_id;
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  libvpx_test::Encoder *encoder) {
-    if (cfg_.ts_number_layers > 1) {
-        int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
-        int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
-        if (video->frame() > 0) {
-          encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
-          encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
-        }
-       const vpx_rational_t tb = video->timebase();
-       timebase_ = static_cast<double>(tb.num) / tb.den;
-       duration_ = 0;
-       return;
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    // Time since last timestamp = duration.
-    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
-    if (duration > 1) {
-      // Update counter for total number of frames (#frames input to encoder).
-      // Needed for setting the proper layer_id below.
-      tot_frame_number_ += static_cast<int>(duration - 1);
-    }
-    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
-    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-    // Update the total encoded bits. For temporal layers, update the cumulative
-    // encoded bits per layer.
-    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
-      bits_total_[i] += frame_size_in_bits;
-    }
-    // Update the most recent pts.
-    last_pts_ = pkt->data.frame.pts;
-    ++tot_frame_number_;
-  }
-
-  virtual void EndPassHook(void) {
-    duration_ = (last_pts_ + 1) * timebase_;
-    if (cfg_.ts_number_layers  > 1) {
-      for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
-          ++layer) {
-        if (bits_total_[layer]) {
-          // Effective file datarate:
-          effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
-        }
-      }
-    }
-  }
-
-  double effective_datarate_[3];
-   private:
-    libvpx_test::TestMode encoding_mode_;
-    vpx_codec_pts_t last_pts_;
-    double timebase_;
-    int64_t bits_total_[3];
-    double duration_;
-    int tot_frame_number_;
-  };
-
-// Check two codec controls used for:
-// (1) for setting temporal layer id, and (2) for settings encoder flags.
-// This test invokes those controls for each frame, and verifies encoder/decoder
-// mismatch and basic rate control response.
-// TODO(marpan): Maybe move this test to datarate_test.cc.
-TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 2;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  cfg_.g_error_resilient = 1;
-
-  // 3 Temporal layers. Framerate decimation (4, 2, 1).
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.ts_periodicity = 4;
-  cfg_.ts_layer_id[0] = 0;
-  cfg_.ts_layer_id[1] = 2;
-  cfg_.ts_layer_id[2] = 1;
-  cfg_.ts_layer_id[3] = 2;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    Reset();
-    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
-
 VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
-                          ONE_PASS_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+
 }  // namespace
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -97,19 +97,13 @@ class ExternalFrameBufferList {
    return 0;
  }

-  // Marks the external frame buffer that |fb| is pointing to as free.
+  // Marks the external frame buffer that |fb| is pointing to as free.
  // Returns < 0 on an error.
  int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) {
-    if (fb == NULL) {
-      EXPECT_TRUE(fb != NULL);
-      return -1;
-    }
+    EXPECT_TRUE(fb != NULL);
    ExternalFrameBuffer *const ext_fb =
        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
-    if (ext_fb == NULL) {
-      EXPECT_TRUE(ext_fb != NULL);
-      return -1;
-    }
+    EXPECT_TRUE(ext_fb != NULL);
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
    return 0;
@@ -398,7 +392,7 @@ TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) {
  delete video;
 }

-#if CONFIG_WEBM_IO && 0
+#if CONFIG_WEBM_IO
 TEST_F(ExternalFrameBufferTest, MinFrameBuffers) {
  // Minimum number of external frame buffers for VP9 is
  // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS.
@@ -481,8 +475,8 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
 }
 #endif  // CONFIG_WEBM_IO

-//VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
-//                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-//                                              libvpx_test::kVP9TestVectors +
-//                                              libvpx_test::kNumVP9TestVectors));
+VP9_INSTANTIATE_TEST_CASE(ExternalFrameBufferMD5Test,
+                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                                              libvpx_test::kVP9TestVectors +
+                                              libvpx_test::kNumVP9TestVectors));
 }  // namespace
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -84,8 +84,8 @@ void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class Trans4x4TestBase {
 public:
@@ -426,7 +426,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -452,7 +452,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -466,7 +466,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -474,17 +474,14 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_c,
                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    NEON, Trans4x4HT,
+    DISABLED_NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
@@ -507,7 +504,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -535,5 +532,5 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -62,10 +62,6 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
 using libvpx_test::ACMRandom;

 namespace {
-
-const int kSignBiasMaxDiff255 = 1500;
-const int kSignBiasMaxDiff15 = 10000;
-
 typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
 typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
@@ -126,8 +122,8 @@ void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class FwdTrans8x8TestBase {
 public:
@@ -164,7 +160,7 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff255;
+      const int max_diff = 1125;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
@@ -177,7 +173,7 @@ class FwdTrans8x8TestBase {
    memset(count_sign_block, 0, sizeof(count_sign_block));

    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
+      // Initialize a test block with input range [-mask_/16, mask_/16].
      for (int j = 0; j < 64; ++j)
        test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
                              ((rnd.Rand16() & mask_) >> 4);
@@ -194,9 +190,9 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff15;
+      const int max_diff = 10000;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
@@ -624,8 +620,8 @@ class InvTrans8x8DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
-    pitch_ = 8;
+    thresh_   = GET_PARAM(2);
+    pitch_    = 8;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }
@@ -653,21 +649,20 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
+        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
@@ -676,12 +671,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #else
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
@@ -689,31 +683,24 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    NEON, FwdTrans8x8HT,
+    DISABLED_NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
@@ -726,13 +713,12 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fdct8x8_c,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
        make_tuple(&vp9_highbd_fdct8x8_sse2,
@@ -740,10 +726,10 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_highbd_fdct8x8_c,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
        make_tuple(&vp9_highbd_fdct8x8_sse2,
-                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
+                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
+        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+

-// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
    ::testing::Values(
@@ -765,12 +751,11 @@ INSTANTIATE_TEST_CASE_P(
                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
        make_tuple(&idct8x8_12,
                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
+

 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
-// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
-// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -13,22 +13,104 @@
 #include <cstdlib>
 #include <string>

-#include "test/yuv_video_source.h"
+#include "test/video_source.h"

 namespace libvpx_test {

 // This class extends VideoSource to allow parsing of raw yv12
 // so that we can do actual file encodes.
-class I420VideoSource : public YUVVideoSource {
+class I420VideoSource : public VideoSource {
 public:
  I420VideoSource(const std::string &file_name,
                  unsigned int width, unsigned int height,
                  int rate_numerator, int rate_denominator,
                  unsigned int start, int limit)
-      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,
-                       width, height,
-                       rate_numerator, rate_denominator,
-                       start, limit) {}
+      : file_name_(file_name),
+        input_file_(NULL),
+        img_(NULL),
+        start_(start),
+        limit_(limit),
+        frame_(0),
+        width_(0),
+        height_(0),
+        framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes raw_sz_, width_, height_ and allocates an img.
+    SetSize(width, height);
+  }
+
+  virtual ~I420VideoSource() {
+    vpx_img_free(img_);
+    if (input_file_)
+      fclose(input_file_);
+  }
+
+  virtual void Begin() {
+    if (input_file_)
+      fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+        << file_name_;
+    if (start_) {
+      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
+    }
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual vpx_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual vpx_rational_t timebase() const {
+    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  void SetSize(unsigned int width, unsigned int height) {
+    if (width != width_ || height != height_) {
+      vpx_img_free(img_);
+      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      raw_sz_ = width * height * 3 / 2;
+    }
+  }
+
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // Read a frame from input_file.
+    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  vpx_image_t *img_;
+  size_t raw_sz_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  int framerate_numerator_;
+  int framerate_denominator_;
 };

 }  // namespace libvpx_test
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -110,23 +110,21 @@ TEST_P(InvalidFileTest, ReturnCode) {
  RunTest();
 }

-//const DecodeParam kVP9InvalidFileTests[] = {
-//  {1, "invalid-vp90-02-v2.webm"},
-//  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
-//  {1, "invalid-vp90-03-v3.webm"},
-//  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
-//  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
-//  {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
-//  {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
-//  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
-//  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
-//  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
-//  {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
-//  {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
-//};
+const DecodeParam kVP9InvalidFileTests[] = {
+  {1, "invalid-vp90-02-v2.webm"},
+  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
+  {1, "invalid-vp90-03-v3.webm"},
+  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
+  {1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
+  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
+  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
+};

-//VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
-//                          ::testing::ValuesIn(kVP9InvalidFileTests));
+VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
+                          ::testing::ValuesIn(kVP9InvalidFileTests));

 // This class will include test vectors that are expected to fail
 // peek. However they are still expected to have no fatal failures.
@@ -142,26 +140,25 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
  RunTest();
 }

-//const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
-//  {1, "invalid-vp90-01-v2.webm"},
-//};
+const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
+  {1, "invalid-vp90-01-v2.webm"},
+};

-//VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
-//                          ::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests));
+VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
+                          ::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests));

-//const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
-//  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
-//  {4, "invalid-"
-//      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
-//  {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
-//  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
-//  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
-//};
+const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
+  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
+  {4, "invalid-"
+      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
+  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
+  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
+};

-//INSTANTIATE_TEST_CASE_P(
-//    VP9MultiThreaded, InvalidFileTest,
-//    ::testing::Combine(
-//        ::testing::Values(
-//            static_cast<const libvpx_test::CodecFactory*>(&libvpx_test::kVP9)),
-//        ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
+INSTANTIATE_TEST_CASE_P(
+    VP9MultiThreaded, InvalidFileTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory*>(&libvpx_test::kVP9)),
+        ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
 }  // namespace
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -21,9 +21,10 @@
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_loopfilter.h"
 #include "vpx/vpx_integer.h"

+#define MAX_LOOP_FILTER 63
+
 using libvpx_test::ACMRandom;

 namespace {
@@ -52,8 +53,9 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *thresh1);
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
-typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, vpx_bit_depth_t> loop8_param_t;
+typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t,
+                        vpx_bit_depth_t> dualloop8_param_t;

 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -107,36 +109,6 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2

-#if HAVE_NEON_ASM
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
-                              const uint8_t *limit, const uint8_t *thresh,
-                              int count) {
-  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
-                           const uint8_t *limit, const uint8_t *thresh,
-                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
-                                   const uint8_t *limit, const uint8_t *thresh,
-                                   int count) {
-  vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
-}
-
-void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
-                                const uint8_t *limit, const uint8_t *thresh,
-                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON_ASM
-
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@@ -150,7 +122,7 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int bit_depth_;
+  vpx_bit_depth_t bit_depth_;
  int mask_;
  loop_op_t loopfilter_op_;
  loop_op_t ref_loopfilter_op_;
@@ -169,7 +141,7 @@ class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int bit_depth_;
+  vpx_bit_depth_t bit_depth_;
  int mask_;
  dual_loop_op_t loopfilter_op_;
  dual_loop_op_t ref_loopfilter_op_;
@@ -179,7 +151,7 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  int32_t bd = bit_depth_;
+  vpx_bit_depth_t bd = bit_depth_;
  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
@@ -190,12 +162,19 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -242,7 +221,7 @@ TEST_P(Loop8Test6Param, OperationCheck) {
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -265,7 +244,7 @@ TEST_P(Loop8Test6Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
+  vpx_bit_depth_t bd = bit_depth_;
  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
@@ -274,27 +253,20 @@ TEST_P(Loop8Test6Param, ValueCheck) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
-
-  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
-  // function of sharpness_lvl and the loopfilter lvl as:
-  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
-  // ...
-  // vpx_memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
-  //            SIMD_WIDTH);
-  // This means that the largest value for mblim will occur when sharpness_lvl
-  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
-  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
-  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
-  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
-
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    while (tmp > 3*MAX_LOOP_FILTER + 4) {  // mblim  <= 3*MAX_LOOP_FILTER + 4
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -315,7 +287,7 @@ TEST_P(Loop8Test6Param, ValueCheck) {
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count);
    ASM_REGISTER_STATE_CHECK(
        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -337,7 +309,7 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  const int32_t bd = bit_depth_;
+  vpx_bit_depth_t bd = bit_depth_;
  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
@@ -348,12 +320,20 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    // lim  <= MAX_LOOP_FILTER
+    while (tmp > MAX_LOOP_FILTER) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -363,12 +343,19 @@ TEST_P(Loop8Test9Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -450,12 +437,19 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -465,12 +459,18 @@ TEST_P(Loop8Test9Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    tmp = rnd.Rand8();
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {  // mblim  <= 3*MAX_LOOP_FILTER + 4
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -486,7 +486,7 @@ TEST_P(Loop8Test9Param, ValueCheck) {
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    const int32_t bd = bit_depth_;
+    vpx_bit_depth_t bd = bit_depth_;
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
                       blimit1, limit1, thresh1, bd);
    ASM_REGISTER_STATE_CHECK(
@@ -518,151 +518,118 @@ using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
+    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
    ::testing::Values(
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 8),
+                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_8),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 8),
+                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_8),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 8),
+                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_8),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8),
+                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_8),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 8),
+                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_8),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 8),
+                   &wrapper_vertical_16_c, VPX_BITS_8),
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 10),
+                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_10),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 10),
+                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_10),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 10),
+                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_10),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10),
+                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_10),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 10),
+                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_10),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 10),
+                   &wrapper_vertical_16_c, VPX_BITS_10),
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 12),
+                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_12),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 12),
+                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_12),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 12),
+                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_12),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12),
+                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_12),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 12),
+                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_12),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 12),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 8),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 10),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 12)));
+                   &wrapper_vertical_16_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
+    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8),
-        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
+        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c,
+                   VPX_BITS_8),
+        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c,
+                   VPX_BITS_8),
+        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c,
+                   VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

-#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8)));
-#endif
-
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
+    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 8),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 10),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, 12),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, VPX_BITS_8),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, VPX_BITS_10),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
+    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c,
+                   VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE2
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE2_C_COMPARE_DUAL, Loop8Test9Param,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_12),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_12),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_12),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    SSE2_C_COMPARE_DUAL, Loop8Test9Param,
    ::testing::Values(
        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
+                   &vp9_lpf_horizontal_4_dual_c, VPX_BITS_8),
        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
+                   &vp9_lpf_horizontal_8_dual_c, VPX_BITS_8),
        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
-                   &vp9_lpf_vertical_4_dual_c, 8),
+                   &vp9_lpf_vertical_4_dual_c, VPX_BITS_8),
        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
-                   &vp9_lpf_vertical_8_dual_c, 8)));
+                   &vp9_lpf_vertical_8_dual_c, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif
-
-#if HAVE_NEON
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test6Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-// Using #if inside the macro is unsupported on MSVS but the tests are not
-// currently built for MSVS with ARM and NEON.
-        make_tuple(&vp9_lpf_horizontal_16_neon,
-                   &vp9_lpf_horizontal_16_c, 8),
-        make_tuple(&wrapper_vertical_16_neon,
-                   &wrapper_vertical_16_c, 8),
-        make_tuple(&wrapper_vertical_16_dual_neon,
-                   &wrapper_vertical_16_dual_c, 8),
-        make_tuple(&vp9_lpf_horizontal_8_neon,
-                   &vp9_lpf_horizontal_8_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_neon,
-                   &vp9_lpf_vertical_8_c, 8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_neon,
-                   &vp9_lpf_horizontal_4_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_neon,
-                   &vp9_lpf_vertical_4_c, 8)));
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test9Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_8_dual_neon,
-                   &vp9_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_dual_neon,
-                   &vp9_lpf_vertical_8_dual_c, 8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vp9_lpf_horizontal_4_dual_neon,
-                   &vp9_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vp9_lpf_vertical_4_dual_neon,
-                   &vp9_lpf_vertical_4_dual_c, 8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON
-
 }  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -230,7 +230,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct4x4_1_add_c,
                   TX_4X4, 1)));

-#if HAVE_NEON
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
    ::testing::Values(
@@ -258,7 +258,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct4x4_16_add_c,
                   &vp9_idct4x4_1_add_neon,
                   TX_4X4, 1)));
-#endif  // HAVE_NEON
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -8,6 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <math.h>
+#include <stdlib.h>
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
@@ -17,181 +19,335 @@
 #include "test/util.h"

 #include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/onyx.h"
-#include "vp8/encoder/block.h"
-#include "vp8/encoder/onyx_int.h"
-#include "vp8/encoder/quantize.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-const int kNumBlocks = 25;
-const int kNumBlockEntries = 16;
-
-typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);
-
-typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;

 using libvpx_test::ACMRandom;
-using std::tr1::make_tuple;

-// Create and populate a VP8_COMP instance which has a complete set of
-// quantization inputs as well as a second MACROBLOCKD for output.
-class QuantizeTestBase {
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 100;
+
+typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
+                           int skip_block, const int16_t *zbin,
+                           const int16_t *round, const int16_t *quant,
+                           const int16_t *quant_shift,
+                           tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                           const int16_t *dequant, int zbin_oq_value,
+                           uint16_t *eob, const int16_t *scan,
+                           const int16_t *iscan);
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
+    QuantizeParam;
+class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
 public:
-  virtual ~QuantizeTestBase() {
-    vp8_remove_compressor(&vp8_comp_);
-    vp8_comp_ = NULL;
-    vpx_free(macroblockd_dst_);
-    macroblockd_dst_ = NULL;
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  void SetupCompressor() {
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-
-    // The full configuration is necessary to generate the quantization tables.
-    VP8_CONFIG vp8_config;
-    vpx_memset(&vp8_config, 0, sizeof(vp8_config));
-
-    vp8_comp_ = vp8_create_compressor(&vp8_config);
-
-    // Set the tables based on a quantizer of 0.
-    vp8_set_quantizer(vp8_comp_, 0);
-
-    // Set up all the block/blockd pointers for the mb in vp8_comp_.
-    vp8cx_frame_init_quantizer(vp8_comp_);
-
-    // Copy macroblockd from the reference to get pre-set-up dequant values.
-    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
-        vpx_memalign(32, sizeof(*macroblockd_dst_)));
-    vpx_memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd,
-               sizeof(*macroblockd_dst_));
-    // Fix block pointers - currently they point to the blocks in the reference
-    // structure.
-    vp8_setup_block_dptrs(macroblockd_dst_);
-  }
-
-  void UpdateQuantizer(int q) {
-    vp8_set_quantizer(vp8_comp_, q);
-
-    vpx_memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd,
-               sizeof(*macroblockd_dst_));
-    vp8_setup_block_dptrs(macroblockd_dst_);
-  }
-
-  void FillCoeffConstant(int16_t c) {
-    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
-      vp8_comp_->mb.coeff[i] = c;
-    }
-  }
-
-  void FillCoeffRandom() {
-    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
-      vp8_comp_->mb.coeff[i] = rnd_.Rand8();
-    }
-  }
-
-  void CheckOutput() {
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
-                        sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks *
-                            kNumBlockEntries))
-        << "qcoeff mismatch";
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
-                        sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks *
-                            kNumBlockEntries))
-        << "dqcoeff mismatch";
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
-                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
-        << "eobs mismatch";
-  }
-
-  VP8_COMP *vp8_comp_;
-  MACROBLOCKD *macroblockd_dst_;
-
- private:
-  ACMRandom rnd_;
-};
-
-class QuantizeTest : public QuantizeTestBase,
-                     public ::testing::TestWithParam<VP8QuantizeParam> {
- protected:
+  virtual ~QuantizeTest() {}
  virtual void SetUp() {
-    SetupCompressor();
-    asm_quant_ = GET_PARAM(0);
-    c_quant_ = GET_PARAM(1);
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
  }

-  void RunComparison() {
-    for (int i = 0; i < kNumBlocks; ++i) {
-      ASM_REGISTER_STATE_CHECK(
-          c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i]));
-      ASM_REGISTER_STATE_CHECK(
-          asm_quant_(&vp8_comp_->mb.block[i], &macroblockd_dst_->block[i]));
-    }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

-    CheckOutput();
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+class Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~Quantize32Test() {}
+  virtual void SetUp() {
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
  }

- private:
-  VP8Quantize asm_quant_;
-  VP8Quantize c_quant_;
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
 };

-TEST_P(QuantizeTest, TestZeroInput) {
-  FillCoeffConstant(0);
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestLargeNegativeInput) {
-  FillCoeffConstant(0);
-  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
-  // like BUG=883 where the constant being compared was incorrectly initialized.
-  vp8_comp_->mb.coeff[0] = -8191;
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestRandomInput) {
-  FillCoeffRandom();
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestMultipleQ) {
-  for (int q = 0; q < QINDEX_RANGE; ++q) {
-    UpdateQuantizer(q);
-    FillCoeffRandom();
-    RunComparison();
+TEST_P(QuantizeTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,           1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;  // skip_block is set only on the first iteration
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16()&mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < count; ++j) {  // compare all count coefficients
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
 }
+TEST_P(Quantize32Test, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,           1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;  // skip_block is set only on the first iteration
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16()&mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < count; ++j) {  // compare all count coefficients
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+TEST_P(QuantizeTest, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;  // skip_block is set only on the first iteration
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    // Two random entries
+    for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = 0;
+    }
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    for (int j = 0; j < count; ++j) {  // compare all count coefficients
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+TEST_P(Quantize32Test, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int zbin_oq_value = 0;
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;  // skip_block is set only on the first iteration
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+        coeff_ptr[j] = 0;
+    }
+    // Two random entries
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr,
+                                          zbin_oq_value, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    for (int j = 0; j < count; ++j) {  // compare all count coefficients
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+using std::tr1::make_tuple;

 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
-        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
-                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
-                                                     &vp8_fast_quantize_b_c)));
-#endif  // HAVE_SSSE3
-
-#if HAVE_SSE4_1
+  SSE2_C_COMPARE, QuantizeTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, VPX_BITS_12)));
 INSTANTIATE_TEST_CASE_P(
-    SSE4_1, QuantizeTest,
-    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
-                                 &vp8_regular_quantize_b_c)));
-#endif  // HAVE_SSE4_1
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
-                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
-                                                     &vp8_fast_quantize_b_c)));
-#endif  // HAVE_NEON
+  SSE2_C_COMPARE, Quantize32Test,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -144,7 +144,6 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

 TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
-  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
@@ -154,9 +153,9 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);

    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
+        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
+        << "Frame " << frame << " had unexpected height";
  }
 }

@@ -261,116 +260,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
  }
 }

-vpx_img_fmt_t CspForFrameNumber(int frame) {
-  if (frame < 10)
-    return VPX_IMG_FMT_I420;
-  if (frame < 20)
-    return VPX_IMG_FMT_I444;
-  return VPX_IMG_FMT_I420;
-}
-
-class ResizeCspTest : public ResizeTest {
- protected:
-#if WRITE_COMPRESSED_STREAM
-  ResizeCspTest()
-      : ResizeTest(),
-        frame0_psnr_(0.0),
-        outfile_(NULL),
-        out_frames_(0) {}
-#else
-  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
-#endif
-
-  virtual ~ResizeCspTest() {}
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-#if WRITE_COMPRESSED_STREAM
-    outfile_ = fopen("vp91-2-05-cspchape.ivf", "wb");
-#endif
-  }
-
-  virtual void EndPassHook() {
-#if WRITE_COMPRESSED_STREAM
-    if (outfile_) {
-      if (!fseek(outfile_, 0, SEEK_SET))
-        write_ivf_file_header(&cfg_, out_frames_, outfile_);
-      fclose(outfile_);
-      outfile_ = NULL;
-    }
-#endif
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  libvpx_test::Encoder *encoder) {
-    if (CspForFrameNumber(video->frame()) != VPX_IMG_FMT_I420 &&
-        cfg_.g_profile != 1) {
-      cfg_.g_profile = 1;
-      encoder->Config(&cfg_);
-    }
-    if (CspForFrameNumber(video->frame()) == VPX_IMG_FMT_I420 &&
-        cfg_.g_profile != 0) {
-      cfg_.g_profile = 0;
-      encoder->Config(&cfg_);
-    }
-  }
-
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (!frame0_psnr_)
-      frame0_psnr_ = pkt->data.psnr.psnr[0];
-    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
-  }
-
-#if WRITE_COMPRESSED_STREAM
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    ++out_frames_;
-
-    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
-      write_ivf_file_header(&cfg_, 0, outfile_);
-
-    // Write frame header and data.
-    write_ivf_frame_header(pkt, outfile_);
-    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
-  }
-#endif
-
-  double frame0_psnr_;
-#if WRITE_COMPRESSED_STREAM
-  FILE *outfile_;
-  unsigned int out_frames_;
-#endif
-};
-
-class ResizingCspVideoSource : public ::libvpx_test::DummyVideoSource {
- public:
-  ResizingCspVideoSource() {
-    SetSize(kInitialWidth, kInitialHeight);
-    limit_ = 30;
-  }
-
-  virtual ~ResizingCspVideoSource() {}
-
- protected:
-  virtual void Next() {
-    ++frame_;
-    SetImageFormat(CspForFrameNumber(frame_));
-    FillFrame();
-  }
-};
-
-TEST_P(ResizeCspTest, TestResizeCspWorks) {
-  ResizingCspVideoSource video;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
-  cfg_.g_lag_in_frames = 0;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
 VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ResizeTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
 VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
                          ::testing::Values(::libvpx_test::kOnePassBest));
-VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
 }  // namespace
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -25,9 +25,9 @@
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
+#include "vpx/vpx_codec.h"
 #include "test/util.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_codec.h"


 #if CONFIG_VP8_ENCODER
@@ -43,21 +43,24 @@ typedef unsigned int (*SadMxNVp9Func)(const unsigned char *source_ptr,
                                      int source_stride,
                                      const unsigned char *reference_ptr,
                                      int reference_stride);
-typedef std::tr1::tuple<int, int, SadMxNVp9Func, int> SadMxNVp9Param;
-typedef uint32_t (*SadMxNAvgVp9Func)(const uint8_t *source_ptr,
-                                     int source_stride,
-                                     const uint8_t *reference_ptr,
-                                     int reference_stride,
-                                     const uint8_t *second_pred);
-typedef std::tr1::tuple<int, int, SadMxNAvgVp9Func, int> SadMxNAvgVp9Param;
+typedef std::tr1::tuple<int, int, SadMxNVp9Func, int>
+    SadMxNVp9Param;
+typedef unsigned int (*SadMxNAvgVp9Func)(const unsigned char *source_ptr,
+                                         int source_stride,
+                                         const unsigned char *reference_ptr,
+                                         int reference_stride,
+                                         const unsigned char *second_pred);
+typedef std::tr1::tuple<int, int, SadMxNAvgVp9Func, int>
+    SadMxNAvgVp9Param;
 #endif

 typedef void (*SadMxNx4Func)(const uint8_t *src_ptr,
                             int src_stride,
-                             const uint8_t *const ref_ptr[],
+                             const unsigned char *const ref_ptr[],
                             int ref_stride,
-                             uint32_t *sad_array);
-typedef std::tr1::tuple<int, int, SadMxNx4Func, int> SadMxNx4Param;
+                             unsigned int *sad_array);
+typedef std::tr1::tuple<int, int, SadMxNx4Func, int>
+    SadMxNx4Param;

 using libvpx_test::ACMRandom;

@@ -135,15 +138,13 @@ class SADTestBase : public ::testing::Test {
      second_pred_ = second_pred8_;
    } else {
      use_high_bit_depth_ = true;
-      bit_depth_ = static_cast<vpx_bit_depth_t>(bd_);
-      source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
+      bit_depth_ = static_cast<vpx_bit_depth_t>(bd_);
+      source_data_    = CONVERT_TO_BYTEPTR(source_data16_);
       reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
-      second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
+      second_pred_    = CONVERT_TO_BYTEPTR(second_pred16_);
    }
-#else
-    bit_depth_ = VPX_BITS_8;
 #endif
-    mask_ = (1 << bit_depth_) - 1;
+    mask_ = (1 << bit_depth_)-1;
    source_stride_ = (width_ + 31) & ~31;
    reference_stride_ = width_ * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
@@ -180,18 +181,15 @@ class SADTestBase : public ::testing::Test {
      for (int w = 0; w < width_; ++w) {
 #if CONFIG_VP9_HIGHBITDEPTH
        if (!use_high_bit_depth_) {
-          sad +=
-              abs(source8[h * source_stride_ + w] -
-                  reference8[h * reference_stride_ + w]);
+          sad += abs(source8[h * source_stride_ + w]
+                 - reference8[h * reference_stride_ + w]);
        } else {
-          sad +=
-              abs(source16[h * source_stride_ + w] -
-                  reference16[h * reference_stride_ + w]);
+          sad += abs(source16[h * source_stride_ + w]
+                 - reference16[h * reference_stride_ + w]);
        }
 #else
-        sad +=
-            abs(source[h * source_stride_ + w] -
-                reference[h * reference_stride_ + w]);
+        sad += abs(source[h * source_stride_ + w]
+                 - reference[h * reference_stride_ + w]);
 #endif
      }
      if (sad > max_sad) {
@@ -211,7 +209,7 @@ class SADTestBase : public ::testing::Test {
      const uint8_t *const source8 = source_data_;
      const uint8_t *const second_pred8 = second_pred_;
      const uint16_t *const reference16 =
-          CONVERT_TO_SHORTPTR(GetReference(block_idx));
+        CONVERT_TO_SHORTPTR(GetReference(block_idx));
      const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
      const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
 #else
@@ -223,20 +221,20 @@ class SADTestBase : public ::testing::Test {
      for (int w = 0; w < width_; ++w) {
 #if CONFIG_VP9_HIGHBITDEPTH
        if (!use_high_bit_depth_) {
-          const int tmp = second_pred8[h * width_ + w] +
-              reference8[h * reference_stride_ + w];
-          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          int tmp = second_pred8[h * width_ + w] +
+                    reference8[h * reference_stride_ + w];
+          uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
          sad += abs(source8[h * source_stride_ + w] - comp_pred);
        } else {
-          const int tmp = second_pred16[h * width_ + w] +
-              reference16[h * reference_stride_ + w];
-          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
+          int tmp = second_pred16[h * width_ + w] +
+                    reference16[h * reference_stride_ + w];
+          uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
          sad += abs(source16[h * source_stride_ + w] - comp_pred);
        }
 #else
-        const int tmp = second_pred[h * width_ + w] +
-            reference[h * reference_stride_ + w];
-        const uint8_t comp_pred = (tmp + 1) >> 1;
+        int tmp = second_pred[h * width_ + w] +
+                    reference[h * reference_stride_ + w];
+        uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
        sad += abs(source[h * source_stride_ + w] - comp_pred);
 #endif
      }
@@ -256,12 +254,12 @@ class SADTestBase : public ::testing::Test {
      for (int w = 0; w < width_; ++w) {
 #if CONFIG_VP9_HIGHBITDEPTH
        if (!use_high_bit_depth_) {
-          data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
+          data8[h * stride + w] = fill_constant;
        } else {
          data16[h * stride + w] = fill_constant;
        }
 #else
-        data[h * stride + w] = static_cast<uint8_t>(fill_constant);
+        data[h * stride + w] = fill_constant;
 #endif
      }
    }
@@ -984,6 +982,20 @@ const SadMxNVp9Func sad_16x8_sse2_vp9 = vp9_sad16x8_sse2;
 const SadMxNVp9Func sad_8x16_sse2_vp9 = vp9_sad8x16_sse2;
 const SadMxNVp9Func sad_8x8_sse2_vp9 = vp9_sad8x8_sse2;
 const SadMxNVp9Func sad_8x4_sse2_vp9 = vp9_sad8x4_sse2;
+const SadMxNVp9Param sse2_vp9_tests[] = {
+  make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
+  make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
+  make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
+  make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
+  make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
+  make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
+  make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
+  make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
+  make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
+  make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
+  make_tuple(8, 4, sad_8x4_sse2_vp9, -1),
+};
+INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::ValuesIn(sse2_vp9_tests));

 const SadMxNx4Func sad_64x64x4d_sse2 = vp9_sad64x64x4d_sse2;
 const SadMxNx4Func sad_64x32x4d_sse2 = vp9_sad64x32x4d_sse2;
@@ -996,7 +1008,18 @@ const SadMxNx4Func sad_16x8x4d_sse2 = vp9_sad16x8x4d_sse2;
 const SadMxNx4Func sad_8x16x4d_sse2 = vp9_sad8x16x4d_sse2;
 const SadMxNx4Func sad_8x8x4d_sse2 = vp9_sad8x8x4d_sse2;
 const SadMxNx4Func sad_8x4x4d_sse2 = vp9_sad8x4x4d_sse2;
-
+INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
+                        make_tuple(64, 64, sad_64x64x4d_sse2, -1),
+                        make_tuple(64, 32, sad_64x32x4d_sse2, -1),
+                        make_tuple(32, 64, sad_32x64x4d_sse2, -1),
+                        make_tuple(32, 32, sad_32x32x4d_sse2, -1),
+                        make_tuple(32, 16, sad_32x16x4d_sse2, -1),
+                        make_tuple(16, 32, sad_16x32x4d_sse2, -1),
+                        make_tuple(16, 16, sad_16x16x4d_sse2, -1),
+                        make_tuple(16, 8,  sad_16x8x4d_sse2,  -1),
+                        make_tuple(8, 16,  sad_8x16x4d_sse2,  -1),
+                        make_tuple(8, 8,   sad_8x8x4d_sse2,   -1),
+                        make_tuple(8, 4,   sad_8x4x4d_sse2,   -1)));
 #if CONFIG_VP9_HIGHBITDEPTH
 const SadMxNVp9Func highbd_sad8x4_sse2_vp9 = vp9_highbd_sad8x4_sse2;
 const SadMxNVp9Func highbd_sad8x8_sse2_vp9 = vp9_highbd_sad8x8_sse2;
@@ -1009,55 +1032,53 @@ const SadMxNVp9Func highbd_sad32x32_sse2_vp9 = vp9_highbd_sad32x32_sse2;
 const SadMxNVp9Func highbd_sad32x64_sse2_vp9 = vp9_highbd_sad32x64_sse2;
 const SadMxNVp9Func highbd_sad64x32_sse2_vp9 = vp9_highbd_sad64x32_sse2;
 const SadMxNVp9Func highbd_sad64x64_sse2_vp9 = vp9_highbd_sad64x64_sse2;
+const SadMxNVp9Param sse2_vp9_highbd_8_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 8),
+  make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 8),
+  make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 8),
+  make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 8),
+  make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 8),
+  make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 8),
+  make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 8),
+  make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 8),
+  make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 8),
+  make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 8),
+  make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 8)};
+INSTANTIATE_TEST_CASE_P(SSE2_8, SADVP9Test,
+                        ::testing::ValuesIn(sse2_vp9_highbd_8_tests));
+const SadMxNVp9Param sse2_vp9_highbd_10_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 10),
+  make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 10),
+  make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 10),
+  make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 10),
+  make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 10),
+  make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 10),
+  make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 10),
+  make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 10),
+  make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 10),
+  make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 10),
+  make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 10)};
+INSTANTIATE_TEST_CASE_P(SSE2_10, SADVP9Test,
+                        ::testing::ValuesIn(sse2_vp9_highbd_10_tests));
+const SadMxNVp9Param sse2_vp9_highbd_12_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 12),
+  make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 12),
+  make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 12),
+  make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 12),
+  make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 12),
+  make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 12),
+  make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 12),
+  make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 12),
+  make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 12),
+  make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 12),
+  make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 12)};
+INSTANTIATE_TEST_CASE_P(SSE2_12, SADVP9Test,
+                        ::testing::ValuesIn(sse2_vp9_highbd_12_tests));

-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
-                        make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
-                        make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
-                        make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
-                        make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
-                        make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
-                        make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
-                        make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
-                        make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
-                        make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
-                        make_tuple(8, 4, sad_8x4_sse2_vp9, -1),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 8),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 8),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 8),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 8),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 8),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 8),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 8),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 8),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 8),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 8),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 8),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 10),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 10),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 10),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 10),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 10),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 10),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 10),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 10),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 10),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 10),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 10),
-                        make_tuple(8, 4, highbd_sad8x4_sse2_vp9, 12),
-                        make_tuple(8, 8, highbd_sad8x8_sse2_vp9, 12),
-                        make_tuple(8, 16, highbd_sad8x16_sse2_vp9, 12),
-                        make_tuple(16, 8, highbd_sad16x8_sse2_vp9, 12),
-                        make_tuple(16, 16, highbd_sad16x16_sse2_vp9, 12),
-                        make_tuple(16, 32, highbd_sad16x32_sse2_vp9, 12),
-                        make_tuple(32, 16, highbd_sad32x16_sse2_vp9, 12),
-                        make_tuple(32, 32, highbd_sad32x32_sse2_vp9, 12),
-                        make_tuple(32, 64, highbd_sad32x64_sse2_vp9, 12),
-                        make_tuple(64, 32, highbd_sad64x32_sse2_vp9, 12),
-                        make_tuple(64, 64, highbd_sad64x64_sse2_vp9, 12)));
-
-const SadMxNAvgVp9Func highbd_sad8x4_avg_sse2_vp9 = vp9_highbd_sad8x4_avg_sse2;
-const SadMxNAvgVp9Func highbd_sad8x8_avg_sse2_vp9 = vp9_highbd_sad8x8_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad8x4_avg_sse2_vp9 =
+  vp9_highbd_sad8x4_avg_sse2;
+const SadMxNAvgVp9Func highbd_sad8x8_avg_sse2_vp9 =
+  vp9_highbd_sad8x8_avg_sse2;
 const SadMxNAvgVp9Func highbd_sad8x16_avg_sse2_vp9 =
  vp9_highbd_sad8x16_avg_sse2;
 const SadMxNAvgVp9Func highbd_sad16x8_avg_sse2_vp9 =
@@ -1076,41 +1097,48 @@ const SadMxNAvgVp9Func highbd_sad64x32_avg_sse2_vp9 =
  vp9_highbd_sad64x32_avg_sse2;
 const SadMxNAvgVp9Func highbd_sad64x64_avg_sse2_vp9 =
  vp9_highbd_sad64x64_avg_sse2;
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADavgVP9Test, ::testing::Values(
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 8),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 8),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 8),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 8),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 8),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 8),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 8),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 8),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 8),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 8),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 8),
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 10),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 10),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 10),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 10),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 10),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 10),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 10),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 10),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 10),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 10),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 10),
-                        make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 12),
-                        make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 12),
-                        make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 12),
-                        make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 12),
-                        make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 12),
-                        make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 12),
-                        make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 12),
-                        make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 12),
-                        make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 12),
-                        make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 12),
-                        make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 12)));
+const SadMxNAvgVp9Param avg_sse2_vp9_highbd_8_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 8),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 8),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 8),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 8),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 8),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 8),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 8),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 8),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 8),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 8),
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 8)};
+INSTANTIATE_TEST_CASE_P(SSE2_8, SADavgVP9Test,
+                        ::testing::ValuesIn(avg_sse2_vp9_highbd_8_tests));
+const SadMxNAvgVp9Param avg_sse2_vp9_highbd_10_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 10),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 10),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 10),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 10),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 10),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 10),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 10),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 10),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 10),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 10),
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 10)};
+INSTANTIATE_TEST_CASE_P(SSE2_10, SADavgVP9Test,
+                        ::testing::ValuesIn(avg_sse2_vp9_highbd_10_tests));
+const SadMxNAvgVp9Param avg_sse2_vp9_highbd_12_tests[] = {
+  make_tuple(8, 4, highbd_sad8x4_avg_sse2_vp9, 12),
+  make_tuple(8, 8, highbd_sad8x8_avg_sse2_vp9, 12),
+  make_tuple(8, 16, highbd_sad8x16_avg_sse2_vp9, 12),
+  make_tuple(16, 8, highbd_sad16x8_avg_sse2_vp9, 12),
+  make_tuple(16, 16, highbd_sad16x16_avg_sse2_vp9, 12),
+  make_tuple(16, 32, highbd_sad16x32_avg_sse2_vp9, 12),
+  make_tuple(32, 16, highbd_sad32x16_avg_sse2_vp9, 12),
+  make_tuple(32, 32, highbd_sad32x32_avg_sse2_vp9, 12),
+  make_tuple(32, 64, highbd_sad32x64_avg_sse2_vp9, 12),
+  make_tuple(64, 32, highbd_sad64x32_avg_sse2_vp9, 12),
+  make_tuple(64, 64, highbd_sad64x64_avg_sse2_vp9, 12)};
+INSTANTIATE_TEST_CASE_P(SSE2_12, SADavgVP9Test,
+                        ::testing::ValuesIn(avg_sse2_vp9_highbd_12_tests));

 const SadMxNx4Func highbd_sad_64x64x4d_sse2 = vp9_highbd_sad64x64x4d_sse2;
 const SadMxNx4Func highbd_sad_64x32x4d_sse2 = vp9_highbd_sad64x32x4d_sse2;
@@ -1119,25 +1147,14 @@ const SadMxNx4Func highbd_sad_32x32x4d_sse2 = vp9_highbd_sad32x32x4d_sse2;
 const SadMxNx4Func highbd_sad_32x16x4d_sse2 = vp9_highbd_sad32x16x4d_sse2;
 const SadMxNx4Func highbd_sad_16x32x4d_sse2 = vp9_highbd_sad16x32x4d_sse2;
 const SadMxNx4Func highbd_sad_16x16x4d_sse2 = vp9_highbd_sad16x16x4d_sse2;
-const SadMxNx4Func highbd_sad_16x8x4d_sse2 = vp9_highbd_sad16x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x16x4d_sse2 = vp9_highbd_sad8x16x4d_sse2;
-const SadMxNx4Func highbd_sad_8x8x4d_sse2 = vp9_highbd_sad8x8x4d_sse2;
-const SadMxNx4Func highbd_sad_8x4x4d_sse2 = vp9_highbd_sad8x4x4d_sse2;
-const SadMxNx4Func highbd_sad_4x8x4d_sse2 = vp9_highbd_sad4x8x4d_sse2;
-const SadMxNx4Func highbd_sad_4x4x4d_sse2 = vp9_highbd_sad4x4x4d_sse2;
+const SadMxNx4Func highbd_sad_16x8x4d_sse2  = vp9_highbd_sad16x8x4d_sse2;
+const SadMxNx4Func highbd_sad_8x16x4d_sse2  = vp9_highbd_sad8x16x4d_sse2;
+const SadMxNx4Func highbd_sad_8x8x4d_sse2   = vp9_highbd_sad8x8x4d_sse2;
+const SadMxNx4Func highbd_sad_8x4x4d_sse2   = vp9_highbd_sad8x4x4d_sse2;
+const SadMxNx4Func highbd_sad_4x8x4d_sse2   = vp9_highbd_sad4x8x4d_sse2;
+const SadMxNx4Func highbd_sad_4x4x4d_sse2   = vp9_highbd_sad4x4x4d_sse2;

-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64x4d_sse2, -1),
-                        make_tuple(64, 32, sad_64x32x4d_sse2, -1),
-                        make_tuple(32, 64, sad_32x64x4d_sse2, -1),
-                        make_tuple(32, 32, sad_32x32x4d_sse2, -1),
-                        make_tuple(32, 16, sad_32x16x4d_sse2, -1),
-                        make_tuple(16, 32, sad_16x32x4d_sse2, -1),
-                        make_tuple(16, 16, sad_16x16x4d_sse2, -1),
-                        make_tuple(16, 8, sad_16x8x4d_sse2,  -1),
-                        make_tuple(8, 16, sad_8x16x4d_sse2,  -1),
-                        make_tuple(8, 8, sad_8x8x4d_sse2,   -1),
-                        make_tuple(8, 4, sad_8x4x4d_sse2,   -1),
+INSTANTIATE_TEST_CASE_P(SSE2_8, SADx4Test, ::testing::Values(
                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 8),
                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 8),
                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 8),
@@ -1145,12 +1162,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 8),
                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 8),
                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 8),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  8),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  8),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   8),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   8),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   8),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   8),
+                        make_tuple(16, 8,  highbd_sad_16x8x4d_sse2,  8),
+                        make_tuple(8,  16, highbd_sad_8x16x4d_sse2,  8),
+                        make_tuple(8,  8,  highbd_sad_8x8x4d_sse2,   8),
+                        make_tuple(8,  4,  highbd_sad_8x4x4d_sse2,   8),
+                        make_tuple(4,  8,  highbd_sad_4x8x4d_sse2,   8),
+                        make_tuple(4,  4,  highbd_sad_4x4x4d_sse2,   8)));
+INSTANTIATE_TEST_CASE_P(SSE2_10, SADx4Test, ::testing::Values(
                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 10),
                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 10),
                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 10),
@@ -1158,12 +1176,13 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 10),
                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 10),
                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 10),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  10),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  10),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   10),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   10),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   10),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   10),
+                        make_tuple(16, 8,  highbd_sad_16x8x4d_sse2,  10),
+                        make_tuple(8,  16, highbd_sad_8x16x4d_sse2,  10),
+                        make_tuple(8,  8,  highbd_sad_8x8x4d_sse2,   10),
+                        make_tuple(8,  4,  highbd_sad_8x4x4d_sse2,   10),
+                        make_tuple(4,  8,  highbd_sad_4x8x4d_sse2,   10),
+                        make_tuple(4,  4,  highbd_sad_4x4x4d_sse2,   10)));
+INSTANTIATE_TEST_CASE_P(SSE2_12, SADx4Test, ::testing::Values(
                        make_tuple(64, 64, highbd_sad_64x64x4d_sse2, 12),
                        make_tuple(64, 32, highbd_sad_64x32x4d_sse2, 12),
                        make_tuple(32, 64, highbd_sad_32x64x4d_sse2, 12),
@@ -1171,38 +1190,12 @@ INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
                        make_tuple(32, 16, highbd_sad_32x16x4d_sse2, 12),
                        make_tuple(16, 32, highbd_sad_16x32x4d_sse2, 12),
                        make_tuple(16, 16, highbd_sad_16x16x4d_sse2, 12),
-                        make_tuple(16, 8, highbd_sad_16x8x4d_sse2,  12),
-                        make_tuple(8, 16, highbd_sad_8x16x4d_sse2,  12),
-                        make_tuple(8, 8, highbd_sad_8x8x4d_sse2,   12),
-                        make_tuple(8, 4, highbd_sad_8x4x4d_sse2,   12),
-                        make_tuple(4, 8, highbd_sad_4x8x4d_sse2,   12),
-                        make_tuple(4, 4, highbd_sad_4x4x4d_sse2,   12)));
-#else
-INSTANTIATE_TEST_CASE_P(SSE2, SADVP9Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64_sse2_vp9, -1),
-                        make_tuple(64, 32, sad_64x32_sse2_vp9, -1),
-                        make_tuple(32, 64, sad_32x64_sse2_vp9, -1),
-                        make_tuple(32, 32, sad_32x32_sse2_vp9, -1),
-                        make_tuple(32, 16, sad_32x16_sse2_vp9, -1),
-                        make_tuple(16, 32, sad_16x32_sse2_vp9, -1),
-                        make_tuple(16, 16, sad_16x16_sse2_vp9, -1),
-                        make_tuple(16, 8, sad_16x8_sse2_vp9, -1),
-                        make_tuple(8, 16, sad_8x16_sse2_vp9, -1),
-                        make_tuple(8, 8, sad_8x8_sse2_vp9, -1),
-                        make_tuple(8, 4, sad_8x4_sse2_vp9, -1)));
-
-INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::Values(
-                        make_tuple(64, 64, sad_64x64x4d_sse2, -1),
-                        make_tuple(64, 32, sad_64x32x4d_sse2, -1),
-                        make_tuple(32, 64, sad_32x64x4d_sse2, -1),
-                        make_tuple(32, 32, sad_32x32x4d_sse2, -1),
-                        make_tuple(32, 16, sad_32x16x4d_sse2, -1),
-                        make_tuple(16, 32, sad_16x32x4d_sse2, -1),
-                        make_tuple(16, 16, sad_16x16x4d_sse2, -1),
-                        make_tuple(16, 8, sad_16x8x4d_sse2,  -1),
-                        make_tuple(8, 16, sad_8x16x4d_sse2,  -1),
-                        make_tuple(8, 8, sad_8x8x4d_sse2,   -1),
-                        make_tuple(8, 4, sad_8x4x4d_sse2,   -1)));
+                        make_tuple(16, 8,  highbd_sad_16x8x4d_sse2,  12),
+                        make_tuple(8,  16, highbd_sad_8x16x4d_sse2,  12),
+                        make_tuple(8,  8,  highbd_sad_8x8x4d_sse2,   12),
+                        make_tuple(8,  4,  highbd_sad_8x4x4d_sse2,   12),
+                        make_tuple(4,  8,  highbd_sad_4x8x4d_sse2,   12),
+                        make_tuple(4,  4,  highbd_sad_4x4x4d_sse2,   12)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // CONFIG_USE_X86INC
 #endif  // CONFIG_VP9_ENCODER
@@ -1234,24 +1227,14 @@ INSTANTIATE_TEST_CASE_P(SSE3, SADTest, ::testing::Values(
 #endif  // CONFIG_USE_X86INC
 #endif  // HAVE_SSSE3

-#if CONFIG_VP9_ENCODER
 #if HAVE_AVX2
+#if CONFIG_VP9_ENCODER
 const SadMxNx4Func sad_64x64x4d_avx2 = vp9_sad64x64x4d_avx2;
 const SadMxNx4Func sad_32x32x4d_avx2 = vp9_sad32x32x4d_avx2;
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::Values(
                        make_tuple(32, 32, sad_32x32x4d_avx2, -1),
                        make_tuple(64, 64, sad_64x64x4d_avx2, -1)));
+#endif  // CONFIG_VP9_ENCODER
 #endif  // HAVE_AVX2

-#if HAVE_NEON
-const SadMxNx4Func sad_16x16x4d_neon = vp9_sad16x16x4d_neon;
-const SadMxNx4Func sad_32x32x4d_neon = vp9_sad32x32x4d_neon;
-const SadMxNx4Func sad_64x64x4d_neon = vp9_sad64x64x4d_neon;
-INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::Values(
-                        make_tuple(16, 16, sad_16x16x4d_neon, -1),
-                        make_tuple(32, 32, sad_32x32x4d_neon, -1),
-                        make_tuple(64, 64, sad_64x64x4d_neon, -1)));
-#endif  // HAVE_NEON
-#endif  // CONFIG_VP9_ENCODER
-
 }  // namespace
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -225,9 +225,10 @@ class SvcTest : public ::testing::Test {
    EXPECT_EQ(received_frames, n);
  }

-  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
-                             const int num_super_frames,
-                             const int remained_spatial_layers) {
+  void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs,
+                                        const int num_super_frames,
+                                        const int remained_spatial_layers,
+                                        const bool is_multiple_frame_contexts) {
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(num_super_frames, 0);
    ASSERT_GT(remained_spatial_layers, 0);
@@ -249,6 +250,45 @@ class SvcTest : public ::testing::Test {
      if (frame_count == 0) {
        // There's no super frame but only a single frame.
        ASSERT_EQ(1, remained_spatial_layers);
+        if (is_multiple_frame_contexts) {
+          // Make a new super frame.
+          uint8_t marker = 0xc1;
+          unsigned int mask;
+          int mag;
+
+          // Choose the magnitude.
+          for (mag = 0, mask = 0xff; mag < 4; ++mag) {
+            if (inputs[i].sz < mask)
+              break;
+            mask <<= 8;
+            mask |= 0xff;
+          }
+          marker |= mag << 3;
+          int index_sz = 2 + (mag + 1) * 2;
+
+          inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16);
+          ASSERT_TRUE(inputs[i].buf != NULL);
+          uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
+          frame_data[0] &= ~2;      // Set the show_frame flag to 0.
+          frame_data += inputs[i].sz;
+          // Add a one-byte frame with show_existing_frame set.
+          *frame_data++ = 0x88;
+
+          // Write the super frame index.
+          *frame_data++ = marker;
+
+          frame_sizes[0] = inputs[i].sz;
+          frame_sizes[1] = 1;
+          for (int j = 0; j < 2; ++j) {
+            unsigned int this_sz = frame_sizes[j];
+            for (int k = 0; k <= mag; k++) {
+              *frame_data++ = this_sz & 0xff;
+              this_sz >>= 8;
+            }
+          }
+          *frame_data++ = marker;
+          inputs[i].sz += index_sz + 1;
+        }
      } else {
        // Found a super frame.
        uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
@@ -264,13 +304,16 @@ class SvcTest : public ::testing::Test {
        }
        ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
            << "remained_spatial_layers: " << remained_spatial_layers
-            << "    super_frame: " << i;
-        if (frame == frame_count - 1)
+            << "    super_frame: " << i
+            << "    is_multiple_frame_context: " << is_multiple_frame_contexts;
+        if (frame == frame_count - 1 && !is_multiple_frame_contexts)
          continue;

        frame_data += frame_sizes[frame];

        // We need to add one more frame for multiple frame contexts.
+        if (is_multiple_frame_contexts)
+          ++frame;
        uint8_t marker =
            static_cast<const uint8_t*>(inputs[i].buf)[inputs[i].sz - 1];
        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -280,14 +323,35 @@ class SvcTest : public ::testing::Test {
        marker |= frame;

        // Copy existing frame sizes.
-        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
-                new_index_sz - 2);
+        memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1),
+                frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
+        if (is_multiple_frame_contexts) {
+          // Add a one-byte frame with the show_existing_frame flag set.
+          *frame_data++ = 0x88 | (remained_spatial_layers - 1);
+        }
        // New marker.
        frame_data[0] = marker;
        frame_data += (mag * (frame + 1) + 1);

+        if (is_multiple_frame_contexts) {
+          // Write the frame size for the one byte frame.
+          frame_data -= mag;
+          *frame_data++ = 1;
+          for (uint32_t j = 1; j < mag; ++j) {
+            *frame_data++ = 0;
+          }
+        }
+
        *frame_data++ = marker;
        inputs[i].sz = frame_data - frame_start;
+
+        if (is_multiple_frame_contexts) {
+          // Change the show frame flag to 0 for all frames.
+          for (int j = 0; j < frame; ++j) {
+            frame_start[0] &= ~2;
+            frame_start += frame_sizes[j];
+          }
+        }
      }
    }
  }
@@ -491,7 +555,7 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropEnhancementLayers(&outputs[0], 10, 1);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -509,13 +573,13 @@ TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) {
  Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);

  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 4);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false);
  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 3);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false);
  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 2);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false);
  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 1);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
  DecodeNFrames(&outputs[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
@@ -552,9 +616,9 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
  DecodeNFrames(&outputs[0], 20);
-  DropEnhancementLayers(&outputs[0], 20, 2);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false);
  DecodeNFrames(&outputs[0], 20);
-  DropEnhancementLayers(&outputs[0], 20, 1);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false);
  DecodeNFrames(&outputs[0], 20);

  FreeBitstreamBuffers(&outputs[0], 20);
@@ -585,6 +649,7 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -602,7 +667,7 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropEnhancementLayers(&outputs[0], 10, 1);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -621,6 +686,7 @@ TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -641,13 +707,32 @@ TEST_F(SvcTest,
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);

-  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 2);
-  DecodeNFrames(&outputs[0], 10);
-  DropEnhancementLayers(&outputs[0], 10, 1);
-  DecodeNFrames(&outputs[0], 10);
+  vpx_fixed_buf outputs_new[10];
+  for (int i = 0; i < 10; ++i) {
+    outputs_new[i].buf = malloc(outputs[i].sz + 16);
+    ASSERT_TRUE(outputs_new[i].buf != NULL);
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true);
+  DecodeNFrames(&outputs_new[0], 10);
+
+  for (int i = 0; i < 10; ++i) {
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true);
+  DecodeNFrames(&outputs_new[0], 10);
+
+  for (int i = 0; i < 10; ++i) {
+    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
+    outputs_new[i].sz = outputs[i].sz;
+  }
+  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true);
+  DecodeNFrames(&outputs_new[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs_new[0], 10);
 }

 TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {
@@ -684,6 +769,7 @@ TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -728,6 +814,7 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

  vpx_fixed_buf base_layer[5];
  for (int i = 0; i < 5; ++i)
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -7,17 +7,13 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
-LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv

-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

@@ -554,8 +550,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
@@ -656,34 +650,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5
-ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
-endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
@@ -698,16 +666,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.iv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
@@ -722,13 +684,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s738
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
-# Encode / Decode test
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
 # BBB VP9 streams
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_426x240_tile_1x1_180kbps.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_640x360_tile_1x2_337kbps.webm
@@ -764,6 +721,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
 endif  # CONFIG_ENCODE_PERF_TESTS
-
-# sort and remove duplicates
-LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -17,15 +17,12 @@ d637297561dd904eb2c97a9015deeb31c4a1e8d2  invalid-vp90-2-08-tile_1x4_frame_paral
 a432f96ff0a787268e2f94a8092ab161a18d1b06  park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5  park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17  park_joy_90p_10_444.y4m
-c934da6fb8cc54ee2a8c17c54cf6076dac37ead0  park_joy_90p_10_440.yuv
 614c32ae1eca391e867c70d19974f0d62664dd99  park_joy_90p_12_420.y4m
 c92825f1ea25c5c37855083a69faac6ac4641a9e  park_joy_90p_12_422.y4m
 b592189b885b6cc85db55cc98512a197d73d3b34  park_joy_90p_12_444.y4m
-82c1bfcca368c2f22bad7d693d690d5499ecdd11  park_joy_90p_12_440.yuv
 4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c  park_joy_90p_8_420.y4m
 7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947  park_joy_90p_8_422.y4m
 bdb7856e6bc93599bdda05c2e773a9f22b6c6d03  park_joy_90p_8_444.y4m
-81e1f3843748438b8f2e71db484eb22daf72e939  park_joy_90p_8_440.yuv
 b1f1c3ec79114b9a0651af24ce634afb44a9a419  rush_hour_444.y4m
 5184c46ddca8b1fadd16742e8500115bc8f749da  vp80-00-comprehensive-001.ivf
 65bf1bbbced81b97bd030f376d1b7f61a224793f  vp80-00-comprehensive-002.ivf
@@ -670,28 +667,8 @@ e3ab35d4316c5e81325c50f5236ceca4bc0d35df  vp90-2-15-segkey.webm.md5
 5661b0168752969f055eec37b05fa9fa947dc7eb  vp90-2-16-intra-only.webm.md5
 c01bb7938f9a9f25e0c37afdec2f2fb73b6cc7fa  vp90-2-17-show-existing-frame.webm
 cc75f351818b9a619818f5cc77b9bc013d0c1e11  vp90-2-17-show-existing-frame.webm.md5
-013708bd043f0821a3e56fb8404d82e7a0c7af6c  vp91-2-04-yuv422.webm
-1e58a7d23adad830a672f1733c9d2ae17890d59c  vp91-2-04-yuv422.webm.md5
-25d78f28948789d159a9453ebc13048b818251b1  vp91-2-04-yuv440.webm
-81b3870b27a7f695ef6a43e87ab04bbdb5aee2f5  vp91-2-04-yuv440.webm.md5
 0321d507ce62dedc8a51b4e9011f7a19aed9c3dc  vp91-2-04-yuv444.webm
 367e423dd41fdb49aa028574a2cfec5c2f325c5c  vp91-2-04-yuv444.webm.md5
-f77673b566f686853adefe0c578ad251b7241281  vp92-2-20-10bit-yuv420.webm
-abdedfaddacbbe1a15ac7a54e86360f03629fb7a  vp92-2-20-10bit-yuv420.webm.md5
-0c2c355a1b17b28537c5a3b19997c8783b69f1af  vp92-2-20-12bit-yuv420.webm
-afb2c2798703e039189b0a15c8ac5685aa51d33f  vp92-2-20-12bit-yuv420.webm.md5
-0d661bc6e83da33238981481efd1b1802d323d88  vp93-2-20-10bit-yuv422.webm
-10318907063db22eb02fad332556edbbecd443cc  vp93-2-20-10bit-yuv422.webm.md5
-ebc6be2f7511a0bdeac0b18c67f84ba7168839c7  vp93-2-20-12bit-yuv422.webm
-235232267c6a1dc8a11e45d600f1c99d2f8b42d4  vp93-2-20-12bit-yuv422.webm.md5
-f76b11b26d4beaceac7a7e7729dd5054d095164f  vp93-2-20-10bit-yuv440.webm
-757b33b5ac969c5999999488a731a3d1e6d9fb88  vp93-2-20-10bit-yuv440.webm.md5
-df8807dbd29bec795c2db9c3c18e511fbb988101  vp93-2-20-12bit-yuv440.webm
-ea4100930c3f59a1c23fbb33ab0ea01151cae159  vp93-2-20-12bit-yuv440.webm.md5
-189c1b5f404ff41a50a7fc96341085ad541314a9  vp93-2-20-10bit-yuv444.webm
-2dd0177c2f9d970b6e698892634c653630f91f40  vp93-2-20-10bit-yuv444.webm.md5
-bd44cf6e1c27343e3639df9ac21346aedd5d6973  vp93-2-20-12bit-yuv444.webm
-f36e5bdf5ec3213f32c0ddc82f95d82c5133bf27  vp93-2-20-12bit-yuv444.webm.md5
 eb438c6540eb429f74404eedfa3228d409c57874  desktop_640_360_30.yuv
 89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab  kirland_640_480_30.yuv
 33c533192759e5bb4f07abfbac389dc259db4686  macmarcomoving_640_480_30.yuv
@@ -706,8 +683,6 @@ c12918cf0a716417fba2de35c3fc5ab90e52dfce  vp90-2-18-resize.ivf.md5
 717da707afcaa1f692ff1946f291054eb75a4f06  screendata.y4m
 b7c1296630cdf1a7ef493d15ff4f9eb2999202f6  invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
 0a3884edb3fd8f9d9b500223e650f7de257b67d8  invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
-359e138dfb66863828397b77000ea7a83c844d02  invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
-bbd33de01c17b165b4ce00308e8a19a942023ab8  invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
 fac89b5735be8a86b0dc05159f996a5c3208ae32  invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
 0a3884edb3fd8f9d9b500223e650f7de257b67d8  invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf.res
 4506dfdcdf8ee4250924b075a0dcf1f070f72e5a  invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf
@@ -728,17 +703,3 @@ b03c408cf23158638da18dbc3323b99a1635c68a  invalid-vp90-2-12-droppable_1.ivf.s367
 a61774cf03fc584bd9f0904fc145253bb8ea6c4c  invalid-vp91-2-mixedrefcsp-444to420.ivf.res
 812d05a64a0d83c1b504d0519927ddc5a2cdb273  invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf
 1e472baaf5f6113459f0399a38a5a5e68d17799d  invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
-f97088c7359fc8d3d5aa5eafe57bc7308b3ee124  vp90-2-20-big_superframe-01.webm
-47d7d409785afa33b123376de0c907336e6c7bd7  vp90-2-20-big_superframe-01.webm.md5
-65ade6d2786209582c50d34cfe22b3cdb033abaf  vp90-2-20-big_superframe-02.webm
-7c0ed8d04c4d06c5411dd2e5de2411d37f092db5  vp90-2-20-big_superframe-02.webm.md5
-667ec8718c982aef6be07eb94f083c2efb9d2d16  vp90-2-07-frame_parallel-1.webm
-bfc82bf848e9c05020d61e3ffc1e62f25df81d19  vp90-2-07-frame_parallel-1.webm.md5
-efd5a51d175cfdacd169ed23477729dc558030dc  invalid-vp90-2-07-frame_parallel-1.webm
-9f912712ec418be69adb910e2ca886a63c4cec08  invalid-vp90-2-07-frame_parallel-2.webm
-445f5a53ca9555341852997ccdd480a51540bd14  invalid-vp90-2-07-frame_parallel-3.webm
-d18c90709a0d03c82beadf10898b27d88fff719c  invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf
-d06285d109ecbaef63b0cbcc44d70a129186f51c  invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res
-e60d859b0ef2b331b21740cf6cb83fabe469b079  invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf
-0ae808dca4d3c1152a9576e14830b6faa39f1b4a  invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
-9cfc855459e7549fd015c79e8eca512b2f2cb7e3  niklas_1280_720_30.y4m
--- a/test/test.mk
+++ b/test/test.mk
@@ -22,36 +22,28 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc

 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_test_driver.cc
+LIBVPX_TEST_SRCS-yes                   += encode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += encode_test_driver.h

-## IVF writing.
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../ivfenc.c ../ivfenc.h
-
 ## Y4m parsing.
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h

@@ -97,7 +89,6 @@ ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
 # These tests require both the encoder and decoder to be built.
 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
 LIBVPX_TEST_SRCS-yes                   += vp8_boolcoder_test.cc
-LIBVPX_TEST_SRCS-yes                   += vp8_fragments_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC)    += pp_filter_test.cc
@@ -106,7 +97,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc

 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += intrapred_test.cc
@@ -130,7 +120,7 @@ LIBVPX_TEST_SRCS-yes                   += partial_idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += superframe_test.cc
 LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 LIBVPX_TEST_SRCS-yes                   += vp9_boolcoder_test.cc
-LIBVPX_TEST_SRCS-yes                   += vp9_encoder_parms_get_to_decoder.cc
+
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += convolve_test.cc
@@ -144,8 +134,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += quantize_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += vp9_intrapred_test.cc

 ifeq ($(CONFIG_VP9_ENCODER),yes)
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -12,7 +12,6 @@
 #include <cstdlib>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "../tools_common.h"
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
@@ -27,24 +26,10 @@

 namespace {

-enum DecodeMode {
-  kSerialMode,
-  kFrameParallelMode
-};
-
-const int kDecodeMode = 0;
-const int kThreads = 1;
-const int kFileName = 2;
-
-typedef std::tr1::tuple<int, int, const char*> DecodeParam;
-
 class TestVectorTest : public ::libvpx_test::DecoderTest,
-    public ::libvpx_test::CodecTestWithParam<DecodeParam> {
+    public ::libvpx_test::CodecTestWithParam<const char*> {
 protected:
-  TestVectorTest()
-      : DecoderTest(GET_PARAM(0)),
-        md5_file_(NULL) {
-  }
+  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}

  virtual ~TestVectorTest() {
    if (md5_file_)
@@ -86,25 +71,8 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // checksums match the correct md5 data, then the test is passed. Otherwise,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
-  const DecodeParam input = GET_PARAM(1);
-  const std::string filename = std::tr1::get<kFileName>(input);
-  const int threads = std::tr1::get<kThreads>(input);
-  const int mode = std::tr1::get<kDecodeMode>(input);
+  const std::string filename = GET_PARAM(1);
  libvpx_test::CompressedVideoSource *video = NULL;
-  vpx_codec_flags_t flags = 0;
-  vpx_codec_dec_cfg_t cfg = {0};
-  char str[256];
-
-  if (mode == kFrameParallelMode) {
-    flags |= VPX_CODEC_USE_FRAME_THREADING;
-  }
-
-  cfg.threads = threads;
-
-  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
-           "file: %s  mode: %s threads: %d",
-           filename.c_str(), mode == 0 ? "Serial" : "Parallel", threads);
-  SCOPED_TRACE(str);

  // Open compressed video file.
  if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -124,49 +92,18 @@ TEST_P(TestVectorTest, MD5Match) {
  const std::string md5_filename = filename + ".md5";
  OpenMD5File(md5_filename);

-  // Set decode config and flags.
-  set_cfg(cfg);
-  set_flags(flags);
-
  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video, cfg));
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
  delete video;
 }

-// Test VP8 decode in serial mode with single thread.
-// NOTE: VP8 only support serial mode.
-VP8_INSTANTIATE_TEST_CASE(
-    TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(0),  // Serial Mode.
-        ::testing::Values(1),  // Single thread.
-        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
-                            libvpx_test::kVP8TestVectors +
-                                libvpx_test::kNumVP8TestVectors)));
+VP8_INSTANTIATE_TEST_CASE(TestVectorTest,
+                          ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
+                                              libvpx_test::kVP8TestVectors +
+                                              libvpx_test::kNumVP8TestVectors));
+VP9_INSTANTIATE_TEST_CASE(TestVectorTest,
+                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                                              libvpx_test::kVP9TestVectors +
+                                              libvpx_test::kNumVP9TestVectors));

-// Test VP9 decode in serial mode with single thread.
-//VP9_INSTANTIATE_TEST_CASE(
-//    TestVectorTest,
-//    ::testing::Combine(
-//        ::testing::Values(0),  // Serial Mode.
-//        ::testing::Values(1),  // Single thread.
-//        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-//                            libvpx_test::kVP9TestVectors +
-//                                libvpx_test::kNumVP9TestVectors)));
-
-
-//#if CONFIG_VP9_DECODER
-//// Test VP9 decode in frame parallel mode with different number of threads.
-//INSTANTIATE_TEST_CASE_P(
-//    VP9MultiThreadedFrameParallel, TestVectorTest,
-//    ::testing::Combine(
-//        ::testing::Values(
-//            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-//        ::testing::Combine(
-//            ::testing::Values(1),        // Frame Parallel mode.
-//            ::testing::Range(2, 9),      // With 2 ~ 8 threads.
-//            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-//                                libvpx_test::kVP9TestVectors +
-//                                    libvpx_test::kNumVP9TestVectors))));
-//#endif
 }  // namespace
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -184,14 +184,6 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
  "vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm",
  "vp91-2-04-yuv444.webm",
-  "vp91-2-04-yuv422.webm", "vp91-2-04-yuv440.webm",
-#if CONFIG_VP9_HIGHBITDEPTH
-  "vp92-2-20-10bit-yuv420.webm", "vp92-2-20-12bit-yuv420.webm",
-  "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
-  "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
-  "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
-#endif  // CONFIG_VP9_HIGHBITDEPTH`
-  "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -106,24 +106,22 @@ check_git_hashes() {
  fi
 }

-# $1 is the name of an environment variable containing a directory name to
-# test.
-test_env_var_dir() {
-  local dir=$(eval echo "\${$1}")
-  if [ ! -d "${dir}" ]; then
-    elog "'${dir}': No such directory"
-    elog "The $1 environment variable must be set to a valid directory."
-    return 1
-  fi
-}
-
 # This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and
 # LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that
 # the variables are set and that they all evaluate to directory paths.
 verify_vpx_test_environment() {
-  test_env_var_dir "LIBVPX_BIN_PATH" \
-    && test_env_var_dir "LIBVPX_CONFIG_PATH" \
-    && test_env_var_dir "LIBVPX_TEST_DATA_PATH"
+  if [ ! -d "${LIBVPX_BIN_PATH}" ]; then  # Unset or empty also fails -d.
+    echo "LIBVPX_BIN_PATH must be set to a valid directory." 1>&2
+    return 1
+  fi
+  if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then
+    echo "LIBVPX_CONFIG_PATH must be set to a valid directory." 1>&2
+    return 1
+  fi
+  if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then
+    echo "LIBVPX_TEST_DATA_PATH must be set to a valid directory." 1>&2
+    return 1
+  fi
 }

 # Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which
@@ -263,9 +261,6 @@ run_tests() {
    return
  fi

-  # Don't bother with the environment tests if everything else was disabled.
-  [ -z "${tests_to_filter}" ] && return
-
  # Combine environment and actual tests.
  local tests_to_run="${env_tests} ${tests_to_filter}"

@@ -383,7 +378,8 @@ else
  VPX_TEST_TEMP_ROOT=/tmp
 fi

-VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_$$"
+VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
+VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}_$$"  # PID breaks same-second srand() ties.

 if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \
   [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then
@@ -401,7 +397,6 @@ VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
 VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"

 VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
-VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"

 YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
 YUV_RAW_INPUT_WIDTH=352
@@ -422,6 +417,7 @@ vlog "$(basename "${0%.*}") test configuration:
  VPX_TEST_LIST_TESTS=${VPX_TEST_LIST_TESTS}
  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
  VPX_TEST_PREFIX=${VPX_TEST_PREFIX}
+  VPX_TEST_RAND=${VPX_TEST_RAND}
  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
  VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
--- a/test/user_priv_test.cc
+++ b/test/user_priv_test.cc
@@ -30,7 +30,7 @@ namespace {
 using std::string;
 using libvpx_test::ACMRandom;

-#if CONFIG_WEBM_IO && 0
+#if CONFIG_WEBM_IO

 void CheckUserPrivateData(void *user_priv, int *target) {
  // actual pointer value should be the same as expected.
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
@@ -7,19 +7,18 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-
-#include <cstdlib>
+#include <stdlib.h>
 #include <new>

-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vpx_config.h"
-#include "vpx/vpx_codec.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+
 #include "vpx/vpx_integer.h"
+#include "./vpx_config.h"
 #include "vpx_mem/vpx_mem.h"
+#include "vpx/vpx_codec.h"
 #if CONFIG_VP8_ENCODER
 # include "./vp8_rtcd.h"
 # include "vp8/common/variance.h"
@@ -28,6 +27,7 @@
 # include "./vp9_rtcd.h"
 # include "vp9/encoder/vp9_variance.h"
 #endif
+#include "test/acm_random.h"

 namespace {

@@ -46,14 +46,13 @@ static unsigned int mb_ss_ref(const int16_t *src) {

 static unsigned int variance_ref(const uint8_t *src, const uint8_t *ref,
                                 int l2w, int l2h, int src_stride_coeff,
-                                 int ref_stride_coeff, uint32_t *sse_ptr,
+                                 int ref_stride_coeff, unsigned int *sse_ptr,
                                 bool use_high_bit_depth_,
                                 vpx_bit_depth_t bit_depth) {
 #if CONFIG_VP9_HIGHBITDEPTH
  int64_t se = 0;
  uint64_t sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      int diff;
@@ -71,14 +70,13 @@ static unsigned int variance_ref(const uint8_t *src, const uint8_t *ref,
    }
  }
  if (bit_depth > VPX_BITS_8) {
-    sse = ROUND_POWER_OF_TWO(sse, 2 * (bit_depth - 8));
-    se = ROUND_POWER_OF_TWO(se, bit_depth - 8);
+    sse = ROUND_POWER_OF_TWO(sse, 2*(bit_depth-8));
+    se = ROUND_POWER_OF_TWO(se, bit_depth-8);
  }
 #else
  int se = 0;
  unsigned int sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      int diff = ref[w * y * ref_stride_coeff + x] -
@@ -87,7 +85,7 @@ static unsigned int variance_ref(const uint8_t *src, const uint8_t *ref,
      sse += diff * diff;
    }
  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
  *sse_ptr = sse;
  return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
@@ -100,11 +98,11 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
 #if CONFIG_VP9_HIGHBITDEPTH
  int64_t se = 0;
  uint64_t sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
-      // Bilinear interpolation at a 16th pel step.
+      // bilinear interpolation at a 16th pel step
+
      if (!use_high_bit_depth_) {
        const int a1 = ref[(w + 1) * (y + 0) + x + 0];
        const int a2 = ref[(w + 1) * (y + 0) + x + 1];
@@ -113,7 +111,7 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
-        const int diff = r - src[w * y + x];
+        int diff = r - src[w * y + x];
        se += diff;
        sse += diff * diff;
      } else {
@@ -126,7 +124,7 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
-        const int diff = r - src16[w * y + x];
+        int diff = r - src16[w * y + x];
        se += diff;
        sse += diff * diff;
      }
@@ -139,11 +137,10 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
 #else
  int se = 0;
  unsigned int sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
-      // Bilinear interpolation at a 16th pel step.
+      // bilinear interpolation at a 16th pel step
      const int a1 = ref[(w + 1) * (y + 0) + x + 0];
      const int a2 = ref[(w + 1) * (y + 0) + x + 1];
      const int b1 = ref[(w + 1) * (y + 1) + x + 0];
@@ -151,12 +148,12 @@ static unsigned int subpel_variance_ref(const uint8_t *ref, const uint8_t *src,
      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int diff = r - src[w * y + x];
+      int diff = r - src[w * y + x];
      se += diff;
      sse += diff * diff;
    }
  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
  *sse_ptr = sse;
  return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
@@ -211,20 +208,21 @@ class VarianceTest
                                            VarianceFunctionType, int> > {
 public:
  virtual void SetUp() {
-    const tuple<int, int, VarianceFunctionType, int>& params = this->GetParam();
+    const tuple<int, int, VarianceFunctionType, int>& params =
+        this->GetParam();
    log2width_  = get<0>(params);
    width_ = 1 << log2width_;
    log2height_ = get<1>(params);
    height_ = 1 << log2height_;
    variance_ = get<2>(params);
    if (get<3>(params)) {
-      bit_depth_ = static_cast<vpx_bit_depth_t>(get<3>(params));
+      bit_depth_ = (vpx_bit_depth_t) get<3>(params);
      use_high_bit_depth_ = true;
    } else {
      bit_depth_ = VPX_BITS_8;
      use_high_bit_depth_ = false;
    }
-    mask_ = (1 << bit_depth_) - 1;
+    mask_ = (1 << bit_depth_)-1;

    rnd_.Reset(ACMRandom::DeterministicSeed());
    block_size_ = width_ * height_;
@@ -367,9 +365,9 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() {
    unsigned int sse1, sse2;
    unsigned int var1;

-    ASM_REGISTER_STATE_CHECK(
-        var1 = variance_(src_, width_ * src_stride_coeff,
-                         ref_, width_ * ref_stride_coeff, &sse1));
+    ASM_REGISTER_STATE_CHECK(var1 = variance_(src_, width_ * src_stride_coeff,
+                                         ref_, width_ * ref_stride_coeff,
+                                         &sse1));
    const unsigned int var2 = variance_ref(src_, ref_, log2width_,
                                           log2height_, src_stride_coeff,
                                           ref_stride_coeff, &sse2,
@@ -378,7 +376,6 @@ void VarianceTest<VarianceFunctionType>::RefStrideTest() {
    EXPECT_EQ(var1, var2);
  }
 }
-
 template<typename VarianceFunctionType>
 void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
  const int half = block_size_ / 2;
@@ -391,7 +388,7 @@ void VarianceTest<VarianceFunctionType>::OneQuarterTest() {
    vpx_memset16(CONVERT_TO_SHORTPTR(src_), 255 << (bit_depth_ - 8),
                 block_size_);
    vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << (bit_depth_ - 8), half);
-    vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half);
+    vpx_memset16(CONVERT_TO_SHORTPTR(ref_)+ half, 0, half);
  }
 #else
  memset(src_, 255, block_size_);
@@ -473,7 +470,8 @@ void MseTest<MseFunctionType>::RefTest_sse() {
    unsigned int sse2;
    unsigned int var1;
    const int stride_coeff = 1;
-    ASM_REGISTER_STATE_CHECK(var1 = mse_(src_, width_, ref_, width_));
+    ASM_REGISTER_STATE_CHECK(
+        var1 = mse_(src_, width_, ref_, width_));
    variance_ref(src_, ref_, log2width_, log2height_, stride_coeff,
                 stride_coeff, &sse2, false, VPX_BITS_8);
    EXPECT_EQ(var1, sse2);
@@ -502,6 +500,7 @@ void MseTest<MseFunctionType>::MaxTest_sse() {
 #endif

 #if CONFIG_VP9_ENCODER
+
 unsigned int subpel_avg_variance_ref(const uint8_t *ref,
                                     const uint8_t *src,
                                     const uint8_t *second_pred,
@@ -513,8 +512,7 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
 #if CONFIG_VP9_HIGHBITDEPTH
  int64_t se = 0;
  uint64_t sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      // bilinear interpolation at a 16th pel step
@@ -526,7 +524,7 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
-        const int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+        int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
        se += diff;
        sse += diff * diff;
      } else {
@@ -540,7 +538,7 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
        const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
        const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
        const int r = a + (((b - a) * yoff + 8) >> 4);
-        const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
+        int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x];
        se += diff;
        sse += diff * diff;
      }
@@ -553,8 +551,7 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
 #else
  int se = 0;
  unsigned int sse = 0;
-  const int w = 1 << l2w;
-  const int h = 1 << l2h;
+  const int w = 1 << l2w, h = 1 << l2h;
  for (int y = 0; y < h; y++) {
    for (int x = 0; x < w; x++) {
      // bilinear interpolation at a 16th pel step
@@ -565,12 +562,12 @@ unsigned int subpel_avg_variance_ref(const uint8_t *ref,
      const int a = a1 + (((a2 - a1) * xoff + 8) >> 4);
      const int b = b1 + (((b2 - b1) * xoff + 8) >> 4);
      const int r = a + (((b - a) * yoff + 8) >> 4);
-      const int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
+      int diff = ((r + second_pred[w * y + x] + 1) >> 1) - src[w * y + x];
      se += diff;
      sse += diff * diff;
    }
  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
  *sse_ptr = sse;
  return sse - (((int64_t) se * se) >> (l2w + l2h));
 }
@@ -605,20 +602,18 @@ class SubpelVarianceTest
      sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
      ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
    } else {
-      src_ = CONVERT_TO_BYTEPTR(
-          reinterpret_cast<uint16_t *>(
-              vpx_memalign(16, block_size_*sizeof(uint16_t))));
-      sec_ = CONVERT_TO_BYTEPTR(
-          reinterpret_cast<uint16_t *>(
-              vpx_memalign(16, block_size_*sizeof(uint16_t))));
-      ref_ = CONVERT_TO_BYTEPTR(
-          new uint16_t[block_size_ + width_ + height_ + 1]);
+      src_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(vpx_memalign(16,
+                                          block_size_*sizeof(uint16_t))));
+      sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast<uint16_t *>(vpx_memalign(16,
+                                          block_size_*sizeof(uint16_t))));
+      ref_ = CONVERT_TO_BYTEPTR(new uint16_t[block_size_ + width_ + height_
+                                             + 1]);
    }
 #else
    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
    sec_ = reinterpret_cast<uint8_t *>(vpx_memalign(16, block_size_));
    ref_ = new uint8_t[block_size_ + width_ + height_ + 1];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
    ASSERT_TRUE(src_ != NULL);
    ASSERT_TRUE(sec_ != NULL);
    ASSERT_TRUE(ref_ != NULL);
@@ -686,7 +681,7 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {
      for (int j = 0; j < block_size_ + width_ + height_ + 1; j++) {
        ref_[j] = rnd_.Rand8();
      }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      unsigned int sse1, sse2;
      unsigned int var1;
      ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
@@ -703,9 +698,9 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::RefTest() {

 template<typename SubpelVarianceFunctionType>
 void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
-  // Compare against reference.
-  // Src: Set the first half of values to 0, the second half to the maximum.
-  // Ref: Set the first half of values to the maximum, the second half to 0.
+  // Compare against reference
+  // Src: Set the first half of values to 0, the second half to the maximum
+  // Ref: Set the first half of values to the maximum, the second half to 0
  for (int x = 0; x < 16; ++x) {
    for (int y = 0; y < 16; ++y) {
      const int half = block_size_ / 2;
@@ -719,22 +714,23 @@ void SubpelVarianceTest<SubpelVarianceFunctionType>::ExtremeRefTest() {
        vpx_memset16(CONVERT_TO_SHORTPTR(src_), mask_, half);
        vpx_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half);
        vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half);
-        vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask_,
-                     half + width_ + height_ + 1);
+        vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask_, half + width_ +
+                     height_ + 1);
      }
 #else
      memset(src_, 0, half);
      memset(src_ + half, 255, half);
      memset(ref_, 255, half);
      memset(ref_ + half, 0, half + width_ + height_ + 1);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      unsigned int sse1, sse2;
      unsigned int var1;
-      ASM_REGISTER_STATE_CHECK(
-          var1 = subpel_variance_(ref_, width_ + 1, x, y, src_, width_, &sse1));
-      const unsigned int var2 =
-          subpel_variance_ref(ref_, src_, log2width_, log2height_, x, y, &sse2,
-                              use_high_bit_depth_, bit_depth_);
+      ASM_REGISTER_STATE_CHECK(var1 = subpel_variance_(ref_, width_ + 1, x, y,
+                                                   src_, width_, &sse1));
+      const unsigned int var2 = subpel_variance_ref(ref_, src_, log2width_,
+                                                    log2height_, x, y, &sse2,
+                                                    use_high_bit_depth_,
+                                                    bit_depth_);
      EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y;
      EXPECT_EQ(var1, var2) << "at position " << x << ", " << y;
    }
@@ -1013,12 +1009,12 @@ const vp9_variance_fn_t highbd_variance64x32_c = vp9_highbd_variance64x32_c;
 const vp9_variance_fn_t highbd_variance64x64_c = vp9_highbd_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
    C, VP9VarianceHighTest,
-    ::testing::Values(make_tuple(2, 2, highbd_10_variance4x4_c, 10),
-                      make_tuple(2, 3, highbd_10_variance4x8_c, 10),
-                      make_tuple(3, 2, highbd_10_variance8x4_c, 10),
-                      make_tuple(3, 3, highbd_10_variance8x8_c, 10),
-                      make_tuple(3, 4, highbd_10_variance8x16_c, 10),
-                      make_tuple(4, 3, highbd_10_variance16x8_c, 10),
+    ::testing::Values(make_tuple(2, 2, highbd_10_variance4x4_c,   10),
+                      make_tuple(2, 3, highbd_10_variance4x8_c,   10),
+                      make_tuple(3, 2, highbd_10_variance8x4_c,   10),
+                      make_tuple(3, 3, highbd_10_variance8x8_c,   10),
+                      make_tuple(3, 4, highbd_10_variance8x16_c,  10),
+                      make_tuple(4, 3, highbd_10_variance16x8_c,  10),
                      make_tuple(4, 4, highbd_10_variance16x16_c, 10),
                      make_tuple(4, 5, highbd_10_variance16x32_c, 10),
                      make_tuple(5, 4, highbd_10_variance32x16_c, 10),
@@ -1026,12 +1022,12 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_10_variance32x64_c, 10),
                      make_tuple(6, 5, highbd_10_variance64x32_c, 10),
                      make_tuple(6, 6, highbd_10_variance64x64_c, 10),
-                      make_tuple(2, 2, highbd_12_variance4x4_c, 12),
-                      make_tuple(2, 3, highbd_12_variance4x8_c, 12),
-                      make_tuple(3, 2, highbd_12_variance8x4_c, 12),
-                      make_tuple(3, 3, highbd_12_variance8x8_c, 12),
-                      make_tuple(3, 4, highbd_12_variance8x16_c, 12),
-                      make_tuple(4, 3, highbd_12_variance16x8_c, 12),
+                      make_tuple(2, 2, highbd_12_variance4x4_c,   12),
+                      make_tuple(2, 3, highbd_12_variance4x8_c,   12),
+                      make_tuple(3, 2, highbd_12_variance8x4_c,   12),
+                      make_tuple(3, 3, highbd_12_variance8x8_c,   12),
+                      make_tuple(3, 4, highbd_12_variance8x16_c,  12),
+                      make_tuple(4, 3, highbd_12_variance16x8_c,  12),
                      make_tuple(4, 4, highbd_12_variance16x16_c, 12),
                      make_tuple(4, 5, highbd_12_variance16x32_c, 12),
                      make_tuple(5, 4, highbd_12_variance32x16_c, 12),
@@ -1039,12 +1035,12 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_12_variance32x64_c, 12),
                      make_tuple(6, 5, highbd_12_variance64x32_c, 12),
                      make_tuple(6, 6, highbd_12_variance64x64_c, 12),
-                      make_tuple(2, 2, highbd_variance4x4_c, 8),
-                      make_tuple(2, 3, highbd_variance4x8_c, 8),
-                      make_tuple(3, 2, highbd_variance8x4_c, 8),
-                      make_tuple(3, 3, highbd_variance8x8_c, 8),
-                      make_tuple(3, 4, highbd_variance8x16_c, 8),
-                      make_tuple(4, 3, highbd_variance16x8_c, 8),
+                      make_tuple(2, 2, highbd_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_variance16x8_c,  8),
                      make_tuple(4, 4, highbd_variance16x16_c, 8),
                      make_tuple(4, 5, highbd_variance16x32_c, 8),
                      make_tuple(5, 4, highbd_variance32x16_c, 8),
@@ -1216,12 +1212,12 @@ const vp9_subpixvariance_fn_t highbd_subpel_variance64x64_c =
    vp9_highbd_sub_pixel_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
    C, VP9SubpelVarianceHighTest,
-    ::testing::Values(make_tuple(2, 2, highbd_10_subpel_variance4x4_c, 10),
-                      make_tuple(2, 3, highbd_10_subpel_variance4x8_c, 10),
-                      make_tuple(3, 2, highbd_10_subpel_variance8x4_c, 10),
-                      make_tuple(3, 3, highbd_10_subpel_variance8x8_c, 10),
-                      make_tuple(3, 4, highbd_10_subpel_variance8x16_c, 10),
-                      make_tuple(4, 3, highbd_10_subpel_variance16x8_c, 10),
+    ::testing::Values(make_tuple(2, 2, highbd_10_subpel_variance4x4_c,   10),
+                      make_tuple(2, 3, highbd_10_subpel_variance4x8_c,   10),
+                      make_tuple(3, 2, highbd_10_subpel_variance8x4_c,   10),
+                      make_tuple(3, 3, highbd_10_subpel_variance8x8_c,   10),
+                      make_tuple(3, 4, highbd_10_subpel_variance8x16_c,  10),
+                      make_tuple(4, 3, highbd_10_subpel_variance16x8_c,  10),
                      make_tuple(4, 4, highbd_10_subpel_variance16x16_c, 10),
                      make_tuple(4, 5, highbd_10_subpel_variance16x32_c, 10),
                      make_tuple(5, 4, highbd_10_subpel_variance32x16_c, 10),
@@ -1229,12 +1225,12 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_10_subpel_variance32x64_c, 10),
                      make_tuple(6, 5, highbd_10_subpel_variance64x32_c, 10),
                      make_tuple(6, 6, highbd_10_subpel_variance64x64_c, 10),
-                      make_tuple(2, 2, highbd_12_subpel_variance4x4_c, 12),
-                      make_tuple(2, 3, highbd_12_subpel_variance4x8_c, 12),
-                      make_tuple(3, 2, highbd_12_subpel_variance8x4_c, 12),
-                      make_tuple(3, 3, highbd_12_subpel_variance8x8_c, 12),
-                      make_tuple(3, 4, highbd_12_subpel_variance8x16_c, 12),
-                      make_tuple(4, 3, highbd_12_subpel_variance16x8_c, 12),
+                      make_tuple(2, 2, highbd_12_subpel_variance4x4_c,   12),
+                      make_tuple(2, 3, highbd_12_subpel_variance4x8_c,   12),
+                      make_tuple(3, 2, highbd_12_subpel_variance8x4_c,   12),
+                      make_tuple(3, 3, highbd_12_subpel_variance8x8_c,   12),
+                      make_tuple(3, 4, highbd_12_subpel_variance8x16_c,  12),
+                      make_tuple(4, 3, highbd_12_subpel_variance16x8_c,  12),
                      make_tuple(4, 4, highbd_12_subpel_variance16x16_c, 12),
                      make_tuple(4, 5, highbd_12_subpel_variance16x32_c, 12),
                      make_tuple(5, 4, highbd_12_subpel_variance32x16_c, 12),
@@ -1242,12 +1238,12 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_12_subpel_variance32x64_c, 12),
                      make_tuple(6, 5, highbd_12_subpel_variance64x32_c, 12),
                      make_tuple(6, 6, highbd_12_subpel_variance64x64_c, 12),
-                      make_tuple(2, 2, highbd_subpel_variance4x4_c, 8),
-                      make_tuple(2, 3, highbd_subpel_variance4x8_c, 8),
-                      make_tuple(3, 2, highbd_subpel_variance8x4_c, 8),
-                      make_tuple(3, 3, highbd_subpel_variance8x8_c, 8),
-                      make_tuple(3, 4, highbd_subpel_variance8x16_c, 8),
-                      make_tuple(4, 3, highbd_subpel_variance16x8_c, 8),
+                      make_tuple(2, 2, highbd_subpel_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_subpel_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_subpel_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_subpel_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_subpel_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_subpel_variance16x8_c,  8),
                      make_tuple(4, 4, highbd_subpel_variance16x16_c, 8),
                      make_tuple(4, 5, highbd_subpel_variance16x32_c, 8),
                      make_tuple(5, 4, highbd_subpel_variance32x16_c, 8),
@@ -1335,46 +1331,71 @@ const vp9_subp_avg_variance_fn_t highbd_subpel_avg_variance64x64_c =
    vp9_highbd_sub_pixel_avg_variance64x64_c;
 INSTANTIATE_TEST_CASE_P(
    C, VP9SubpelAvgVarianceHighTest,
-    ::testing::Values(
-        make_tuple(2, 2, highbd_10_subpel_avg_variance4x4_c, 10),
-        make_tuple(2, 3, highbd_10_subpel_avg_variance4x8_c, 10),
-        make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_c, 10),
-        make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_c, 10),
-        make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_c, 10),
-        make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_c, 10),
-        make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_c, 10),
-        make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_c, 10),
-        make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_c, 10),
-        make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_c, 10),
-        make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_c, 10),
-        make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_c, 10),
-        make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_c, 10),
-        make_tuple(2, 2, highbd_12_subpel_avg_variance4x4_c, 12),
-        make_tuple(2, 3, highbd_12_subpel_avg_variance4x8_c, 12),
-        make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_c, 12),
-        make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_c, 12),
-        make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_c, 12),
-        make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_c, 12),
-        make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_c, 12),
-        make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_c, 12),
-        make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_c, 12),
-        make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_c, 12),
-        make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_c, 12),
-        make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_c, 12),
-        make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_c, 12),
-        make_tuple(2, 2, highbd_subpel_avg_variance4x4_c, 8),
-        make_tuple(2, 3, highbd_subpel_avg_variance4x8_c, 8),
-        make_tuple(3, 2, highbd_subpel_avg_variance8x4_c, 8),
-        make_tuple(3, 3, highbd_subpel_avg_variance8x8_c, 8),
-        make_tuple(3, 4, highbd_subpel_avg_variance8x16_c, 8),
-        make_tuple(4, 3, highbd_subpel_avg_variance16x8_c, 8),
-        make_tuple(4, 4, highbd_subpel_avg_variance16x16_c, 8),
-        make_tuple(4, 5, highbd_subpel_avg_variance16x32_c, 8),
-        make_tuple(5, 4, highbd_subpel_avg_variance32x16_c, 8),
-        make_tuple(5, 5, highbd_subpel_avg_variance32x32_c, 8),
-        make_tuple(5, 6, highbd_subpel_avg_variance32x64_c, 8),
-        make_tuple(6, 5, highbd_subpel_avg_variance64x32_c, 8),
-        make_tuple(6, 6, highbd_subpel_avg_variance64x64_c, 8)));
+    ::testing::Values(make_tuple(2, 2, highbd_10_subpel_avg_variance4x4_c,
+                                 10),
+                      make_tuple(2, 3, highbd_10_subpel_avg_variance4x8_c,
+                                 10),
+                      make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_c,
+                                 10),
+                      make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_c,
+                                 10),
+                      make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_c,
+                                 10),
+                      make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_c,
+                                 10),
+                      make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_c,
+                                 10),
+                      make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_c,
+                                 10),
+                      make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_c,
+                                 10),
+                      make_tuple(5, 5, highbd_10_subpel_avg_variance32x32_c,
+                                 10),
+                      make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_c,
+                                 10),
+                      make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_c,
+                                 10),
+                      make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_c,
+                                 10),
+                      make_tuple(2, 2, highbd_12_subpel_avg_variance4x4_c,
+                                 12),
+                      make_tuple(2, 3, highbd_12_subpel_avg_variance4x8_c,
+                                 12),
+                      make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_c,
+                                 12),
+                      make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_c,
+                                 12),
+                      make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_c,
+                                 12),
+                      make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_c,
+                                 12),
+                      make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_c,
+                                12),
+                      make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_c,
+                                12),
+                      make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_c,
+                                 12),
+                      make_tuple(5, 5, highbd_12_subpel_avg_variance32x32_c,
+                                 12),
+                      make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_c,
+                                 12),
+                      make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_c,
+                                 12),
+                      make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_c,
+                                 12),
+                      make_tuple(2, 2, highbd_subpel_avg_variance4x4_c,   8),
+                      make_tuple(2, 3, highbd_subpel_avg_variance4x8_c,   8),
+                      make_tuple(3, 2, highbd_subpel_avg_variance8x4_c,   8),
+                      make_tuple(3, 3, highbd_subpel_avg_variance8x8_c,   8),
+                      make_tuple(3, 4, highbd_subpel_avg_variance8x16_c,  8),
+                      make_tuple(4, 3, highbd_subpel_avg_variance16x8_c,  8),
+                      make_tuple(4, 4, highbd_subpel_avg_variance16x16_c, 8),
+                      make_tuple(4, 5, highbd_subpel_avg_variance16x32_c, 8),
+                      make_tuple(5, 4, highbd_subpel_avg_variance32x16_c, 8),
+                      make_tuple(5, 5, highbd_subpel_avg_variance32x32_c, 8),
+                      make_tuple(5, 6, highbd_subpel_avg_variance32x64_c, 8),
+                      make_tuple(6, 5, highbd_subpel_avg_variance64x32_c, 8),
+                      make_tuple(6, 6, highbd_subpel_avg_variance64x64_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_SSE2
@@ -1553,9 +1574,9 @@ const vp9_variance_fn_t highbd_12_variance64x64_sse2 =
    vp9_highbd_12_variance64x64_sse2;
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9VarianceHighTest,
-    ::testing::Values(make_tuple(3, 3, highbd_10_variance8x8_sse2, 10),
-                      make_tuple(3, 4, highbd_10_variance8x16_sse2, 10),
-                      make_tuple(4, 3, highbd_10_variance16x8_sse2, 10),
+    ::testing::Values(make_tuple(3, 3, highbd_10_variance8x8_sse2,   10),
+                      make_tuple(3, 4, highbd_10_variance8x16_sse2,  10),
+                      make_tuple(4, 3, highbd_10_variance16x8_sse2,  10),
                      make_tuple(4, 4, highbd_10_variance16x16_sse2, 10),
                      make_tuple(4, 5, highbd_10_variance16x32_sse2, 10),
                      make_tuple(5, 4, highbd_10_variance32x16_sse2, 10),
@@ -1563,9 +1584,9 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_10_variance32x64_sse2, 10),
                      make_tuple(6, 5, highbd_10_variance64x32_sse2, 10),
                      make_tuple(6, 6, highbd_10_variance64x64_sse2, 10),
-                      make_tuple(3, 3, highbd_12_variance8x8_sse2, 12),
-                      make_tuple(3, 4, highbd_12_variance8x16_sse2, 12),
-                      make_tuple(4, 3, highbd_12_variance16x8_sse2, 12),
+                      make_tuple(3, 3, highbd_12_variance8x8_sse2,   12),
+                      make_tuple(3, 4, highbd_12_variance8x16_sse2,  12),
+                      make_tuple(4, 3, highbd_12_variance16x8_sse2,  12),
                      make_tuple(4, 4, highbd_12_variance16x16_sse2, 12),
                      make_tuple(4, 5, highbd_12_variance16x32_sse2, 12),
                      make_tuple(5, 4, highbd_12_variance32x16_sse2, 12),
@@ -1573,9 +1594,9 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_12_variance32x64_sse2, 12),
                      make_tuple(6, 5, highbd_12_variance64x32_sse2, 12),
                      make_tuple(6, 6, highbd_12_variance64x64_sse2, 12),
-                      make_tuple(3, 3, highbd_variance8x8_sse2, 8),
-                      make_tuple(3, 4, highbd_variance8x16_sse2, 8),
-                      make_tuple(4, 3, highbd_variance16x8_sse2, 8),
+                      make_tuple(3, 3, highbd_variance8x8_sse2,   8),
+                      make_tuple(3, 4, highbd_variance8x16_sse2,  8),
+                      make_tuple(4, 3, highbd_variance16x8_sse2,  8),
                      make_tuple(4, 4, highbd_variance16x16_sse2, 8),
                      make_tuple(4, 5, highbd_variance16x32_sse2, 8),
                      make_tuple(5, 4, highbd_variance32x16_sse2, 8),
@@ -1662,10 +1683,10 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(5, 6, highbd_10_subpel_variance32x64_sse2, 10),
                      make_tuple(6, 5, highbd_10_subpel_variance64x32_sse2, 10),
                      make_tuple(6, 6, highbd_10_subpel_variance64x64_sse2, 10),
-                      make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2, 12),
-                      make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2, 12),
-                      make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2, 12),
-                      make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2, 12),
+                      make_tuple(3, 2, highbd_12_subpel_variance8x4_sse2,   12),
+                      make_tuple(3, 3, highbd_12_subpel_variance8x8_sse2,   12),
+                      make_tuple(3, 4, highbd_12_subpel_variance8x16_sse2,  12),
+                      make_tuple(4, 3, highbd_12_subpel_variance16x8_sse2,  12),
                      make_tuple(4, 4, highbd_12_subpel_variance16x16_sse2, 12),
                      make_tuple(4, 5, highbd_12_subpel_variance16x32_sse2, 12),
                      make_tuple(5, 4, highbd_12_subpel_variance32x16_sse2, 12),
@@ -1753,10 +1774,10 @@ const vp9_subp_avg_variance_fn_t highbd_12_subpel_avg_variance64x64_sse2 =
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9SubpelAvgVarianceHighTest,
    ::testing::Values(
-                  make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2, 10),
-                  make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2, 10),
-                  make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2, 10),
-                  make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2, 10),
+                  make_tuple(3, 2, highbd_10_subpel_avg_variance8x4_sse2,   10),
+                  make_tuple(3, 3, highbd_10_subpel_avg_variance8x8_sse2,   10),
+                  make_tuple(3, 4, highbd_10_subpel_avg_variance8x16_sse2,  10),
+                  make_tuple(4, 3, highbd_10_subpel_avg_variance16x8_sse2,  10),
                  make_tuple(4, 4, highbd_10_subpel_avg_variance16x16_sse2, 10),
                  make_tuple(4, 5, highbd_10_subpel_avg_variance16x32_sse2, 10),
                  make_tuple(5, 4, highbd_10_subpel_avg_variance32x16_sse2, 10),
@@ -1764,10 +1785,10 @@ INSTANTIATE_TEST_CASE_P(
                  make_tuple(5, 6, highbd_10_subpel_avg_variance32x64_sse2, 10),
                  make_tuple(6, 5, highbd_10_subpel_avg_variance64x32_sse2, 10),
                  make_tuple(6, 6, highbd_10_subpel_avg_variance64x64_sse2, 10),
-                  make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2, 12),
-                  make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2, 12),
-                  make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2, 12),
-                  make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2, 12),
+                  make_tuple(3, 2, highbd_12_subpel_avg_variance8x4_sse2,   12),
+                  make_tuple(3, 3, highbd_12_subpel_avg_variance8x8_sse2,   12),
+                  make_tuple(3, 4, highbd_12_subpel_avg_variance8x16_sse2,  12),
+                  make_tuple(4, 3, highbd_12_subpel_avg_variance16x8_sse2,  12),
                  make_tuple(4, 4, highbd_12_subpel_avg_variance16x16_sse2, 12),
                  make_tuple(4, 5, highbd_12_subpel_avg_variance16x32_sse2, 12),
                  make_tuple(5, 4, highbd_12_subpel_avg_variance32x16_sse2, 12),
@@ -1775,10 +1796,10 @@ INSTANTIATE_TEST_CASE_P(
                  make_tuple(5, 6, highbd_12_subpel_avg_variance32x64_sse2, 12),
                  make_tuple(6, 5, highbd_12_subpel_avg_variance64x32_sse2, 12),
                  make_tuple(6, 6, highbd_12_subpel_avg_variance64x64_sse2, 12),
-                  make_tuple(3, 2, highbd_subpel_avg_variance8x4_sse2, 8),
-                  make_tuple(3, 3, highbd_subpel_avg_variance8x8_sse2, 8),
-                  make_tuple(3, 4, highbd_subpel_avg_variance8x16_sse2, 8),
-                  make_tuple(4, 3, highbd_subpel_avg_variance16x8_sse2, 8),
+                  make_tuple(3, 2, highbd_subpel_avg_variance8x4_sse2,   8),
+                  make_tuple(3, 3, highbd_subpel_avg_variance8x8_sse2,   8),
+                  make_tuple(3, 4, highbd_subpel_avg_variance8x16_sse2,  8),
+                  make_tuple(4, 3, highbd_subpel_avg_variance16x8_sse2,  8),
                  make_tuple(4, 4, highbd_subpel_avg_variance16x16_sse2, 8),
                  make_tuple(4, 5, highbd_subpel_avg_variance16x32_sse2, 8),
                  make_tuple(5, 4, highbd_subpel_avg_variance32x16_sse2, 8),
@@ -1914,17 +1935,11 @@ INSTANTIATE_TEST_CASE_P(
 const vp9_variance_fn_t variance8x8_neon = vp9_variance8x8_neon;
 const vp9_variance_fn_t variance16x16_neon = vp9_variance16x16_neon;
 const vp9_variance_fn_t variance32x32_neon = vp9_variance32x32_neon;
-const vp9_variance_fn_t variance32x64_neon = vp9_variance32x64_neon;
-const vp9_variance_fn_t variance64x32_neon = vp9_variance64x32_neon;
-const vp9_variance_fn_t variance64x64_neon = vp9_variance64x64_neon;
 INSTANTIATE_TEST_CASE_P(
    NEON, VP9VarianceTest,
    ::testing::Values(make_tuple(3, 3, variance8x8_neon, 0),
                      make_tuple(4, 4, variance16x16_neon, 0),
-                      make_tuple(5, 5, variance32x32_neon, 0),
-                      make_tuple(5, 6, variance32x64_neon, 0),
-                      make_tuple(6, 5, variance64x32_neon, 0),
-                      make_tuple(6, 6, variance64x64_neon, 0)));
+                      make_tuple(5, 5, variance32x32_neon, 0)));

 const vp9_subpixvariance_fn_t subpel_variance8x8_neon =
    vp9_sub_pixel_variance8x8_neon;
@@ -1932,14 +1947,11 @@ const vp9_subpixvariance_fn_t subpel_variance16x16_neon =
    vp9_sub_pixel_variance16x16_neon;
 const vp9_subpixvariance_fn_t subpel_variance32x32_neon =
    vp9_sub_pixel_variance32x32_neon;
-const vp9_subpixvariance_fn_t subpel_variance64x64_neon =
-    vp9_sub_pixel_variance64x64_neon;
 INSTANTIATE_TEST_CASE_P(
    NEON, VP9SubpelVarianceTest,
    ::testing::Values(make_tuple(3, 3, subpel_variance8x8_neon, 0),
                      make_tuple(4, 4, subpel_variance16x16_neon, 0),
-                      make_tuple(5, 5, subpel_variance32x32_neon, 0),
-                      make_tuple(6, 6, subpel_variance64x64_neon, 0)));
+                      make_tuple(5, 5, subpel_variance32x32_neon, 0)));
 #endif  // HAVE_NEON
 #endif  // CONFIG_VP9_ENCODER

--- a/test/video_source.h
+++ b/test/video_source.h
@@ -134,13 +134,8 @@ class VideoSource {

 class DummyVideoSource : public VideoSource {
 public:
-  DummyVideoSource()
-      : img_(NULL),
-        limit_(100),
-        width_(80),
-        height_(64),
-        format_(VPX_IMG_FMT_I420) {
-    ReallocImage();
+  DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) {
+    SetSize(80, 64);
  }

  virtual ~DummyVideoSource() { vpx_img_free(img_); }
@@ -179,35 +174,23 @@ class DummyVideoSource : public VideoSource {

  void SetSize(unsigned int width, unsigned int height) {
    if (width != width_ || height != height_) {
+      vpx_img_free(img_);
+      raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
+      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 32);
      width_ = width;
      height_ = height;
-      ReallocImage();
-    }
-  }
-
-  void SetImageFormat(vpx_img_fmt_t format) {
-    if (format_ != format) {
-      format_ = format;
-      ReallocImage();
    }
  }

 protected:
  virtual void FillFrame() { if (img_) memset(img_->img_data, 0, raw_sz_); }

-  void ReallocImage() {
-    vpx_img_free(img_);
-    img_ = vpx_img_alloc(NULL, format_, width_, height_, 32);
-    raw_sz_ = ((img_->w + 31) & ~31) * img_->h * img_->bps / 8;
-  }
-
  vpx_image_t *img_;
  size_t       raw_sz_;
  unsigned int limit_;
  unsigned int frame_;
  unsigned int width_;
  unsigned int height_;
-  vpx_img_fmt_t format_;
 };


--- a/test/vp8_denoiser_sse2_test.cc
+++ b/test/vp8_denoiser_sse2_test.cc
@@ -28,18 +28,19 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 16 * 16;
-class VP8DenoiserTest : public ::testing::TestWithParam<int> {
+class VP8DenoiserTest
+    : public ::testing::TestWithParam<int> {
 public:
  virtual ~VP8DenoiserTest() {}

  virtual void SetUp() {
-    increase_denoising_ = GetParam();
+    increase_denoising = GetParam();
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int increase_denoising_;
+  int increase_denoising;
 };

 TEST_P(VP8DenoiserTest, BitexactCheck) {
@@ -62,8 +63,8 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
-    const int motion_magnitude_ran =
-        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    uint8_t motion_magnitude_random
+              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
@@ -71,20 +72,20 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      sig_block_sse2[j] = sig_block_c[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0 ? -1 : 1) *
-             (rnd.Rand8() % 20);
+      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
+             (rnd.Rand8()%20);
      // Clip.
-      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
+      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
    }

    // Test denosiser on Y component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(
-        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
-        motion_magnitude_ran, increase_denoising_));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(mc_avg_block, stride,
+                               avg_block_c, stride, sig_block_c, stride,
+                               motion_magnitude_random, increase_denoising));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(
-        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
-        motion_magnitude_ran, increase_denoising_));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(mc_avg_block, stride,
+                               avg_block_sse2, stride, sig_block_sse2, stride,
+                               motion_magnitude_random, increase_denoising));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -93,14 +94,14 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      }
    }

-    // Test denoiser on UV component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(
-        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
-        motion_magnitude_ran, increase_denoising_));
+    // Test denosiser on UV component.
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(mc_avg_block, stride,
+                               avg_block_c, stride, sig_block_c, stride,
+                               motion_magnitude_random, increase_denoising));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(
-        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
-        motion_magnitude_ran, increase_denoising_));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(mc_avg_block, stride,
+                               avg_block_sse2, stride, sig_block_sse2, stride,
+                               motion_magnitude_random, increase_denoising));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -112,5 +113,7 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
 }

 // Test for all block size.
-INSTANTIATE_TEST_CASE_P(SSE2, VP8DenoiserTest, ::testing::Values(0, 1));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP8DenoiserTest,
+    ::testing::Values(0, 1));
 }  // namespace
--- a/test/vp8_fragments_test.cc
+++ b/test/vp8_fragments_test.cc
@@ -1,37 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/video_source.h"
-
-namespace {
-
-class VP8FramgmentsTest
-    : public ::libvpx_test::EncoderTest,
-      public ::testing::Test {
- protected:
-  VP8FramgmentsTest() : EncoderTest(&::libvpx_test::kVP8) {}
-  virtual ~VP8FramgmentsTest() {}
-
-  virtual void SetUp() {
-    const unsigned long init_flags =  // NOLINT(runtime/int)
-        VPX_CODEC_USE_OUTPUT_PARTITION;
-    InitializeConfig();
-    SetMode(::libvpx_test::kRealTime);
-    set_init_flags(init_flags);
-  }
-};
-
-TEST_F(VP8FramgmentsTest, TestFragmentsEncodeDecode) {
-  ::libvpx_test::RandomVideoSource video;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-}  // namespace
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -57,7 +57,7 @@ class AverageTestBase : public ::testing::Test {
  }

  // Sum Pixels
-  unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
+  unsigned int ReferenceAverage(const uint8_t* source, int pitch ) {
    unsigned int average = 0;
    for (int h = 0; h < 8; ++h)
      for (int w = 0; w < 8; ++w)
@@ -65,14 +65,6 @@ class AverageTestBase : public ::testing::Test {
    return ((average + 32) >> 6);
  }

-  unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
-    unsigned int average = 0;
-    for (int h = 0; h < 4; ++h)
-      for (int w = 0; w < 4; ++w)
-        average += source[h * source_stride_ + w];
-    return ((average + 8) >> 4);
-  }
-
  void FillConstant(uint8_t fill_constant) {
    for (int i = 0; i < width_ * height_; ++i) {
        source_data_[i] = fill_constant;
@@ -93,7 +85,7 @@ class AverageTestBase : public ::testing::Test {
 };
 typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);

-typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
+typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;

 class AverageTest
    : public AverageTestBase,
@@ -103,18 +95,12 @@ class AverageTest

 protected:
  void CheckAverages() {
-    unsigned int expected = 0;
-    if (GET_PARAM(3) == 8) {
-      expected = ReferenceAverage8x8(source_data_+ GET_PARAM(2),
-                                     source_stride_);
-    } else  if (GET_PARAM(3) == 4) {
-      expected = ReferenceAverage4x4(source_data_+ GET_PARAM(2),
-                                     source_stride_);
-    }
+    unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
+                                             source_stride_);

-    ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(source_data_+ GET_PARAM(2),
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
                                          source_stride_));
-    unsigned int actual = GET_PARAM(4)(source_data_+ GET_PARAM(2),
+    unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
                                       source_stride_);

    EXPECT_EQ(expected, actual);
@@ -148,30 +134,16 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, AverageTest,
    ::testing::Values(
-        make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
-        make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));
+        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));


 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, AverageTest,
    ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
-        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
-        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
-        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
-
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, AverageTest,
-    ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
-        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
-        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));
+        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
+        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));

 #endif

--- a/test/vp9_decrypt_test.cc
+++ b/test/vp9_decrypt_test.cc
@@ -43,29 +43,29 @@ void test_decrypt_cb(void *decrypt_state, const uint8_t *input,

 namespace libvpx_test {

-//TEST(TestDecrypt, DecryptWorksVp9) {
-//  libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf");
-//  video.Init();
-//
-//  vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
-//  VP9Decoder decoder(dec_cfg, 0);
-//
-//  video.Begin();
-//
-//  // no decryption
-//  vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size());
-//  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-//
-//  // decrypt frame
-//  video.Next();
-//
-//  std::vector<uint8_t> encrypted(video.frame_size());
-//  encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0);
-//  vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] };
-//  decoder.Control(VPXD_SET_DECRYPTOR, &di);
-//
-//  res = decoder.DecodeFrame(&encrypted[0], encrypted.size());
-//  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-//}
+TEST(TestDecrypt, DecryptWorksVp9) {
+  libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf");
+  video.Init();
+
+  vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
+  VP9Decoder decoder(dec_cfg, 0);
+
+  video.Begin();
+
+  // no decryption
+  vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size());
+  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+
+  // decrypt frame
+  video.Next();
+
+  std::vector<uint8_t> encrypted(video.frame_size());
+  encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0);
+  vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] };
+  decoder.Control(VPXD_SET_DECRYPTOR, &di);
+
+  res = decoder.DecodeFrame(&encrypted[0], encrypted.size());
+  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+}

 }  // namespace libvpx_test
--- a/test/vp9_denoiser_sse2_test.cc
+++ b/test/vp9_denoiser_sse2_test.cc
@@ -29,18 +29,19 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 64 * 64;
-class VP9DenoiserTest : public ::testing::TestWithParam<BLOCK_SIZE> {
+class VP9DenoiserTest
+    : public ::testing::TestWithParam<int> {
 public:
  virtual ~VP9DenoiserTest() {}

  virtual void SetUp() {
-    bs_ = GetParam();
+    bs = (BLOCK_SIZE)GetParam();
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  BLOCK_SIZE bs_;
+  BLOCK_SIZE bs;
 };

 TEST_P(VP9DenoiserTest, BitexactCheck) {
@@ -59,8 +60,8 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
-    const int motion_magnitude_random =
-        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    uint8_t motion_magnitude_random
+              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
@@ -68,23 +69,23 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
      sig_block[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block[j] + ((rnd.Rand8() % 2 == 0) ? -1 : 1) *
-             (rnd.Rand8() % 20);
+      temp = sig_block[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
+             (rnd.Rand8()%20);
      // Clip.
-      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
+      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
    }

-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(
-        sig_block, 64, mc_avg_block, 64, avg_block_c,
-        64, 0, bs_, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(sig_block, 64,
+                             mc_avg_block, 64, avg_block_c, 64,
+                             0, bs, motion_magnitude_random));

-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
-        sig_block, 64, mc_avg_block, 64, avg_block_sse2,
-        64, 0, bs_, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(sig_block, 64,
+                             mc_avg_block, 64, avg_block_sse2, 64,
+                             0, bs, motion_magnitude_random));

    // Test bitexactness.
-    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
-      for (int w = 0; w < (4 << b_width_log2_lookup[bs_]); ++w) {
+    for (int h = 0; h < (4 << b_height_log2_lookup[bs]); ++h) {
+      for (int w = 0; w < (4 << b_width_log2_lookup[bs]); ++w) {
        EXPECT_EQ(avg_block_c[h * 64 + w], avg_block_sse2[h * 64 + w]);
      }
    }
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -1,193 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-#include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vp9/decoder/vp9_decoder.h"
-
-typedef vpx_codec_stream_info_t vp9_stream_info_t;
-struct vpx_codec_alg_priv {
-  vpx_codec_priv_t        base;
-  vpx_codec_dec_cfg_t     cfg;
-  vp9_stream_info_t       si;
-  struct VP9Decoder      *pbi;
-  int                     postproc_cfg_set;
-  vp8_postproc_cfg_t      postproc_cfg;
-  vpx_decrypt_cb          decrypt_cb;
-  void                   *decrypt_state;
-  vpx_image_t             img;
-  int                     img_avail;
-  int                     flushed;
-  int                     invert_tile_order;
-  int                     frame_parallel_decode;
-
-  // External frame buffer info to save for VP9 common.
-  void *ext_priv;  // Private data associated with the external frame buffers.
-  vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
-  vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
-};
-
-static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) {
-  return (vpx_codec_alg_priv_t *)ctx->priv;
-}
-
-namespace {
-
-const unsigned int kFramerate = 50;
-const int kCpuUsed = 2;
-
-struct EncodePerfTestVideo {
-  const char *name;
-  uint32_t width;
-  uint32_t height;
-  uint32_t bitrate;
-  int frames;
-};
-
-const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
-  {"niklas_1280_720_30.y4m", 1280, 720, 600, 10},
-};
-
-struct EncodeParameters {
-  int32_t tile_rows;
-  int32_t tile_cols;
-  int32_t lossless;
-  int32_t error_resilient;
-  int32_t frame_parallel;
-  vpx_color_space_t cs;
-  // TODO(JBB): quantizers / bitrate
-};
-
-const EncodeParameters kVP9EncodeParameterSet[] = {
-    {0, 0, 0, 1, 0, VPX_CS_BT_601},
-    {0, 0, 0, 0, 0, VPX_CS_BT_709},
-    {0, 0, 1, 0, 0, VPX_CS_BT_2020},
-    {0, 2, 0, 0, 1, VPX_CS_UNKNOWN},
-    // TODO(JBB): Test profiles (requires more work).
-};
-
-int is_extension_y4m(const char *filename) {
-  const char *dot = strrchr(filename, '.');
-  if (!dot || dot == filename)
-    return 0;
-  else
-    return !strcmp(dot, ".y4m");
-}
-
-class Vp9EncoderParmsGetToDecoder
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \
-                                                 EncodePerfTestVideo> {
- protected:
-  Vp9EncoderParmsGetToDecoder()
-      : EncoderTest(GET_PARAM(0)),
-        encode_parms(GET_PARAM(1)) {
-  }
-
-  virtual ~Vp9EncoderParmsGetToDecoder() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(::libvpx_test::kTwoPassGood);
-    cfg_.g_lag_in_frames = 25;
-    cfg_.g_error_resilient = encode_parms.error_resilient;
-    dec_cfg_.threads = 4;
-    test_video_ = GET_PARAM(2);
-    cfg_.rc_target_bitrate = test_video_.bitrate;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
-      encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
-                       encode_parms.frame_parallel);
-      encoder->Control(VP9E_SET_TILE_ROWS, encode_parms.tile_rows);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, encode_parms.tile_cols);
-      encoder->Control(VP8E_SET_CPUUSED, kCpuUsed);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-    }
-  }
-
-  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
-                                  const libvpx_test::VideoSource& video,
-                                  libvpx_test::Decoder *decoder) {
-    vpx_codec_ctx_t* vp9_decoder = decoder->GetDecoder();
-    vpx_codec_alg_priv_t* priv =
-        (vpx_codec_alg_priv_t*) get_alg_priv(vp9_decoder);
-
-    VP9Decoder* pbi = priv->pbi;
-    VP9_COMMON* common = &pbi->common;
-
-    if (encode_parms.lossless) {
-      EXPECT_EQ(common->base_qindex, 0);
-      EXPECT_EQ(common->y_dc_delta_q, 0);
-      EXPECT_EQ(common->uv_dc_delta_q, 0);
-      EXPECT_EQ(common->uv_ac_delta_q, 0);
-      EXPECT_EQ(common->tx_mode, ONLY_4X4);
-    }
-    EXPECT_EQ(common->error_resilient_mode, encode_parms.error_resilient);
-    if (encode_parms.error_resilient) {
-      EXPECT_EQ(common->frame_parallel_decoding_mode, 1);
-      EXPECT_EQ(common->use_prev_frame_mvs, 0);
-    } else {
-      EXPECT_EQ(common->frame_parallel_decoding_mode,
-                encode_parms.frame_parallel);
-    }
-    EXPECT_EQ(common->color_space, encode_parms.cs);
-    EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols);
-    EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows);
-
-    EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
-    return VPX_CODEC_OK == res_dec;
-  }
-
-  EncodePerfTestVideo test_video_;
-
- private:
-  EncodeParameters encode_parms;
-};
-
-// TODO(hkuang): This test conflicts with frame parallel decode. So disable it
-// for now until fix.
-TEST_P(Vp9EncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::VideoSource *video;
-  if (is_extension_y4m(test_video_.name)) {
-    video = new libvpx_test::Y4mVideoSource(test_video_.name,
-                                            0, test_video_.frames);
-  } else {
-    video = new libvpx_test::YUVVideoSource(test_video_.name,
-                                            VPX_IMG_FMT_I420,
-                                            test_video_.width,
-                                            test_video_.height,
-                                            kFramerate, 1, 0,
-                                            test_video_.frames);
-  }
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  delete(video);
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-    Vp9EncoderParmsGetToDecoder,
-    ::testing::ValuesIn(kVP9EncodeParameterSet),
-    ::testing::ValuesIn(kVP9EncodePerfTestVectors));
-
-}  // namespace
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -1,189 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/y4m_video_source.h"
-#include "test/yuv_video_source.h"
-#include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-namespace {
-
-const unsigned int kWidth  = 160;
-const unsigned int kHeight = 90;
-const unsigned int kFramerate = 50;
-const unsigned int kFrames = 10;
-const int kBitrate = 500;
-// List of psnr thresholds for speed settings 0-7 and 5 encoding modes
-const double kPsnrThreshold[][5] = {
-  { 36.0, 37.0, 37.0, 37.0, 37.0 },
-  { 35.0, 36.0, 36.0, 36.0, 36.0 },
-  { 34.0, 35.0, 35.0, 35.0, 35.0 },
-  { 33.0, 34.0, 34.0, 34.0, 34.0 },
-  { 32.0, 33.0, 33.0, 33.0, 33.0 },
-  { 31.0, 32.0, 32.0, 32.0, 32.0 },
-  { 30.0, 31.0, 31.0, 31.0, 31.0 },
-  { 29.0, 30.0, 30.0, 30.0, 30.0 },
-};
-
-typedef struct {
-  const char *filename;
-  unsigned int input_bit_depth;
-  vpx_img_fmt fmt;
-  vpx_bit_depth_t bit_depth;
-  unsigned int profile;
-} TestVideoParam;
-
-const TestVideoParam kTestVectors[] = {
-  {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
-  {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1},
-  {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
-  {"park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1},
-#if CONFIG_VP9_HIGHBITDEPTH
-  {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2},
-  {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3},
-  {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3},
-  {"park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3},
-  {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2},
-  {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3},
-  {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3},
-  {"park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3},
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-};
-
-// Encoding modes tested
-const libvpx_test::TestMode kEncodingModeVectors[] = {
-  ::libvpx_test::kTwoPassGood,
-  ::libvpx_test::kOnePassGood,
-  ::libvpx_test::kRealTime,
-};
-
-// Speed settings tested
-const int kCpuUsedVectors[] = {1, 2, 3, 5, 6};
-
-int is_extension_y4m(const char *filename) {
-  const char *dot = strrchr(filename, '.');
-  if (!dot || dot == filename)
-    return 0;
-  else
-    return !strcmp(dot, ".y4m");
-}
-
-class EndToEndTestLarge
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode, \
-                                                 TestVideoParam, int> {
- protected:
-  EndToEndTestLarge()
-      : EncoderTest(GET_PARAM(0)),
-        test_video_param_(GET_PARAM(2)),
-        cpu_used_(GET_PARAM(3)),
-        psnr_(0.0),
-        nframes_(0),
-        encoding_mode_(GET_PARAM(1)) {
-  }
-
-  virtual ~EndToEndTestLarge() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-    if (encoding_mode_ != ::libvpx_test::kRealTime) {
-      cfg_.g_lag_in_frames = 5;
-      cfg_.rc_end_usage = VPX_VBR;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = VPX_CBR;
-      cfg_.rc_buf_sz = 1000;
-      cfg_.rc_buf_initial_sz = 500;
-      cfg_.rc_buf_optimal_sz = 600;
-    }
-    dec_cfg_.threads = 4;
-  }
-
-  virtual void BeginPassHook(unsigned int) {
-    psnr_ = 0.0;
-    nframes_ = 0;
-  }
-
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    psnr_ += pkt->data.psnr.psnr[0];
-    nframes_++;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, 4);
-      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
-      if (encoding_mode_ != ::libvpx_test::kRealTime) {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      }
-    }
-  }
-
-  double GetAveragePsnr() const {
-    if (nframes_)
-      return psnr_ / nframes_;
-    return 0.0;
-  }
-
-  double GetPsnrThreshold() {
-    return kPsnrThreshold[cpu_used_][encoding_mode_];
-  }
-
-  TestVideoParam test_video_param_;
-  int cpu_used_;
-
- private:
-  double psnr_;
-  unsigned int nframes_;
-  libvpx_test::TestMode encoding_mode_;
-};
-
-TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
-  cfg_.rc_target_bitrate = kBitrate;
-  cfg_.g_error_resilient = 0;
-  cfg_.g_profile = test_video_param_.profile;
-  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
-  cfg_.g_bit_depth = test_video_param_.bit_depth;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  if (cfg_.g_bit_depth > 8)
-    init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
-
-  libvpx_test::VideoSource *video;
-  if (is_extension_y4m(test_video_param_.filename)) {
-    video = new libvpx_test::Y4mVideoSource(test_video_param_.filename,
-                                            0, kFrames);
-  } else {
-    video = new libvpx_test::YUVVideoSource(test_video_param_.filename,
-                                            test_video_param_.fmt,
-                                            kWidth, kHeight,
-                                            kFramerate, 1, 0, kFrames);
-  }
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
-  const double psnr = GetAveragePsnr();
-  EXPECT_GT(psnr, GetPsnrThreshold());
-  delete(video);
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-    EndToEndTestLarge,
-    ::testing::ValuesIn(kEncodingModeVectors),
-    ::testing::ValuesIn(kTestVectors),
-    ::testing::ValuesIn(kCpuUsedVectors));
-
-}  // namespace
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -1,137 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-#include <vector>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-namespace {
-class VP9EncoderThreadTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  VP9EncoderThreadTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoder_initialized_(false),
-        tiles_(2),
-        encoding_mode_(GET_PARAM(1)),
-        set_cpu_used_(GET_PARAM(2)) {
-    init_flags_ = VPX_CODEC_USE_PSNR;
-    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-    cfg.w = 1280;
-    cfg.h = 720;
-    decoder_ = codec_->CreateDecoder(cfg, 0);
-
-    md5_.clear();
-  }
-  virtual ~VP9EncoderThreadTest() {
-    delete decoder_;
-  }
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-
-    if (encoding_mode_ != ::libvpx_test::kRealTime) {
-      cfg_.g_lag_in_frames = 3;
-      cfg_.rc_end_usage = VPX_VBR;
-      cfg_.rc_2pass_vbr_minsection_pct = 5;
-      cfg_.rc_2pass_vbr_minsection_pct = 2000;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = VPX_CBR;
-      cfg_.g_error_resilient = 1;
-    }
-    cfg_.rc_max_quantizer = 56;
-    cfg_.rc_min_quantizer = 0;
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    encoder_initialized_ = false;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (!encoder_initialized_) {
-      // Encode 4 column tiles.
-      encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_);
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      if (encoding_mode_ != ::libvpx_test::kRealTime) {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      } else {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
-        encoder->Control(VP9E_SET_AQ_MODE, 3);
-      }
-      encoder_initialized_ = true;
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    const vpx_codec_err_t res = decoder_->DecodeFrame(
-        reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
-    if (res != VPX_CODEC_OK) {
-      abort_ = true;
-      ASSERT_EQ(VPX_CODEC_OK, res);
-    }
-    const vpx_image_t *img = decoder_->GetDxData().Next();
-
-    if (img) {
-      ::libvpx_test::MD5 md5_res;
-      md5_res.Add(img);
-      md5_.push_back(md5_res.Get());
-    }
-  }
-
-  bool encoder_initialized_;
-  int tiles_;
-  ::libvpx_test::TestMode encoding_mode_;
-  int set_cpu_used_;
-  ::libvpx_test::Decoder *decoder_;
-  std::vector<std::string> md5_;
-};
-
-TEST_P(VP9EncoderThreadTest, EncoderResultTest) {
-  std::vector<std::string> single_thr_md5, multi_thr_md5;
-
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
-
-  cfg_.rc_target_bitrate = 1000;
-
-  // Encode using single thread.
-  cfg_.g_threads = 1;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  single_thr_md5 = md5_;
-  md5_.clear();
-
-  // Encode using multiple threads.
-  cfg_.g_threads = 4;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  multi_thr_md5 = md5_;
-  md5_.clear();
-
-  // Compare to check if two vectors are equal.
-  ASSERT_EQ(single_thr_md5, multi_thr_md5);
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-    VP9EncoderThreadTest,
-    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-                      ::libvpx_test::kRealTime),
-    ::testing::Range(1, 9));
-}  // namespace
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@@ -1,209 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using std::string;
-
-#if CONFIG_WEBM_IO && 0
-
-struct FileList {
-  const char *name;
-  // md5 sum for decoded frames which does not include skipped frames.
-  const char *expected_md5;
-  const int pause_frame_num;
-};
-
-// Decodes |filename| with |num_threads|. Pause at the specified frame_num,
-// seek to next key frame and then continue decoding until the end. Return
-// the md5 of the decoded frames which does not include skipped frames.
-string DecodeFile(const string &filename, int num_threads, int pause_num) {
-  libvpx_test::WebMVideoSource video(filename);
-  video.Init();
-  int in_frames = 0;
-  int out_frames = 0;
-
-  vpx_codec_dec_cfg_t cfg = {0};
-  cfg.threads = num_threads;
-  vpx_codec_flags_t flags = 0;
-  flags |= VPX_CODEC_USE_FRAME_THREADING;
-  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
-
-  libvpx_test::MD5 md5;
-  video.Begin();
-
-  do {
-    ++in_frames;
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size());
-    if (res != VPX_CODEC_OK) {
-      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-
-    // Pause at specified frame number.
-    if (in_frames == pause_num) {
-      // Flush the decoder and then seek to next key frame.
-      decoder.DecodeFrame(NULL, 0);
-      video.SeekToNextKeyFrame();
-    } else {
-      video.Next();
-    }
-
-    // Flush the decoder at the end of the video.
-    if (!video.cxdata())
-      decoder.DecodeFrame(NULL, 0);
-
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next())) {
-      ++out_frames;
-      md5.Add(img);
-    }
-  } while (video.cxdata() != NULL);
-
-  EXPECT_EQ(in_frames, out_frames) <<
-      "Input frame count does not match output frame count";
-
-  return string(md5.Get());
-}
-
-void DecodeFiles(const FileList files[]) {
-  for (const FileList *iter = files; iter->name != NULL; ++iter) {
-    SCOPED_TRACE(iter->name);
-    for (int t = 2; t <= 8; ++t) {
-      EXPECT_EQ(iter->expected_md5,
-                DecodeFile(iter->name, t, iter->pause_frame_num))
-          << "threads = " << t;
-    }
-  }
-}
-
-TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
-  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
-  // one key frame for every ten frames.
-  static const FileList files[] = {
-    { "vp90-2-07-frame_parallel-1.webm",
-      "6ea7c3875d67252e7caf2bc6e75b36b1", 6},
-    { "vp90-2-07-frame_parallel-1.webm",
-      "4bb634160c7356a8d7d4299b6dc83a45", 12},
-    { "vp90-2-07-frame_parallel-1.webm",
-      "89772591e6ef461f9fa754f916c78ed8", 26},
-    { NULL, NULL, 0},
-  };
-  DecodeFiles(files);
-}
-
-struct InvalidFileList {
-  const char *name;
-  // md5 sum for decoded frames which does not include corrupted frames.
-  const char *expected_md5;
-  // Expected number of decoded frames which does not include corrupted frames.
-  const int expected_frame_count;
-};
-
-// Decodes |filename| with |num_threads|. Return the md5 of the decoded
-// frames which does not include corrupted frames.
-string DecodeInvalidFile(const string &filename, int num_threads,
-                         int expected_frame_count) {
-  libvpx_test::WebMVideoSource video(filename);
-  video.Init();
-
-  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-  cfg.threads = num_threads;
-  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
-  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
-
-  libvpx_test::MD5 md5;
-  video.Begin();
-
-  int out_frames = 0;
-  do {
-    const vpx_codec_err_t res =
-        decoder.DecodeFrame(video.cxdata(), video.frame_size());
-    // TODO(hkuang): frame parallel mode should return an error on corruption.
-    if (res != VPX_CODEC_OK) {
-      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
-      break;
-    }
-
-    video.Next();
-
-    // Flush the decoder at the end of the video.
-    if (!video.cxdata())
-      decoder.DecodeFrame(NULL, 0);
-
-    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next())) {
-      ++out_frames;
-      md5.Add(img);
-    }
-  } while (video.cxdata() != NULL);
-
-  EXPECT_EQ(expected_frame_count, out_frames) <<
-      "Input frame count does not match expected output frame count";
-
-  return string(md5.Get());
-}
-
-void DecodeInvalidFiles(const InvalidFileList files[]) {
-  for (const InvalidFileList *iter = files; iter->name != NULL; ++iter) {
-    SCOPED_TRACE(iter->name);
-    for (int t = 2; t <= 8; ++t) {
-      EXPECT_EQ(iter->expected_md5,
-                DecodeInvalidFile(iter->name, t, iter->expected_frame_count))
-          << "threads = " << t;
-    }
-  }
-}
-
-TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
-  static const InvalidFileList files[] = {
-    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 11th frame has corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-1.webm",
-      "0549d0f45f60deaef8eb708e6c0eb6cb", 30},
-    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 1st and 31st frames have
-    // corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-2.webm",
-      "6a1f3cf6f9e7a364212fadb9580d525e", 20},
-    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
-    // one key frame for every ten frames. The 5th and 13th frames have
-    // corrupted data.
-    { "invalid-vp90-2-07-frame_parallel-3.webm",
-      "8256544308de926b0681e04685b98677", 27},
-    { NULL, NULL, 0},
-  };
-  DecodeInvalidFiles(files);
-}
-
-#endif  // CONFIG_WEBM_IO
-}  // namespace
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -1,349 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vpx/vpx_integer.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-#if CONFIG_VP9_HIGHBITDEPTH
-const int number_of_iterations = 100;
-
-typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
-                             int skip_block, const int16_t *zbin,
-                             const int16_t *round, const int16_t *quant,
-                             const int16_t *quant_shift,
-                             tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                             const int16_t *dequant,
-                             uint16_t *eob, const int16_t *scan,
-                             const int16_t *iscan);
-typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
-    QuantizeParam;
-
-class VP9QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
- public:
-  virtual ~VP9QuantizeTest() {}
-  virtual void SetUp() {
-    quantize_op_   = GET_PARAM(0);
-    ref_quantize_op_ = GET_PARAM(1);
-    bit_depth_  = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  int mask_;
-  QuantizeFunc quantize_op_;
-  QuantizeFunc ref_quantize_op_;
-};
-
-class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
- public:
-  virtual ~VP9Quantize32Test() {}
-  virtual void SetUp() {
-    quantize_op_   = GET_PARAM(0);
-    ref_quantize_op_ = GET_PARAM(1);
-    bit_depth_  = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  int mask_;
-  QuantizeFunc quantize_op_;
-  QuantizeFunc ref_quantize_op_;
-};
-
-TEST_P(VP9QuantizeTest, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = i == 0;
-    const TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);  // 16, 64, 256
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = rnd.Rand16()&mask_;
-    }
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Quantization Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(VP9Quantize32Test, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = i == 0;
-    const TX_SIZE sz = TX_32X32;
-    const TX_TYPE tx_type = (TX_TYPE)(i % 4);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);  // 1024
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = rnd.Rand16()&mask_;
-    }
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Quantization Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(VP9QuantizeTest, EOBCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    // Two random entries
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = 0;
-    }
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Quantization Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
-}
-
-TEST_P(VP9Quantize32Test, EOBCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, zbin_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, round_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, quant_shift_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, dequant_ptr, 2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr, 1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr, 1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = TX_32X32;
-    TX_TYPE tx_type = (TX_TYPE)(i % 4);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 1024
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = 0;
-    }
-    // Two random entries
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-      << "Error: Quantization Test, C output doesn't match SSE2 output. "
-      << "First failed at test case " << first_failure;
-}
-using std::tr1::make_tuple;
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP9QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_quantize_b_sse2,
-                   &vp9_highbd_quantize_b_c, VPX_BITS_12)));
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP9Quantize32Test,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-}  // namespace
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -152,7 +152,7 @@ TEST(VP9WorkerThreadTest, TestInterfaceAPI) {
 // -----------------------------------------------------------------------------
 // Multi-threaded decode tests

-#if CONFIG_WEBM_IO && 0
+#if CONFIG_WEBM_IO
 struct FileList {
  const char *name;
  const char *expected_md5;
@@ -207,7 +207,7 @@ int Reset(VP9Worker *const /*worker*/) { return 1; }
 int Sync(VP9Worker *const worker) { return !worker->had_error; }

 void Execute(VP9Worker *const worker) {
-  worker->had_error |= !worker->hook(worker->data1, worker->data2);
+  worker->had_error |= worker->hook(worker->data1, worker->data2);
 }

 void Launch(VP9Worker *const worker) { Execute(worker); }
--- a/test/vpxdec.sh
+++ b/test/vpxdec.sh
@@ -16,8 +16,7 @@

 # Environment check: Make sure input is available.
 vpxdec_verify_environment() {
-  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ] || \
-    [ ! -e "${VP9_FPM_WEBM_FILE}" ] ; then
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then
    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
@@ -79,20 +78,8 @@ vpxdec_vp9_webm() {
  fi
 }

-vpxdec_vp9_webm_frame_parallel() {
-  if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \
-     [ "$(webm_io_available)" = "yes" ]; then
-    for threads in 2 3 4 5 6 7 8; do
-      vpxdec "${VP9_FPM_WEBM_FILE}" --summary --noblit --threads=$threads \
-        --frame-parallel
-    done
-  fi
-
-}
-
 vpxdec_tests="vpxdec_vp8_ivf
              vpxdec_vp8_ivf_pipe_input
-              vpxdec_vp9_webm
-              vpxdec_vp9_webm_frame_parallel"
+              vpxdec_vp9_webm"

 run_tests vpxdec_verify_environment "${vpxdec_tests}"
--- a/test/webm_video_source.h
+++ b/test/webm_video_source.h
@@ -69,18 +69,6 @@ class WebMVideoSource : public CompressedVideoSource {
    }
  }

-  void SeekToNextKeyFrame() {
-    ASSERT_TRUE(vpx_ctx_->file != NULL);
-    do {
-      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
-      ASSERT_GE(status, 0) << "webm_read_frame failed";
-      ++frame_;
-      if (status == 1) {
-        end_of_file_ = true;
-      }
-    } while (!webm_ctx_->is_key_frame && !end_of_file_);
-  }
-
  virtual const uint8_t *cxdata() const {
    return end_of_file_ ? NULL : buf_;
  }
--- a/test/yuv_video_source.h
+++ b/test/yuv_video_source.h
@@ -1,151 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#ifndef TEST_YUV_VIDEO_SOURCE_H_
-#define TEST_YUV_VIDEO_SOURCE_H_
-
-#include <cstdio>
-#include <cstdlib>
-#include <string>
-
-#include "test/video_source.h"
-#include "vpx/vpx_image.h"
-
-namespace libvpx_test {
-
-// This class extends VideoSource to allow parsing of raw YUV
-// formats of various color sampling and bit-depths so that we can
-// do actual file encodes.
-class YUVVideoSource : public VideoSource {
- public:
-  YUVVideoSource(const std::string &file_name, vpx_img_fmt format,
-                 unsigned int width, unsigned int height,
-                 int rate_numerator, int rate_denominator,
-                 unsigned int start, int limit)
-      : file_name_(file_name),
-        input_file_(NULL),
-        img_(NULL),
-        start_(start),
-        limit_(limit),
-        frame_(0),
-        width_(0),
-        height_(0),
-        format_(VPX_IMG_FMT_NONE),
-        framerate_numerator_(rate_numerator),
-        framerate_denominator_(rate_denominator) {
-    // This initializes format_, raw_size_, width_, height_ and allocates img.
-    SetSize(width, height, format);
-  }
-
-  virtual ~YUVVideoSource() {
-    vpx_img_free(img_);
-    if (input_file_)
-      fclose(input_file_);
-  }
-
-  virtual void Begin() {
-    if (input_file_)
-      fclose(input_file_);
-    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-                                     << file_name_;
-    if (start_)
-      fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
-
-    frame_ = start_;
-    FillFrame();
-  }
-
-  virtual void Next() {
-    ++frame_;
-    FillFrame();
-  }
-
-  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
-
-  // Models a stream where Timebase = 1/FPS, so pts == frame.
-  virtual vpx_codec_pts_t pts() const { return frame_; }
-
-  virtual unsigned long duration() const { return 1; }
-
-  virtual vpx_rational_t timebase() const {
-    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
-    return t;
-  }
-
-  virtual unsigned int frame() const { return frame_; }
-
-  virtual unsigned int limit() const { return limit_; }
-
-  virtual void SetSize(unsigned int width, unsigned int height,
-                       vpx_img_fmt format) {
-    if (width != width_ || height != height_ || format != format_) {
-      vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, format, width, height, 1);
-      ASSERT_TRUE(img_ != NULL);
-      width_ = width;
-      height_ = height;
-      format_ = format;
-      switch (format) {
-        case VPX_IMG_FMT_I420:
-          raw_size_ = width * height * 3 / 2;
-          break;
-        case VPX_IMG_FMT_I422:
-          raw_size_ = width * height * 2;
-          break;
-        case VPX_IMG_FMT_I440:
-          raw_size_ = width * height * 2;
-          break;
-        case VPX_IMG_FMT_I444:
-          raw_size_ = width * height * 3;
-          break;
-        case VPX_IMG_FMT_I42016:
-          raw_size_ = width * height * 3;
-          break;
-        case VPX_IMG_FMT_I42216:
-          raw_size_ = width * height * 4;
-          break;
-        case VPX_IMG_FMT_I44016:
-          raw_size_ = width * height * 4;
-          break;
-        case VPX_IMG_FMT_I44416:
-          raw_size_ = width * height * 6;
-          break;
-        default:
-          ASSERT_TRUE(0);
-      }
-    }
-  }
-
-  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
-    // Read a frame from input_file.
-    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
-      limit_ = frame_;
-    }
-  }
-
- protected:
-  std::string file_name_;
-  FILE *input_file_;
-  vpx_image_t *img_;
-  size_t raw_size_;
-  unsigned int start_;
-  unsigned int limit_;
-  unsigned int frame_;
-  unsigned int width_;
-  unsigned int height_;
-  vpx_img_fmt format_;
-  int framerate_numerator_;
-  int framerate_denominator_;
-};
-
-}  // namespace libvpx_test
-
-#endif  // TEST_YUV_VIDEO_SOURCE_H_
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1305
+Version: 1060
 License: BSD
 License File: LICENSE

@@ -13,4 +13,4 @@ which down-samples the original input video (f.g. 1280x720) a number of times
 in order to encode multiple resolution bit streams.

 Local Modifications:
-cherry pick r1311 'disable nv12 avx2 for vs9/10 that dont support avx2 instructions.'
+cherry-pick 'Issue 24479004: Fix building with MSVC for arm'
--- a/third_party/libyuv/include/libyuv/compare.h
+++ b/third_party/libyuv/include/libyuv/compare.h
@@ -22,11 +22,6 @@ extern "C" {
 LIBYUV_API
 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);

-// Scan an opaque argb image and return fourcc based on alpha offset.
-// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
-LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
-
 // Sum Square Error - used to compute Mean Square Error or PSNR.
 LIBYUV_API
 uint64 ComputeSumSquareError(const uint8* src_a,
--- a/third_party/libyuv/include/libyuv/convert.h
+++ b/third_party/libyuv/include/libyuv/convert.h
@@ -113,6 +113,15 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

+// Convert Q420 to I420.
+LIBYUV_API
+int Q420ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height);
+
 // ARGB little endian (bgra in memory) to I420.
 LIBYUV_API
 int ARGBToI420(const uint8* src_frame, int src_stride_frame,
@@ -202,6 +211,8 @@ int MJPGSize(const uint8* sample, size_t sample_size,
             int* width, int* height);
 #endif

+// Note Bayer formats (BGGR) to I420 are in format_conversion.h.
+
 // Convert camera sample to I420 with cropping, rotation and vertical flip.
 // "src_size" is needed to parse MJPG.
 // "dst_stride_y" number of bytes in a row of the dst_y plane.
--- a/third_party/libyuv/include/libyuv/convert_argb.h
+++ b/third_party/libyuv/include/libyuv/convert_argb.h
@@ -18,6 +18,7 @@
 #include "libyuv/rotate.h"

 // TODO(fbarchard): This set of functions should exactly match convert.h
+// Add missing Q420.
 // TODO(fbarchard): Add tests. Create random content of right size and convert
 // with C vs Opt and or to I420 and compare.
 // TODO(fbarchard): Some of these functions lack parameter setting.
@@ -103,6 +104,13 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

+// TODO(fbarchard): Convert Q420 to ARGB.
+// LIBYUV_API
+// int Q420ToARGB(const uint8* src_y, int src_stride_y,
+//                const uint8* src_yuy2, int src_stride_yuy2,
+//                uint8* dst_argb, int dst_stride_argb,
+//                int width, int height);
+
 // Convert YUY2 to ARGB.
 LIBYUV_API
 int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
@@ -115,22 +123,6 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

-// Convert J420 to ARGB.
-LIBYUV_API
-int J420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
-// Convert J422 to ARGB.
-LIBYUV_API
-int J422ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height);
-
 // BGRA little endian (argb in memory) to ARGB.
 LIBYUV_API
 int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
@@ -192,6 +184,8 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
               int dst_width, int dst_height);
 #endif

+// Note Bayer formats (BGGR) to ARGB are in format_conversion.h.
+
 // Convert camera sample to ARGB with cropping, rotation and vertical flip.
 // "src_size" is needed to parse MJPG.
 // "dst_stride_argb" number of bytes in a row of the dst_argb plane.
--- a/third_party/libyuv/include/libyuv/convert_from.h
+++ b/third_party/libyuv/include/libyuv/convert_from.h
@@ -57,6 +57,7 @@ int I400Copy(const uint8* src_y, int src_stride_y,
             int width, int height);

 // TODO(fbarchard): I420ToM420
+// TODO(fbarchard): I420ToQ420

 LIBYUV_API
 int I420ToNV12(const uint8* src_y, int src_stride_y,
@@ -151,6 +152,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
                   uint8* dst_frame, int dst_stride_frame,
                   int width, int height);

+// Note I420 to Bayer formats (BGGR) are in format_conversion.h.
+
 // Convert I420 to specified format.
 // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
 //    buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
--- a/third_party/libyuv/include/libyuv/convert_from_argb.h
+++ b/third_party/libyuv/include/libyuv/convert_from_argb.h
@@ -61,13 +61,6 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height);

-// Convert ARGB To RGB565 with 8x8 dither matrix (64 bytes).
-// Values in dither matrix from 0 to 255.  128 is best for no dither.
-LIBYUV_API
-int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
-                       uint8* dst_rgb565, int dst_stride_rgb565,
-                       const uint8* dither8x8, int width, int height);
-
 // Convert ARGB To ARGB1555.
 LIBYUV_API
 int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
@@ -112,14 +105,6 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

-// Convert ARGB to J422.
-LIBYUV_API
-int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_yj, int dst_stride_yj,
-               uint8* dst_u, int dst_stride_u,
-               uint8* dst_v, int dst_stride_v,
-               int width, int height);
-
 // Convert ARGB To I411.
 LIBYUV_API
 int ARGBToI411(const uint8* src_argb, int src_stride_argb,
--- a/third_party/libyuv/include/libyuv/format_conversion.h
+++ b/third_party/libyuv/include/libyuv/format_conversion.h
@@ -0,0 +1,168 @@
+/*
+ *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef INCLUDE_LIBYUV_FORMATCONVERSION_H_  // NOLINT
+#define INCLUDE_LIBYUV_FORMATCONVERSION_H_
+
+#include "libyuv/basic_types.h"
+
+#ifdef __cplusplus
+namespace libyuv {
+extern "C" {
+#endif
+
+// Convert Bayer RGB formats to I420.
+LIBYUV_API
+int BayerBGGRToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGBRGToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGRBGToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+LIBYUV_API
+int BayerRGGBToI420(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height);
+
+// Temporary API mapper.
+#define BayerRGBToI420(b, bs, f, y, ys, u, us, v, vs, w, h) \
+    BayerToI420(b, bs, y, ys, u, us, v, vs, w, h, f)
+
+LIBYUV_API
+int BayerToI420(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_y, int dst_stride_y,
+                uint8* dst_u, int dst_stride_u,
+                uint8* dst_v, int dst_stride_v,
+                int width, int height,
+                uint32 src_fourcc_bayer);
+
+// Convert I420 to Bayer RGB formats.
+LIBYUV_API
+int I420ToBayerBGGR(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerGBRG(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerGRBG(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+LIBYUV_API
+int I420ToBayerRGGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    uint8* dst_frame, int dst_stride_frame,
+                    int width, int height);
+
+// Temporary API mapper.
+#define I420ToBayerRGB(y, ys, u, us, v, vs, b, bs, f, w, h) \
+    I420ToBayer(y, ys, u, us, v, vs, b, bs, w, h, f)
+
+LIBYUV_API
+int I420ToBayer(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_frame, int dst_stride_frame,
+                int width, int height,
+                uint32 dst_fourcc_bayer);
+
+// Convert Bayer RGB formats to ARGB.
+LIBYUV_API
+int BayerBGGRToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGBRGToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerGRBGToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+LIBYUV_API
+int BayerRGGBToARGB(const uint8* src_bayer, int src_stride_bayer,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height);
+
+// Temporary API mapper.
+#define BayerRGBToARGB(b, bs, f, a, as, w, h) BayerToARGB(b, bs, a, as, w, h, f)
+
+LIBYUV_API
+int BayerToARGB(const uint8* src_bayer, int src_stride_bayer,
+                uint8* dst_argb, int dst_stride_argb,
+                int width, int height,
+                uint32 src_fourcc_bayer);
+
+// Convert ARGB to Bayer RGB formats.
+LIBYUV_API
+int ARGBToBayerBGGR(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerGBRG(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerGRBG(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+LIBYUV_API
+int ARGBToBayerRGGB(const uint8* src_argb, int src_stride_argb,
+                    uint8* dst_bayer, int dst_stride_bayer,
+                    int width, int height);
+
+// Temporary API mapper: fourcc moves last; ARGB source stays ahead of Bayer dst.
+#define ARGBToBayerRGB(a, as, b, bs, f, w, h) ARGBToBayer(a, as, b, bs, w, h, f)
+
+LIBYUV_API
+int ARGBToBayer(const uint8* src_argb, int src_stride_argb,
+                uint8* dst_bayer, int dst_stride_bayer,
+                int width, int height,
+                uint32 dst_fourcc_bayer);
+
+#ifdef __cplusplus
+}  // extern "C"
+}  // namespace libyuv
+#endif
+
+#endif  // INCLUDE_LIBYUV_FORMATCONVERSION_H_  NOLINT
--- a/third_party/libyuv/include/libyuv/row.h
+++ b/third_party/libyuv/include/libyuv/row.h
--- a/third_party/libyuv/include/libyuv/scale.h
+++ b/third_party/libyuv/include/libyuv/scale.h
@@ -34,7 +34,6 @@ void ScalePlane(const uint8* src, int src_stride,
                int dst_width, int dst_height,
                enum FilterMode filtering);

-LIBYUV_API
 void ScalePlane_16(const uint16* src, int src_stride,
                   int src_width, int src_height,
                   uint16* dst, int dst_stride,
--- a/third_party/libyuv/include/libyuv/scale_row.h
+++ b/third_party/libyuv/include/libyuv/scale_row.h
@@ -44,13 +44,21 @@ extern "C" {

 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
+    (defined(__ARM_NEON__) || defined(LIBYUV_NEON))
 #define HAS_SCALEROWDOWN2_NEON
 #define HAS_SCALEROWDOWN4_NEON
 #define HAS_SCALEROWDOWN34_NEON
 #define HAS_SCALEROWDOWN38_NEON
 #define HAS_SCALEARGBROWDOWNEVEN_NEON
 #define HAS_SCALEARGBROWDOWN2_NEON
+#elif !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
+    (defined(__aarch64__) || defined(LIBYUV_NEON))
+/* #define HAS_SCALEROWDOWN2_NEON */
+/* #define HAS_SCALEROWDOWN4_NEON */
+/* #define HAS_SCALEROWDOWN34_NEON */
+/* #define HAS_SCALEROWDOWN38_NEON */
+/* #define HAS_SCALEARGBROWDOWNEVEN_NEON */
+/* #define HAS_SCALEARGBROWDOWN2_NEON */
 #endif

 // The following are available on Mips platforms:
@@ -200,6 +208,15 @@ void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                              uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Unaligned_SSE2(const uint8* src_ptr,
+                                  ptrdiff_t src_stride,
+                                  uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Unaligned_SSE2(const uint8* src_ptr,
+                                        ptrdiff_t src_stride,
+                                        uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Unaligned_SSE2(const uint8* src_ptr,
+                                     ptrdiff_t src_stride,
+                                     uint8* dst_ptr, int dst_width);
 void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
 void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -250,10 +267,10 @@ void ScaleARGBFilterCols_SSSE3(uint8* dst_argb, const uint8* src_argb,
 void ScaleARGBColsUp2_SSE2(uint8* dst_argb, const uint8* src_argb,
                           int dst_width, int x, int dx);
 // Row functions.
-void ScaleARGBRowDownEven_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+void ScaleARGBRowDownEven_NEON(const uint8* src_argb, int src_stride,
                               int src_stepx,
                               uint8* dst_argb, int dst_width);
-void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, ptrdiff_t src_stride,
+void ScaleARGBRowDownEvenBox_NEON(const uint8* src_argb, int src_stride,
                                  int src_stepx,
                                  uint8* dst_argb, int dst_width);
 void ScaleARGBRowDown2_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
--- a/third_party/libyuv/include/libyuv/version.h
+++ b/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1305
+#define LIBYUV_VERSION 1059

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/third_party/libyuv/include/libyuv/video_common.h
+++ b/third_party/libyuv/include/libyuv/video_common.h
@@ -62,7 +62,7 @@ enum FourCC {

  // 2 Secondary YUV formats: row biplanar.
  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
-  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
+  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),

  // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@@ -75,7 +75,7 @@ enum FourCC {
  FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'),  // argb1555 LE.
  FOURCC_R444 = FOURCC('R', '4', '4', '4'),  // argb4444 LE.

-  // 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
+  // 4 Secondary RGB formats: 4 Bayer Patterns.
  FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
  FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
  FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
--- a/third_party/libyuv/source/compare.cc
+++ b/third_party/libyuv/source/compare.cc
@@ -19,7 +19,6 @@
 #include "libyuv/basic_types.h"
 #include "libyuv/cpu_id.h"
 #include "libyuv/row.h"
-#include "libyuv/video_common.h"

 #ifdef __cplusplus
 namespace libyuv {
@@ -79,54 +78,6 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  return seed;
 }

-static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
-  int x;
-  for (x = 0; x < width - 1; x += 2) {
-    if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
-      return FOURCC_BGRA;
-    }
-    if (argb[3] != 255) {  // 4th byte is not Alpha of 255, so not BGRA.
-      return FOURCC_ARGB;
-    }
-    if (argb[4] != 255) {  // Second pixel first byte is not Alpha of 255.
-      return FOURCC_BGRA;
-    }
-    if (argb[7] != 255) {  // Second pixel 4th byte is not Alpha of 255.
-      return FOURCC_ARGB;
-    }
-    argb += 8;
-  }
-  if (width & 1) {
-    if (argb[0] != 255) {  // First byte is not Alpha of 255, so not ARGB.
-      return FOURCC_BGRA;
-    }
-    if (argb[3] != 255) {  // 4th byte is not Alpha of 255, so not BGRA.
-      return FOURCC_ARGB;
-    }
-  }
-  return 0;
-}
-
-// Scan an opaque argb image and return fourcc based on alpha offset.
-// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
-LIBYUV_API
-uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
-  uint32 fourcc = 0;
-  int h;
-
-  // Coalesce rows.
-  if (stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    stride_argb = 0;
-  }
-  for (h = 0; h < height && fourcc == 0; ++h) {
-    fourcc = ARGBDetectRow_C(argb, width);
-    argb += stride_argb;
-  }
-  return fourcc;
-}
-
 uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
 #if !defined(LIBYUV_DISABLE_NEON) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
@@ -163,7 +114,8 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
  }
 #endif
 #if defined(HAS_SUMSQUAREERROR_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) &&
+      IS_ALIGNED(src_a, 16) && IS_ALIGNED(src_b, 16)) {
    // Note only used for multiples of 16 so count is not checked.
    SumSquareError = SumSquareError_SSE2;
  }
--- a/third_party/libyuv/source/compare_neon.cc
+++ b/third_party/libyuv/source/compare_neon.cc
@@ -16,8 +16,7 @@ namespace libyuv {
 extern "C" {
 #endif

-#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
-    !defined(__aarch64__)
+#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__)

 uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  volatile uint32 sse;
@@ -57,7 +56,46 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
  return sse;
 }

-#endif  // defined(__ARM_NEON__) && !defined(__aarch64__)
+#elif !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+
+uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
+  volatile uint32 sse;
+  asm volatile (
+    "eor        v16.16b, v16.16b, v16.16b      \n"
+    "eor        v18.16b, v18.16b, v18.16b      \n"
+    "eor        v17.16b, v17.16b, v17.16b      \n"
+    "eor        v19.16b, v19.16b, v19.16b      \n"
+
+    ".p2align  2                               \n"
+  "1:                                          \n"
+    MEMACCESS(0)
+    "ld1        {v0.16b}, [%0], #16            \n"
+    MEMACCESS(1)
+    "ld1        {v1.16b}, [%1], #16            \n"
+    "subs       %2, %2, #16                    \n"
+    "usubl      v2.8h, v0.8b, v1.8b            \n"
+    "usubl2     v3.8h, v0.16b, v1.16b          \n"
+    "smlal      v16.4s, v2.4h, v2.4h           \n"
+    "smlal      v17.4s, v3.4h, v3.4h           \n"
+    "smlal2     v18.4s, v2.8h, v2.8h           \n"
+    "smlal2     v19.4s, v3.8h, v3.8h           \n"
+    "bgt        1b                             \n"
+
+    "add        v16.4s, v16.4s, v17.4s         \n"
+    "add        v18.4s, v18.4s, v19.4s         \n"
+    "add        v19.4s, v16.4s, v18.4s         \n"
+    "addv       s0, v19.4s                     \n"
+    "fmov       %w3, s0                        \n"
+    : "+r"(src_a),
+      "+r"(src_b),
+      "+r"(count),
+      "=r"(sse)
+    :
+    : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
+  return sse;
+}
+
+#endif  // !defined(LIBYUV_DISABLE_NEON) && (__ARM_NEON__ || __aarch64__)

 #ifdef __cplusplus
 }  // extern "C"
--- a/third_party/libyuv/source/compare_neon64.cc
+++ b/third_party/libyuv/source/compare_neon64.cc
@@ -1,63 +0,0 @@
-/*
- *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS. All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "libyuv/basic_types.h"
-#include "libyuv/row.h"
-
-#ifdef __cplusplus
-namespace libyuv {
-extern "C" {
-#endif
-
-#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
-  volatile uint32 sse;
-  asm volatile (
-    "eor        v16.16b, v16.16b, v16.16b      \n"
-    "eor        v18.16b, v18.16b, v18.16b      \n"
-    "eor        v17.16b, v17.16b, v17.16b      \n"
-    "eor        v19.16b, v19.16b, v19.16b      \n"
-
-    ".p2align  2                               \n"
-  "1:                                          \n"
-    MEMACCESS(0)
-    "ld1        {v0.16b}, [%0], #16            \n"
-    MEMACCESS(1)
-    "ld1        {v1.16b}, [%1], #16            \n"
-    "subs       %2, %2, #16                    \n"
-    "usubl      v2.8h, v0.8b, v1.8b            \n"
-    "usubl2     v3.8h, v0.16b, v1.16b          \n"
-    "smlal      v16.4s, v2.4h, v2.4h           \n"
-    "smlal      v17.4s, v3.4h, v3.4h           \n"
-    "smlal2     v18.4s, v2.8h, v2.8h           \n"
-    "smlal2     v19.4s, v3.8h, v3.8h           \n"
-    "b.gt       1b                             \n"
-
-    "add        v16.4s, v16.4s, v17.4s         \n"
-    "add        v18.4s, v18.4s, v19.4s         \n"
-    "add        v19.4s, v16.4s, v18.4s         \n"
-    "addv       s0, v19.4s                     \n"
-    "fmov       %w3, s0                        \n"
-    : "+r"(src_a),
-      "+r"(src_b),
-      "+r"(count),
-      "=r"(sse)
-    :
-    : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
-  return sse;
-}
-
-#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
-
-#ifdef __cplusplus
-}  // extern "C"
-}  // namespace libyuv
-#endif
--- a/third_party/libyuv/source/compare_posix.cc
+++ b/third_party/libyuv/source/compare_posix.cc
@@ -25,10 +25,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    "pxor      %%xmm5,%%xmm5                   \n"
    LABELALIGN
  "1:                                          \n"
-    "movdqu    " MEMACCESS(0) ",%%xmm1         \n"
+    "movdqa    " MEMACCESS(0) ",%%xmm1         \n"
    "lea       " MEMLEA(0x10, 0) ",%0          \n"
-    "movdqu    " MEMACCESS(1) ",%%xmm2         \n"
+    "movdqa    " MEMACCESS(1) ",%%xmm2         \n"
    "lea       " MEMLEA(0x10, 1) ",%1          \n"
+    "sub       $0x10,%2                        \n"
    "movdqa    %%xmm1,%%xmm3                   \n"
    "psubusb   %%xmm2,%%xmm1                   \n"
    "psubusb   %%xmm3,%%xmm2                   \n"
@@ -40,7 +41,6 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    "pmaddwd   %%xmm2,%%xmm2                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
    "paddd     %%xmm2,%%xmm0                   \n"
-    "sub       $0x10,%2                        \n"
    "jg        1b                              \n"

    "pshufd    $0xee,%%xmm0,%%xmm1             \n"
@@ -53,7 +53,11 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    "+r"(src_b),      // %1
    "+r"(count),      // %2
    "=g"(sse)         // %3
-  :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+  :
+  : "memory", "cc"
+#if defined(__SSE2__)
+    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
+#endif
  );  // NOLINT
  return sse;
 }
@@ -120,13 +124,13 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    "pmulld    %%xmm5,%%xmm1                   \n"
    "paddd     %%xmm4,%%xmm3                   \n"
    "paddd     %%xmm2,%%xmm1                   \n"
+    "sub       $0x10,%1                        \n"
    "paddd     %%xmm3,%%xmm1                   \n"
    "pshufd    $0xe,%%xmm1,%%xmm2              \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    "pshufd    $0x1,%%xmm1,%%xmm2              \n"
    "paddd     %%xmm2,%%xmm1                   \n"
    "paddd     %%xmm1,%%xmm0                   \n"
-    "sub       $0x10,%1                        \n"
    "jg        1b                              \n"
    "movd      %%xmm0,%3                       \n"
  : "+r"(src),        // %0
@@ -139,7 +143,9 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    "m"(kHashMul2),   // %7
    "m"(kHashMul3)    // %8
  : "memory", "cc"
+#if defined(__SSE2__)
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
+#endif
  );  // NOLINT
  return hash;
 }
--- a/third_party/libyuv/source/compare_win.cc
+++ b/third_party/libyuv/source/compare_win.cc
@@ -27,11 +27,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    pxor       xmm0, xmm0
    pxor       xmm5, xmm5

+    align      4
  wloop:
-    movdqu     xmm1, [eax]
+    movdqa     xmm1, [eax]
    lea        eax,  [eax + 16]
-    movdqu     xmm2, [edx]
+    movdqa     xmm2, [edx]
    lea        edx,  [edx + 16]
+    sub        ecx, 16
    movdqa     xmm3, xmm1  // abs trick
    psubusb    xmm1, xmm2
    psubusb    xmm2, xmm3
@@ -43,7 +45,6 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    pmaddwd    xmm2, xmm2
    paddd      xmm0, xmm1
    paddd      xmm0, xmm2
-    sub        ecx, 16
    jg         wloop

    pshufd     xmm1, xmm0, 0xee
@@ -69,10 +70,12 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
    vpxor      ymm5, ymm5, ymm5  // constant 0 for unpck
    sub        edx, eax

+    align      4
  wloop:
    vmovdqu    ymm1, [eax]
    vmovdqu    ymm2, [eax + edx]
    lea        eax,  [eax + 32]
+    sub        ecx, 32
    vpsubusb   ymm3, ymm1, ymm2  // abs difference trick
    vpsubusb   ymm2, ymm2, ymm1
    vpor       ymm1, ymm2, ymm3
@@ -82,7 +85,6 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
    vpmaddwd   ymm1, ymm1, ymm1
    vpaddd     ymm0, ymm0, ymm1
    vpaddd     ymm0, ymm0, ymm2
-    sub        ecx, 32
    jg         wloop

    vpshufd    ymm1, ymm0, 0xee  // 3, 2 + 1, 0 both lanes.
@@ -143,6 +145,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    pxor       xmm7, xmm7        // constant 0 for unpck
    movdqa     xmm6, kHash16x33

+    align      4
  wloop:
    movdqu     xmm1, [eax]       // src[0-15]
    lea        eax, [eax + 16]
@@ -167,6 +170,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    pmulld(0xcd)                 // pmulld     xmm1, xmm5
    paddd      xmm3, xmm4        // add 16 results
    paddd      xmm1, xmm2
+    sub        ecx, 16
    paddd      xmm1, xmm3

    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
@@ -174,7 +178,6 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    pshufd     xmm2, xmm1, 0x01
    paddd      xmm1, xmm2
    paddd      xmm0, xmm1
-    sub        ecx, 16
    jg         wloop

    movd       eax, xmm0         // return hash
@@ -192,6 +195,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
    movd       xmm0, [esp + 12]  // seed
    movdqa     xmm6, kHash16x33

+    align      4
  wloop:
    vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3]
    pmulld     xmm0, xmm6  // hash *= 33 ^ 16
@@ -205,13 +209,13 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
    pmulld     xmm1, kHashMul3
    paddd      xmm3, xmm4        // add 16 results
    paddd      xmm1, xmm2
+    sub        ecx, 16
    paddd      xmm1, xmm3
    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
    paddd      xmm1, xmm2
    pshufd     xmm2, xmm1, 0x01
    paddd      xmm1, xmm2
    paddd      xmm0, xmm1
-    sub        ecx, 16
    jg         wloop

    movd       eax, xmm0         // return hash
--- a/third_party/libyuv/source/convert.cc
+++ b/third_party/libyuv/source/convert.cc
@@ -188,14 +188,17 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
                       int width, int height) {
  int y;
  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
-#if defined(HAS_COPYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
  }
 #endif
-#if defined(HAS_COPYROW_AVX)
-  if (TestCpuFlag(kCpuHasAVX)) {
-    CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX;
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src, 16) &&
+      IS_ALIGNED(src_stride_0, 16) && IS_ALIGNED(src_stride_1, 16) &&
+      IS_ALIGNED(dst, 16) && IS_ALIGNED(dst_stride, 16)) {
+    CopyRow = CopyRow_SSE2;
  }
 #endif
 #if defined(HAS_COPYROW_ERMS)
@@ -204,8 +207,8 @@ static void CopyPlane2(const uint8* src, int src_stride_0, int src_stride_1,
  }
 #endif
 #if defined(HAS_COPYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
  }
 #endif
 #if defined(HAS_COPYROW_MIPS)
@@ -280,15 +283,20 @@ static int X420ToI420(const uint8* src_y,
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
 #if defined(HAS_SPLITUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && halfwidth >= 16) {
    SplitUVRow = SplitUVRow_Any_SSE2;
    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_SSE2;
+      SplitUVRow = SplitUVRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_uv, 16) && IS_ALIGNED(src_stride_uv, 16) &&
+          IS_ALIGNED(dst_u, 16) && IS_ALIGNED(dst_stride_u, 16) &&
+          IS_ALIGNED(dst_v, 16) && IS_ALIGNED(dst_stride_v, 16)) {
+        SplitUVRow = SplitUVRow_SSE2;
+      }
    }
  }
 #endif
 #if defined(HAS_SPLITUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
+  if (TestCpuFlag(kCpuHasAVX2) && halfwidth >= 32) {
    SplitUVRow = SplitUVRow_Any_AVX2;
    if (IS_ALIGNED(halfwidth, 32)) {
      SplitUVRow = SplitUVRow_AVX2;
@@ -296,7 +304,7 @@ static int X420ToI420(const uint8* src_y,
  }
 #endif
 #if defined(HAS_SPLITUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && halfwidth >= 16) {
    SplitUVRow = SplitUVRow_Any_NEON;
    if (IS_ALIGNED(halfwidth, 16)) {
      SplitUVRow = SplitUVRow_NEON;
@@ -304,13 +312,15 @@ static int X420ToI420(const uint8* src_y,
  }
 #endif
 #if defined(HAS_SPLITUVROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
-      IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
-      IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
-      IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
+  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && halfwidth >= 16) {
    SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_MIPS_DSPR2;
+      SplitUVRow = SplitUVRow_Unaligned_MIPS_DSPR2;
+      if (IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
+          IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
+          IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
+        SplitUVRow = SplitUVRow_MIPS_DSPR2;
+      }
    }
  }
 #endif
@@ -381,6 +391,125 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
                    width, height);
 }

+// Convert Q420 to I420. Returns 0 on success, -1 on a NULL plane or bad size.
+// Format is rows of YY/YUYV: Y-only rows (src_y) alternate with YUY2 rows.
+LIBYUV_API
+int Q420ToI420(const uint8* src_y, int src_stride_y,
+               const uint8* src_yuy2, int src_stride_yuy2,
+               uint8* dst_y, int dst_stride_y,
+               uint8* dst_u, int dst_stride_u,
+               uint8* dst_v, int dst_stride_v,
+               int width, int height) {
+  int y;
+  int halfheight;  // Chroma rows; only set/used when height < 0.
+  void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
+  void (*YUY2ToUV422Row)(const uint8* src_yuy2, uint8* dst_u, uint8* dst_v,
+      int pix) = YUY2ToUV422Row_C;
+  void (*YUY2ToYRow)(const uint8* src_yuy2, uint8* dst_y, int pix) =
+      YUY2ToYRow_C;
+  if (!src_y || !src_yuy2 ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image: dst planes are written bottom-up.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    dst_y = dst_y + (height - 1) * dst_stride_y;
+    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
+    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
+    dst_stride_y = -dst_stride_y;
+    dst_stride_u = -dst_stride_u;
+    dst_stride_v = -dst_stride_v;
+  }
+  // CopyRow copies Y-only Q420 rows to the I420 Y plane; last match wins.
+#if defined(HAS_COPYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) {
+    CopyRow = CopyRow_NEON;
+  }
+#endif
+#if defined(HAS_COPYROW_X86)
+  if (TestCpuFlag(kCpuHasX86) && IS_ALIGNED(width, 4)) {
+    CopyRow = CopyRow_X86;
+  }
+#endif
+#if defined(HAS_COPYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32) &&
+      IS_ALIGNED(src_y, 16) && IS_ALIGNED(src_stride_y, 16) &&
+      IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+    CopyRow = CopyRow_SSE2;
+  }
+#endif
+#if defined(HAS_COPYROW_ERMS)
+  if (TestCpuFlag(kCpuHasERMS)) {
+    CopyRow = CopyRow_ERMS;
+  }
+#endif
+#if defined(HAS_COPYROW_MIPS)
+  if (TestCpuFlag(kCpuHasMIPS)) {
+    CopyRow = CopyRow_MIPS;
+  }
+#endif
+
+#if defined(HAS_YUY2TOYROW_SSE2)
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2;
+    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Unaligned_SSE2;
+      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
+        YUY2ToUV422Row = YUY2ToUV422Row_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          YUY2ToYRow = YUY2ToYRow_SSE2;
+        }
+      }
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
+    YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2;
+    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
+    if (IS_ALIGNED(width, 32)) {
+      YUY2ToUV422Row = YUY2ToUV422Row_AVX2;
+      YUY2ToYRow = YUY2ToYRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_YUY2TOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
+    YUY2ToYRow = YUY2ToYRow_Any_NEON;
+    if (width >= 16) {
+      YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON;
+    }
+    if (IS_ALIGNED(width, 16)) {
+      YUY2ToYRow = YUY2ToYRow_NEON;
+      YUY2ToUV422Row = YUY2ToUV422Row_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height - 1; y += 2) {  // Each pass: 1 Y row + 1 YUY2 row.
+    CopyRow(src_y, dst_y, width);
+    src_y += src_stride_y;
+    dst_y += dst_stride_y;
+
+    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+    YUY2ToYRow(src_yuy2, dst_y, width);
+    src_yuy2 += src_stride_yuy2;
+    dst_y += dst_stride_y;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  if (height & 1) {  // Odd height: last Y row; UV read from next YUY2 row.
+    CopyRow(src_y, dst_y, width);
+    YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width);
+  }
+  return 0;
+}
+
 // Convert YUY2 to I420.
 LIBYUV_API
 int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
@@ -400,17 +529,23 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
    src_stride_yuy2 = -src_stride_yuy2;
  }
 #if defined(HAS_YUY2TOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    YUY2ToUVRow = YUY2ToUVRow_Any_SSE2;
    YUY2ToYRow = YUY2ToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
-      YUY2ToUVRow = YUY2ToUVRow_SSE2;
-      YUY2ToYRow = YUY2ToYRow_SSE2;
+      YUY2ToUVRow = YUY2ToUVRow_Unaligned_SSE2;
+      YUY2ToYRow = YUY2ToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16)) {
+        YUY2ToUVRow = YUY2ToUVRow_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          YUY2ToYRow = YUY2ToYRow_SSE2;
+        }
+      }
    }
  }
 #endif
 #if defined(HAS_YUY2TOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    YUY2ToUVRow = YUY2ToUVRow_Any_AVX2;
    YUY2ToYRow = YUY2ToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
@@ -420,9 +555,11 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
  }
 #endif
 #if defined(HAS_YUY2TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YUY2ToYRow = YUY2ToYRow_Any_NEON;
-    YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
+    if (width >= 16) {
+      YUY2ToUVRow = YUY2ToUVRow_Any_NEON;
+    }
    if (IS_ALIGNED(width, 16)) {
      YUY2ToYRow = YUY2ToYRow_NEON;
      YUY2ToUVRow = YUY2ToUVRow_NEON;
@@ -465,17 +602,23 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
    src_stride_uyvy = -src_stride_uyvy;
  }
 #if defined(HAS_UYVYTOYROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 16) {
    UYVYToUVRow = UYVYToUVRow_Any_SSE2;
    UYVYToYRow = UYVYToYRow_Any_SSE2;
    if (IS_ALIGNED(width, 16)) {
-      UYVYToUVRow = UYVYToUVRow_SSE2;
-      UYVYToYRow = UYVYToYRow_SSE2;
+      UYVYToUVRow = UYVYToUVRow_Unaligned_SSE2;
+      UYVYToYRow = UYVYToYRow_Unaligned_SSE2;
+      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16)) {
+        UYVYToUVRow = UYVYToUVRow_SSE2;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          UYVYToYRow = UYVYToYRow_SSE2;
+        }
+      }
    }
  }
 #endif
 #if defined(HAS_UYVYTOYROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    UYVYToUVRow = UYVYToUVRow_Any_AVX2;
    UYVYToYRow = UYVYToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
@@ -485,9 +628,11 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
  }
 #endif
 #if defined(HAS_UYVYTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    UYVYToYRow = UYVYToYRow_Any_NEON;
-    UYVYToUVRow = UYVYToUVRow_Any_NEON;
+    if (width >= 16) {
+      UYVYToUVRow = UYVYToUVRow_Any_NEON;
+    }
    if (IS_ALIGNED(width, 16)) {
      UYVYToYRow = UYVYToYRow_NEON;
      UYVYToUVRow = UYVYToUVRow_NEON;
@@ -535,17 +680,23 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
    src_stride_argb = -src_stride_argb;
  }
 #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVRow = ARGBToUVRow_SSSE3;
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToUVRow = ARGBToUVRow_Unaligned_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_argb, 16) && IS_ALIGNED(src_stride_argb, 16)) {
+        ARGBToUVRow = ARGBToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ARGBToYRow = ARGBToYRow_SSSE3;
+        }
+      }
    }
  }
 #endif
 #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 32) {
    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
    ARGBToYRow = ARGBToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
@@ -555,7 +706,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
  }
 #endif
 #if defined(HAS_ARGBTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGBToYRow = ARGBToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGBToYRow = ARGBToYRow_NEON;
@@ -563,7 +714,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_NEON;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_NEON;
@@ -610,31 +761,34 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
    src_bgra = src_bgra + (height - 1) * src_stride_bgra;
    src_stride_bgra = -src_stride_bgra;
  }
-#if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+#if defined(HAS_BGRATOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    BGRAToUVRow = BGRAToUVRow_Any_SSSE3;
    BGRAToYRow = BGRAToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      BGRAToUVRow = BGRAToUVRow_SSSE3;
-      BGRAToYRow = BGRAToYRow_SSSE3;
+      BGRAToUVRow = BGRAToUVRow_Unaligned_SSSE3;
+      BGRAToYRow = BGRAToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_bgra, 16) && IS_ALIGNED(src_stride_bgra, 16)) {
+        BGRAToUVRow = BGRAToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          BGRAToYRow = BGRAToYRow_SSSE3;
+        }
+      }
    }
  }
-#endif
-#if defined(HAS_BGRATOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_BGRATOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    BGRAToYRow = BGRAToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      BGRAToYRow = BGRAToYRow_NEON;
    }
-  }
-#endif
-#if defined(HAS_BGRATOUVROW_NEON)
-    if (TestCpuFlag(kCpuHasNEON)) {
+    if (width >= 16) {
      BGRAToUVRow = BGRAToUVRow_Any_NEON;
      if (IS_ALIGNED(width, 16)) {
        BGRAToUVRow = BGRAToUVRow_NEON;
      }
    }
+  }
 #endif

  for (y = 0; y < height - 1; y += 2) {
@@ -676,29 +830,32 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
    src_abgr = src_abgr + (height - 1) * src_stride_abgr;
    src_stride_abgr = -src_stride_abgr;
  }
-#if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+#if defined(HAS_ABGRTOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ABGRToUVRow = ABGRToUVRow_Any_SSSE3;
    ABGRToYRow = ABGRToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ABGRToUVRow = ABGRToUVRow_SSSE3;
-      ABGRToYRow = ABGRToYRow_SSSE3;
+      ABGRToUVRow = ABGRToUVRow_Unaligned_SSSE3;
+      ABGRToYRow = ABGRToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_abgr, 16) && IS_ALIGNED(src_stride_abgr, 16)) {
+        ABGRToUVRow = ABGRToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          ABGRToYRow = ABGRToYRow_SSSE3;
+        }
+      }
    }
  }
-#endif
-#if defined(HAS_ABGRTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_ABGRTOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ABGRToYRow = ABGRToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ABGRToYRow = ABGRToYRow_NEON;
    }
-  }
-#endif
-#if defined(HAS_ABGRTOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ABGRToUVRow = ABGRToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ABGRToUVRow = ABGRToUVRow_NEON;
+    if (width >= 16) {
+      ABGRToUVRow = ABGRToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ABGRToUVRow = ABGRToUVRow_NEON;
+      }
    }
  }
 #endif
@@ -742,29 +899,32 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
    src_rgba = src_rgba + (height - 1) * src_stride_rgba;
    src_stride_rgba = -src_stride_rgba;
  }
-#if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+#if defined(HAS_RGBATOYROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    RGBAToUVRow = RGBAToUVRow_Any_SSSE3;
    RGBAToYRow = RGBAToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      RGBAToUVRow = RGBAToUVRow_SSSE3;
-      RGBAToYRow = RGBAToYRow_SSSE3;
+      RGBAToUVRow = RGBAToUVRow_Unaligned_SSSE3;
+      RGBAToYRow = RGBAToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_rgba, 16) && IS_ALIGNED(src_stride_rgba, 16)) {
+        RGBAToUVRow = RGBAToUVRow_SSSE3;
+        if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+          RGBAToYRow = RGBAToYRow_SSSE3;
+        }
+      }
    }
  }
-#endif
-#if defined(HAS_RGBATOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_RGBATOYROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RGBAToYRow = RGBAToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGBAToYRow = RGBAToYRow_NEON;
    }
-  }
-#endif
-#if defined(HAS_RGBATOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    RGBAToUVRow = RGBAToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RGBAToUVRow = RGBAToUVRow_NEON;
+    if (width >= 16) {
+      RGBAToUVRow = RGBAToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGBAToUVRow = RGBAToUVRow_NEON;
+      }
    }
  }
 #endif
@@ -818,23 +978,22 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
  }

 #if defined(HAS_RGB24TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RGB24ToYRow = RGB24ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToYRow = RGB24ToYRow_NEON;
    }
-  }
-#endif
-#if defined(HAS_RGB24TOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RGB24ToUVRow = RGB24ToUVRow_NEON;
+    if (width >= 16) {
+      RGB24ToUVRow = RGB24ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB24ToUVRow = RGB24ToUVRow_NEON;
+      }
    }
  }
-#endif
+#else  // HAS_RGB24TOYROW_NEON
+
 #if defined(HAS_RGB24TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
@@ -842,7 +1001,7 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_SSSE3;
@@ -850,13 +1009,17 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
    }
  }
 #endif  // HAS_ARGBTOUVROW_SSSE3
+#endif  // HAS_RGB24TOYROW_NEON

  {
 #if !defined(HAS_RGB24TOYROW_NEON)
@@ -932,23 +1095,22 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
  }

 #if defined(HAS_RAWTOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RAWToYRow = RAWToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToYRow = RAWToYRow_NEON;
    }
-  }
-#endif
-#if defined(HAS_RAWTOUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    RAWToUVRow = RAWToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RAWToUVRow = RAWToUVRow_NEON;
+    if (width >= 16) {
+      RAWToUVRow = RAWToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RAWToUVRow = RAWToUVRow_NEON;
+      }
    }
  }
-#endif
+#else  // HAS_RAWTOYROW_NEON
+
 #if defined(HAS_RAWTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RAWToARGBRow = RAWToARGBRow_SSSE3;
@@ -956,7 +1118,7 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_SSSE3;
@@ -964,13 +1126,17 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
    }
  }
 #endif  // HAS_ARGBTOUVROW_SSSE3
+#endif  // HAS_RAWTOYROW_NEON

  {
    // Allocate 2 rows of ARGB.
@@ -1044,20 +1210,22 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
  }

 #if defined(HAS_RGB565TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RGB565ToYRow = RGB565ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB565ToYRow = RGB565ToYRow_NEON;
    }
-    RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      RGB565ToUVRow = RGB565ToUVRow_NEON;
+    if (width >= 16) {
+      RGB565ToUVRow = RGB565ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        RGB565ToUVRow = RGB565ToUVRow_NEON;
+      }
    }
  }
 #else  // HAS_RGB565TOYROW_NEON

 #if defined(HAS_RGB565TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
@@ -1065,7 +1233,7 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_SSSE3;
@@ -1073,10 +1241,13 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
    }
  }
 #endif  // HAS_ARGBTOUVROW_SSSE3
@@ -1156,20 +1327,22 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
  }

 #if defined(HAS_ARGB1555TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB1555ToYRow = ARGB1555ToYRow_NEON;
    }
-    ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+    if (width >= 16) {
+      ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB1555ToUVRow = ARGB1555ToUVRow_NEON;
+      }
    }
  }
 #else  // HAS_ARGB1555TOYROW_NEON

 #if defined(HAS_ARGB1555TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
@@ -1177,7 +1350,7 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_SSSE3;
@@ -1185,10 +1358,13 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
    }
  }
 #endif  // HAS_ARGBTOUVROW_SSSE3
@@ -1269,20 +1445,22 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
  }

 #if defined(HAS_ARGB4444TOYROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB4444ToYRow = ARGB4444ToYRow_NEON;
    }
-    ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+    if (width >= 16) {
+      ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON;
+      if (IS_ALIGNED(width, 16)) {
+        ARGB4444ToUVRow = ARGB4444ToUVRow_NEON;
+      }
    }
  }
 #else  // HAS_ARGB4444TOYROW_NEON

 #if defined(HAS_ARGB4444TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
@@ -1290,7 +1468,7 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      ARGBToUVRow = ARGBToUVRow_SSSE3;
@@ -1298,10 +1476,13 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
  }
 #endif
 #if defined(HAS_ARGBTOUVROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      ARGBToYRow = ARGBToYRow_SSSE3;
+      ARGBToYRow = ARGBToYRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_y, 16) && IS_ALIGNED(dst_stride_y, 16)) {
+        ARGBToYRow = ARGBToYRow_SSSE3;
+      }
    }
  }
 #endif  // HAS_ARGBTOUVROW_SSSE3
--- a/third_party/libyuv/source/convert_argb.cc
+++ b/third_party/libyuv/source/convert_argb.cc
@@ -11,6 +11,7 @@
 #include "libyuv/convert_argb.h"

 #include "libyuv/cpu_id.h"
+#include "libyuv/format_conversion.h"
 #ifdef HAVE_JPEG
 #include "libyuv/mjpeg_decoder.h"
 #endif
@@ -78,15 +79,17 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
 #if defined(HAS_I444TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I444ToARGBRow = I444ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      I444ToARGBRow = I444ToARGBRow_SSSE3;
+      I444ToARGBRow = I444ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I444ToARGBRow = I444ToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_I444TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_I444TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    I444ToARGBRow = I444ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I444ToARGBRow = I444ToARGBRow_NEON;
@@ -138,15 +141,18 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
 #if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
+      I422ToARGBRow = I422ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I422ToARGBRow = I422ToARGBRow_SSSE3;
+      }
    }
  }
 #endif
 #if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
+  if (TestCpuFlag(kCpuHasAVX2) && width >= 16) {
    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
    if (IS_ALIGNED(width, 16)) {
      I422ToARGBRow = I422ToARGBRow_AVX2;
@@ -154,7 +160,7 @@ int I422ToARGB(const uint8* src_y, int src_stride_y,
  }
 #endif
 #if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    I422ToARGBRow = I422ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I422ToARGBRow = I422ToARGBRow_NEON;
@@ -215,15 +221,17 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
  }
 #if defined(HAS_I411TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    I411ToARGBRow = I411ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      I411ToARGBRow = I411ToARGBRow_SSSE3;
+      I411ToARGBRow = I411ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I411ToARGBRow = I411ToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_I411TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_I411TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    I411ToARGBRow = I411ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I411ToARGBRow = I411ToARGBRow_NEON;
@@ -268,23 +276,15 @@ int I400ToARGB_Reference(const uint8* src_y, int src_stride_y,
    src_stride_y = dst_stride_argb = 0;
  }
 #if defined(HAS_YTOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    YToARGBRow = YToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      YToARGBRow = YToARGBRow_SSE2;
    }
  }
-#endif
-#if defined(HAS_YTOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    YToARGBRow = YToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      YToARGBRow = YToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_YTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_YTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YToARGBRow = YToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      YToARGBRow = YToARGBRow_NEON;
@@ -326,15 +326,17 @@ int I400ToARGB(const uint8* src_y, int src_stride_y,
    src_stride_y = dst_stride_argb = 0;
  }
 #if defined(HAS_I400TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8) {
    I400ToARGBRow = I400ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
-      I400ToARGBRow = I400ToARGBRow_SSE2;
+      I400ToARGBRow = I400ToARGBRow_Unaligned_SSE2;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        I400ToARGBRow = I400ToARGBRow_SSE2;
+      }
    }
  }
-#endif
-#if defined(HAS_I400TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_I400TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    I400ToARGBRow = I400ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      I400ToARGBRow = I400ToARGBRow_NEON;
@@ -445,15 +447,15 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
    src_stride_rgb24 = dst_stride_argb = 0;
  }
 #if defined(HAS_RGB24TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RGB24ToARGBRow = RGB24ToARGBRow_SSSE3;
    }
  }
-#endif
-#if defined(HAS_RGB24TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_RGB24TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB24ToARGBRow = RGB24ToARGBRow_NEON;
@@ -495,15 +497,15 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
    src_stride_raw = dst_stride_argb = 0;
  }
 #if defined(HAS_RAWTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    RAWToARGBRow = RAWToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
      RAWToARGBRow = RAWToARGBRow_SSSE3;
    }
  }
-#endif
-#if defined(HAS_RAWTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_RAWTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RAWToARGBRow = RAWToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RAWToARGBRow = RAWToARGBRow_NEON;
@@ -545,15 +547,15 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
    src_stride_rgb565 = dst_stride_argb = 0;
  }
 #if defined(HAS_RGB565TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      RGB565ToARGBRow = RGB565ToARGBRow_SSE2;
    }
  }
-#endif
-#if defined(HAS_RGB565TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_RGB565TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      RGB565ToARGBRow = RGB565ToARGBRow_NEON;
@@ -595,15 +597,15 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
    src_stride_argb1555 = dst_stride_argb = 0;
  }
 #if defined(HAS_ARGB1555TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2;
    }
  }
-#endif
-#if defined(HAS_ARGB1555TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_ARGB1555TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON;
@@ -645,15 +647,15 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
    src_stride_argb4444 = dst_stride_argb = 0;
  }
 #if defined(HAS_ARGB4444TOARGBROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
+  if (TestCpuFlag(kCpuHasSSE2) && width >= 8 &&
+      IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2;
    if (IS_ALIGNED(width, 8)) {
      ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2;
    }
  }
-#endif
-#if defined(HAS_ARGB4444TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_ARGB4444TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON;
@@ -691,23 +693,17 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
    dst_stride_argb = -dst_stride_argb;
  }
 #if defined(HAS_NV12TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_NV12TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      NV12ToARGBRow = NV12ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_NV12TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_NV12TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToARGBRow = NV12ToARGBRow_NEON;
@@ -748,23 +744,18 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
    dst_stride_argb = -dst_stride_argb;
  }
 #if defined(HAS_NV21TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      NV21ToARGBRow = NV21ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_NV21TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    NV21ToARGBRow = NV21ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      NV21ToARGBRow = NV21ToARGBRow_AVX2;
+      NV21ToARGBRow = NV21ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV21ToARGBRow = NV21ToARGBRow_SSSE3;
+      }
    }
  }
 #endif
 #if defined(HAS_NV21TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV21ToARGBRow = NV21ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV21ToARGBRow = NV21ToARGBRow_NEON;
@@ -804,23 +795,17 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
    dst_stride_argb = -dst_stride_argb;
  }
 #if defined(HAS_NV12TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 8) {
    NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 8)) {
-      NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      NV12ToARGBRow = NV12ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        NV12ToARGBRow = NV12ToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_NV12TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    NV12ToARGBRow = NV12ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      NV12ToARGBRow = NV12ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_NV12TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_NV12TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    NV12ToARGBRow = NV12ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      NV12ToARGBRow = NV12ToARGBRow_NEON;
@@ -867,23 +852,19 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
    src_stride_yuy2 = dst_stride_argb = 0;
  }
 #if defined(HAS_YUY2TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  // Posix is 16, Windows is 8.
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
+      YUY2ToARGBRow = YUY2ToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_yuy2, 16) && IS_ALIGNED(src_stride_yuy2, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        YUY2ToARGBRow = YUY2ToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_YUY2TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      YUY2ToARGBRow = YUY2ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_YUY2TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_YUY2TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      YUY2ToARGBRow = YUY2ToARGBRow_NEON;
@@ -924,23 +905,19 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
    src_stride_uyvy = dst_stride_argb = 0;
  }
 #if defined(HAS_UYVYTOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
+  // Posix is 16, Windows is 8.
+  if (TestCpuFlag(kCpuHasSSSE3) && width >= 16) {
    UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
-      UYVYToARGBRow = UYVYToARGBRow_SSSE3;
+      UYVYToARGBRow = UYVYToARGBRow_Unaligned_SSSE3;
+      if (IS_ALIGNED(src_uyvy, 16) && IS_ALIGNED(src_stride_uyvy, 16) &&
+          IS_ALIGNED(dst_argb, 16) && IS_ALIGNED(dst_stride_argb, 16)) {
+        UYVYToARGBRow = UYVYToARGBRow_SSSE3;
+      }
    }
  }
-#endif
-#if defined(HAS_UYVYTOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    UYVYToARGBRow = UYVYToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 32)) {
-      UYVYToARGBRow = UYVYToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_UYVYTOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
+#elif defined(HAS_UYVYTOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON) && width >= 8) {
    UYVYToARGBRow = UYVYToARGBRow_Any_NEON;
    if (IS_ALIGNED(width, 8)) {
      UYVYToARGBRow = UYVYToARGBRow_NEON;
@@ -955,152 +932,6 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
  return 0;
 }

-// Convert J420 to ARGB.
-LIBYUV_API
-int J420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert J422 to ARGB.
-LIBYUV_API
-int J422ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
--- a/Show More
+++ b/Show More