Adjust optimize_b RD parameters

Coding gain: lowres 0.51%, midres 0.36%.

Change-Id: I1e9f2f9341bad12d9023f97c73d0e991ae5ec7f0
Enable optimize_b for intra blocks
2016-05-06 09:56:59 -07:00 · 2016-05-06 09:55:45 -07:00 · 2016-05-03 14:42:17 -07:00 · 2016-05-02 19:15:13 +00:00 · 2016-05-01 12:25:57 -07:00 · 2016-04-26 22:08:20 +00:00
868 changed files with 144813 additions and 128511 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -30,32 +30,28 @@
 /examples/decode_with_partial_drops
 /examples/example_xma
 /examples/postproc
-/examples/resize_util
 /examples/set_maps
 /examples/simple_decoder
 /examples/simple_encoder
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
-/examples/vp9_lossless_encoder
 /examples/vp9_spatial_scalable_encoder
 /examples/vpx_temporal_scalable_patterns
-/examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
 /ivfenc
 /ivfenc.dox
 /libvpx.so*
 /libvpx.ver
+/obj_int_extract
 /samples.dox
-/test_intra_pred_speed
 /test_libvpx
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
 /vpx_config.c
 /vpx_config.h
-/vpx_dsp_rtcd.h
 /vpx_scale_rtcd.h
 /vpx_version.h
 /vpxdec
--- a/.mailmap
+++ b/.mailmap
@@ -1,37 +1,18 @@
 Adrian Grange <agrange@google.com>
-Aℓex Converse <aconverse@google.com>
-Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
-Alpha Lam <hclam@google.com> <hclam@chromium.org>
-Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
-Deb Mukherjee <debargha@google.com>
-Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
-Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
 Hangyu Kuang <hkuang@google.com>
-Hui Su <huisu@google.com>
-Jacky Chen <jackychen@google.com>
 Jim Bankoski <jimbankoski@google.com>
+John Koleszar <jkoleszar@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
-Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
-Johann Koenig <johannkoenig@google.com> <johannkoenig@chromium.org>
-John Koleszar <jkoleszar@google.com>
-Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
-Marco Paniconi <marpan@google.com>
-Marco Paniconi <marpan@google.com> <marpan@chromium.org>
+Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
 Pascal Massimino <pascal.massimino@gmail.com>
-Paul Wilkins <paulwilkins@google.com>
-Peter de Rivaz <peter.derivaz@gmail.com>
-Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
+Sami Pietilä <samipietila@google.com>
+Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
+Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
+Tom Finegan <tomfinegan@google.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
-Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
-Sami Pietilä <samipietila@google.com>
-Tamar Levy <tamar.levy@intel.com>
-Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
-Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
-Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
-Tom Finegan <tomfinegan@google.com>
-Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
+Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Deb Mukherjee <debargha@google.com>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
-Yaowu Xu <yaowu@google.com> <Yaowu Xu>
--- a/48
+++ b/48
@@ -3,11 +3,10 @@

 Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
-Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
-Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
+Alex Converse <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -15,69 +14,44 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
 Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
-Andrew Russell <anrussell@google.com>
-Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
 Attila Nagy <attilanagy@google.com>
-Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
-Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
 Christian Duvivier <cduvivier@google.com>
-Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
-Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
-Ed Baker <edward.baker@intel.com>
-Ehsan Akhgari <ehsan.akhgari@gmail.com>
-Erik Niemeyer <erik.a.niemeyer@intel.com>
+Erik Niemeyer <erik.a.niemeyer@gmail.com>
 Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
-Geza Lore <gezalore@gmail.com>
-Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
-Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Guillaume Martres <gmartres@google.com>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 Hangyu Kuang <hkuang@google.com>
-Hanno Böck <hanno@hboeck.de>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
 Ivan Maltz <ivanmaltz@google.com>
-Jacek Caban <cjacek@gmail.com>
-Jacky Chen <jackychen@google.com>
 James Berry <jamesberry@google.com>
-James Yu <james.yu@linaro.org>
 James Zern <jzern@google.com>
-Jan Gerber <j@mailb.org>
 Jan Kratochvil <jan.kratochvil@redhat.com>
 Janne Salonen <jsalonen@google.com>
-Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
-Jia Jia <jia.jia@linaro.org>
-Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
 Jingning Han <jingning@google.com>
-Joey Parrish <joeyparrish@google.com>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
-Johnny Klonaris <google@jawknee.com>
-John Stark <jhnstrk@gmail.com>
 Joshua Bleecher Snyder <josh@treelinelabs.com>
 Joshua Litt <joshualitt@google.com>
-Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 KO Myung-Hun <komh@chollian.net>
-Lawrence Velázquez <larryv@macports.org>
-Linfeng Zhang <linfengz@google.com>
 Lou Quillio <louquillio@google.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
@@ -91,52 +65,36 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
-Minghai Shang <minghai@google.com>
 Morton Jonuschat <yabawock@gmail.com>
-Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
 Patrik Westin <patrik.westin@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
-Pengchong Jin <pengchong@google.com>
-Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
 Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rsbultje@gmail.com>
-Rui Ueyama <ruiu@google.com>
+Ronald S. Bultje <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
-Sasi Inguva <isasi@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
-Sean McGovern <gseanmcg@gmail.com>
-Sergey Kolomenkin <kolomenkin@gmail.com>
-Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
-Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
 Tamar Levy <tamar.levy@intel.com>
-Tao Bai <michaelbai@chromium.org>
 Tero Rintaluoma <teror@google.com>
 Thijs Vermeir <thijsvermeir@gmail.com>
-Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
-Yi Luo <luoyi@google.com>
-Yongzhe Wang <yongzhe@google.com>
 Yunqing Wang <yunqingwang@google.com>
-Yury Gitman <yuryg@google.com>
-Zoe Liu <zoeliu@google.com>
 Google Inc.
 The Mozilla Foundation
 The Xiph.Org Foundation
--- a/70
+++ b/70
@@ -1,73 +1,3 @@
-2016-07-20 v1.6.0 "Khaki Campbell Duck"
-  This release improves upon the VP9 encoder and speeds up the encoding and
-  decoding processes.
-
-  - Upgrading:
-    This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
-    in vpx_image and some minor changes to the VP8_COMP structure.
-
-    The default key frame interval for VP9 has changed from 128 to 9999.
-
-  - Enhancement:
-    A core focus has been performance for low end Intel processors. SSSE3
-    instructions such as 'pshufb' have been avoided and instructions have been
-    reordered to better accommodate the more constrained pipelines.
-
-    As a result, devices based on Celeron processors have seen substantial
-    decoding improvements. From Indian Runner Duck to Javan Whistling Duck,
-    decoding speed improved between 10 and 30%. Between Javan Whistling Duck
-    and Khaki Campbell Duck, it improved another 10 to 15%.
-
-    While Celeron benefited most, Core-i5 also improved 5% and 10% between the
-    respective releases.
-
-    Realtime performance for WebRTC for both speed and quality has received a
-    lot of attention.
-
-  - Bug Fixes:
-    A number of fuzzing issues, found variously by Mozilla, Chromium and others,
-    have been fixed and we strongly recommend updating.
-
-2015-11-09 v1.5.0 "Javan Whistling Duck"
-  This release improves upon the VP9 encoder and speeds up the encoding and
-  decoding processes.
-
-  - Upgrading:
-    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
-    controls and adds a variety of VP9 controls for testing.
-
-    The vpxenc utility now prefers VP9 by default.
-
-  - Enhancements:
-    Faster VP9 encoding and decoding
-    Smaller library size by combining functions used by VP8 and VP9
-
-  - Bug Fixes:
-    A variety of fuzzing issues
-
-2015-04-03 v1.4.0 "Indian Runner Duck"
-  This release includes significant improvements to the VP9 codec.
-
-  - Upgrading:
-    This release is ABI incompatible with 1.3.0. It drops the compatibility
-    layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
-    controls for VP9.
-
-  - Enhancements:
-    Faster VP9 encoding and decoding
-    Multithreaded VP9 decoding (tile and frame-based)
-    Multithreaded VP9 encoding - on by default
-    YUV 4:2:2 and 4:4:4 support in VP9
-    10 and 12bit support in VP9
-    64bit ARM support by replacing ARM assembly with intrinsics
-
-  - Bug Fixes:
-    Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
-    files.
-
-  - Known Issues:
-    Frame Parallel decoding fails for segmented and non-420 files.
-
 2013-11-15 v1.3.0 "Forest"
  This release introduces the VP9 codec in a backward-compatible way.
  All existing users of VP8 can continue to use the library without
--- a/2
+++ b/2
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
 enforcement activity against any entity (including a cross-claim or
 counterclaim in a lawsuit) alleging that any of these implementations of WebM
 or any code incorporated within any of these implementations of WebM
-constitute direct or contributory patent infringement, or inducement of
+constitutes direct or contributory patent infringement, or inducement of
 patent infringement, then any patent rights granted to you under this License
 for these implementations of WebM shall terminate as of the date such
 litigation is filed.
--- a/25
+++ b/25
@@ -1,4 +1,4 @@
-README - 20 July 2016
+README - 30 May 2014

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,6 +47,11 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

+    armv5te-android-gcc
+    armv5te-linux-rvct
+    armv5te-linux-gcc
+    armv5te-none-rvct
+    armv6-darwin-gcc
    armv6-linux-rvct
    armv6-linux-gcc
    armv6-none-rvct
@@ -58,10 +63,15 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-none-rvct
    armv7-win32-vs11
    armv7-win32-vs12
-    armv7-win32-vs14
    armv7s-darwin-gcc
    mips32-linux-gcc
    mips64-linux-gcc
+    ppc32-darwin8-gcc
+    ppc32-darwin9-gcc
+    ppc32-linux-gcc
+    ppc64-darwin8-gcc
+    ppc64-darwin9-gcc
+    ppc64-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
@@ -72,7 +82,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin11-gcc
    x86-darwin12-gcc
    x86-darwin13-gcc
-    x86-darwin14-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
@@ -85,14 +94,11 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-win32-vs10
    x86-win32-vs11
    x86-win32-vs12
-    x86-win32-vs14
-    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
    x86_64-darwin11-gcc
    x86_64-darwin12-gcc
    x86_64-darwin13-gcc
-    x86_64-darwin14-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
@@ -103,7 +109,12 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
-    x86_64-win64-vs14
+    universal-darwin8-gcc
+    universal-darwin9-gcc
+    universal-darwin10-gcc
+    universal-darwin11-gcc
+    universal-darwin12-gcc
+    universal-darwin13-gcc
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
--- a/args.c
+++ b/args.c
@@ -14,7 +14,9 @@
 #include <limits.h>
 #include "args.h"

-#include "vpx_ports/msvc.h"
+#ifdef _MSC_VER
+#define snprintf _snprintf
+#endif

 #if defined(__GNUC__) && __GNUC__
 extern void die(const char *fmt, ...) __attribute__((noreturn));
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -0,0 +1,18 @@
+REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+REM
+REM   Use of this source code is governed by a BSD-style license
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
+echo on
+
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
+REM        source directories.
+REM   %2 - Path to obj_int_extract.exe.
+cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
+cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
+%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -43,7 +43,7 @@
 # will remove any NEON dependency.

 # To change to building armeabi, run ./libvpx/configure again, but with
-# --target=armv6-android-gcc and modify the Application.mk file to
+# --target=armv5te-android-gcc and modify the Application.mk file to
 # set APP_ABI := armeabi
 #
 # Running ndk-build will build libvpx and include it in your project.
@@ -60,15 +60,13 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),armeabi)
-  include $(CONFIG_DIR)libs-armv6-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
-else ifeq ($(TARGET_ARCH_ABI),x86_64)
-  include $(CONFIG_DIR)libs-x86_64-android-gcc.mk
 else ifeq ($(TARGET_ARCH_ABI),mips)
  include $(CONFIG_DIR)libs-mips-android-gcc.mk
 else
@@ -93,8 +91,51 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

+# -----------------------------------------------------------------------------
+# Template  : asm_offsets_template
+# Arguments : 1: assembly offsets file to be created
+#             2: c file to base assembly offsets on
+# Returns   : None
+# Usage     : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>))
+# Rationale : Create offsets at compile time for structures that are
+#             defined in c, but used in assembly functions.
+# -----------------------------------------------------------------------------
+define asm_offsets_template
+
+_SRC:=$(2)
+_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
+
+_FLAGS = $$($$(my)CFLAGS) \
+          $$(call get-src-file-target-cflags,$(2)) \
+          $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
+          $$(LOCAL_CFLAGS) \
+          $$(NDK_APP_CFLAGS) \
+          $$(call host-c-includes,$$($(my)C_INCLUDES)) \
+          -DINLINE_ASM \
+          -S \
+
+_TEXT = "Compile $$(call get-src-file-text,$(2))"
+_CC   = $$(TARGET_CC)
+
+$$(eval $$(call ev-build-file))
+
+$(1) : $$(_OBJ) $(2)
+	@mkdir -p $$(dir $$@)
+	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
+endef
+
+# Use ads2gas script to convert from RVCT format to GAS format.  This
+#  puts the processed files under $(ASM_CNV_PATH).  The local clean rule
+#  below handles removing them.
+ifeq ($(CONFIG_VP8_ENCODER), yes)
+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
+endif
+ifeq ($(HAVE_NEON_ASM), yes)
+  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
+endif
+
 .PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
 	@mkdir -p $(dir $@)
 	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

@@ -160,41 +201,45 @@ LOCAL_CFLAGS += \

 LOCAL_MODULE := libvpx

+LOCAL_LDLIBS := -llog
+
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
  LOCAL_STATIC_LIBRARIES := cpufeatures
 endif

 # Add a dependency to force generation of the RTCD files.
-define rtcd_dep_template
-rtcd_dep_template_SRCS := $(addprefix $(LOCAL_PATH)/, $(LOCAL_SRC_FILES))
-rtcd_dep_template_SRCS := $$(rtcd_dep_template_SRCS:.neon=)
 ifeq ($(CONFIG_VP8), yes)
-$$(rtcd_dep_template_SRCS): vp8_rtcd.h
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
 endif
 ifeq ($(CONFIG_VP9), yes)
-$$(rtcd_dep_template_SRCS): vp9_rtcd.h
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
 endif
-$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
-$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h

-ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
-$$(rtcd_dep_template_SRCS): vpx_config.asm
+ifeq ($(TARGET_ARCH_ABI),x86)
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
 endif
-endef
-
-$(eval $(call rtcd_dep_template))

 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
 	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	@$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
 	@$(RM) -r $(ASM_CNV_PATH)
 	@$(RM) $(CLEAN-OBJS)

-ifeq ($(ENABLE_SHARED),1)
-  include $(BUILD_SHARED_LIBRARY)
-else
-  include $(BUILD_STATIC_LIBRARY)
+include $(BUILD_SHARED_LIBRARY)
+
+ifeq ($(HAVE_NEON), yes)
+  $(eval $(call asm_offsets_template,\
+    $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
+    $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
+endif
+
+ifeq ($(CONFIG_VP8_ENCODER), yes)
+  $(eval $(call asm_offsets_template,\
+    $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
+    $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -22,10 +22,8 @@ clean:: .DEFAULT
 exampletest: .DEFAULT
 install:: .DEFAULT
 test:: .DEFAULT
-test-no-data-check:: .DEFAULT
 testdata:: .DEFAULT
 utiltest: .DEFAULT
-exampletest-no-data-check utiltest-no-data-check: .DEFAULT


 # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -58,10 +56,13 @@ dist:
        fi
 endif

-# Since we invoke make recursively for multiple targets we need to include the
-# .mk file for the correct target, but only when $(target) is non-empty.
 ifneq ($(target),)
-include $(target)-$(TOOLCHAIN).mk
+# Normally, we want to build the filename from the target and the toolchain.
+# This disambiguates from the $(target).mk file that exists in the source tree.
+# However, the toolchain is part of the target in universal builds, so we
+# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
+# if we're in the universal case.
+include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
 endif
 BUILD_ROOT?=.
 VPATH=$(SRC_PATH_BARE)
@@ -115,29 +116,28 @@ test::
 testdata::
 .PHONY: utiltest
 utiltest:
-.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
-test-no-data-check::
-exampletest-no-data-check utiltest-no-data-check:

-# Force to realign stack always on OS/2
+# Add compiler flags for intrinsic files
 ifeq ($(TOOLCHAIN), x86-os2-gcc)
-CFLAGS += -mstackrealign
+STACKREALIGN=-mstackrealign
+else
+STACKREALIGN=
 endif

 $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
 $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
-$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
-$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2
-$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3
-$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3
-$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3
-$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3
-$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1
-$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1
-$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
-$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
-$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
-$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
+$(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(STACKREALIGN)
+$(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(STACKREALIGN)
+$(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(STACKREALIGN)
+$(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(STACKREALIGN)
+$(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(STACKREALIGN)
+$(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(STACKREALIGN)
+$(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(STACKREALIGN)
+$(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(STACKREALIGN)
+$(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
+$(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
+$(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
+$(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)

 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
@@ -146,7 +146,6 @@ $(BUILD_PFX)%.c.d: %.c

 $(BUILD_PFX)%.c.o: %.c
 	$(if $(quiet),@echo "    [CC] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cc.d: %.cc
@@ -156,7 +155,6 @@ $(BUILD_PFX)%.cc.d: %.cc

 $(BUILD_PFX)%.cc.o: %.cc
 	$(if $(quiet),@echo "    [CXX] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cpp.d: %.cpp
@@ -166,7 +164,6 @@ $(BUILD_PFX)%.cpp.d: %.cpp

 $(BUILD_PFX)%.cpp.o: %.cpp
 	$(if $(quiet),@echo "    [CXX] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.asm.d: %.asm
@@ -177,7 +174,6 @@ $(BUILD_PFX)%.asm.d: %.asm

 $(BUILD_PFX)%.asm.o: %.asm
 	$(if $(quiet),@echo "    [AS] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 $(BUILD_PFX)%.s.d: %.s
@@ -188,14 +184,12 @@ $(BUILD_PFX)%.s.d: %.s

 $(BUILD_PFX)%.s.o: %.s
 	$(if $(quiet),@echo "    [AS] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 .PRECIOUS: %.c.S
 %.c.S: CFLAGS += -DINLINE_ASM
 $(BUILD_PFX)%.c.S: %.c
 	$(if $(quiet),@echo "    [GEN] $@")
-	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

 .PRECIOUS: %.asm.s
@@ -222,6 +216,14 @@ else
 	$(qexec)cp $< $@
 endif

+#
+# Rule to extract assembly constants from C sources
+#
+obj_int_extract: build/make/obj_int_extract.c
+	$(if $(quiet),@echo "    [HOSTCC] $@")
+	$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
+CLEAN-OBJS += obj_int_extract
+
 #
 # Utility functions
 #
@@ -283,7 +285,7 @@ define archive_template
 # for creating them.
 $(1):
 	$(if $(quiet),@echo "    [AR] $$@")
-	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$^
+	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
 endef

 define so_template
@@ -313,15 +315,18 @@ $(1):
        $$(filter %.o,$$^) $$(extralibs)
 endef

-define dll_template
-# Not using a pattern rule here because we don't want to generate empty
-# archives when they are listed as a dependency in files not responsible
-# for creating them.
-$(1):
-	$(if $(quiet),@echo "    [LD] $$@")
-	$(qexec)$$(LD) -Zdll $$(LDFLAGS) \
-        -o $$@ \
-        $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
+
+
+define lipo_lib_template
+$(1): $(addsuffix /$(1),$(FAT_ARCHS))
+	$(if $(quiet),@echo "    [LIPO] $$@")
+	$(qexec)libtool -static -o $$@ $$?
+endef
+
+define lipo_bin_template
+$(1): $(addsuffix /$(1),$(FAT_ARCHS))
+	$(if $(quiet),@echo "    [LIPO] $$@")
+	$(qexec)lipo -output $$@ -create $$?
 endef


@@ -335,11 +340,9 @@ endif
 skip_deps := $(filter %clean,$(MAKECMDGOALS))
 skip_deps += $(findstring testdata,$(MAKECMDGOALS))
 ifeq ($(strip $(skip_deps)),)
-  ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
-    # Older versions of make don't like -include directives with no arguments
-    ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
-      -include $(filter %.d,$(OBJS-yes:.o=.d))
-    endif
+  # Older versions of make don't like -include directives with no arguments
+  ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
+    -include $(filter %.d,$(OBJS-yes:.o=.d))
  endif
 endif

@@ -380,9 +383,8 @@ LIBS=$(call enabled,LIBS)
 .libs: $(LIBS)
 	@touch $@
 $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
-$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
-$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
-$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))
+$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
+$(foreach lib,$(filter %$(VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))

 INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
 ifeq ($(MAKECMDGOALS),dist)
@@ -422,7 +424,11 @@ ifneq ($(call enabled,DIST-SRCS),)
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
+    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat
+    DIST-SRCS-$(CONFIG_MSVS)  += build/arm-msvs/obj_int_extract.bat
    DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
+    # Include obj_int_extract if we use offsets from *_asm_*_offsets
+    DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64)    += build/make/obj_int_extract.c
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2armasm_ms.pl
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -73,10 +73,6 @@ generate_filter() {
                open_tag File RelativePath="$f"

                if [ "$pat" == "asm" ] && $asm_use_custom_step; then
-                    # Avoid object file name collisions, i.e. vpx_config.c and
-                    # vpx_config.asm produce the same object file without
-                    # this additional suffix.
-                    objf=${objf%.obj}_asm.obj
                    for plat in "${platforms[@]}"; do
                        for cfg in Debug Release; do
                            open_tag FileConfiguration \
@@ -193,7 +189,7 @@ for opt in "$@"; do
 done

 # Make one call to fix_path for file_list to improve performance.
-fix_file_list file_list
+fix_file_list

 outfile=${outfile:-/dev/stdout}
 guid=${guid:-`generate_uuid`}
@@ -299,7 +295,22 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
+                    obj_int_extract)
+                        tag Tool \
+                            Name="VCCLCompilerTool" \
+                            Optimization="0" \
+                            AdditionalIncludeDirectories="$incs" \
+                            PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
+                            RuntimeLibrary="$debug_runtime" \
+                            WarningLevel="3" \
+                            DebugInformationFormat="1" \
+                            $warn_64bit \
+                    ;;
                    vpx)
+                        tag Tool \
+                            Name="VCPreBuildEventTool" \
+                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
+
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="0" \
@@ -336,6 +347,11 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
+                            obj_int_extract)
+                                tag Tool \
+                                    Name="VCLinkerTool" \
+                                    GenerateDebugInformation="true" \
+                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
@@ -384,7 +400,24 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
+                    obj_int_extract)
+                        tag Tool \
+                            Name="VCCLCompilerTool" \
+                            Optimization="2" \
+                            FavorSizeorSpeed="1" \
+                            AdditionalIncludeDirectories="$incs" \
+                            PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
+                            RuntimeLibrary="$release_runtime" \
+                            UsePrecompiledHeader="0" \
+                            WarningLevel="3" \
+                            DebugInformationFormat="0" \
+                            $warn_64bit \
+                    ;;
                    vpx)
+                        tag Tool \
+                            Name="VCPreBuildEventTool" \
+                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
+
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="2" \
@@ -423,6 +456,11 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
+                            obj_int_extract)
+                                tag Tool \
+                                    Name="VCLinkerTool" \
+                                    GenerateDebugInformation="true" \
+                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -19,13 +19,13 @@ show_help() {
    cat <<EOF
 Usage: ${self_basename} [options] file1 [file2 ...]

-This script generates a Visual Studio solution file from a list of project
+This script generates a Visual Studio 2005 solution file from a list of project
 files.

 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11,12,14) of visual studio to generate for
+    --ver=version               Version (7,8,9,10,11) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@@ -255,7 +255,7 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             [789]|10|11|12|14)
+             [789]|10|11|12)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
@@ -300,15 +300,12 @@ case "${vs_ver:-8}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
-    14) sln_vers="14.00"
-       sln_vers_str="Visual Studio 2015"
-    ;;
 esac
 case "${vs_ver:-8}" in
    [789])
    sfx=vcproj
    ;;
-    10|11|12|14)
+    10|11|12)
    sfx=vcxproj
    ;;
 esac
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12,14) of visual studio to generate for
+    --ver=version               Version (10,11,12) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12|14)
+                10|11|12)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@@ -211,14 +211,14 @@ for opt in "$@"; do
 done

 # Make one call to fix_path for file_list to improve performance.
-fix_file_list file_list
+fix_file_list

 outfile=${outfile:-/dev/stdout}
 guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12|14)
+    10|11|12)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@@ -262,9 +262,15 @@ case "$target" in
        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"
    ;;
    arm*)
-        platforms[0]="ARM"
-        asm_Debug_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
-        asm_Release_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
+        asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
+        asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
+        if [ "$name" = "obj_int_extract" ]; then
+            # We don't want to build this tool for the target architecture,
+            # but for an architecture we can run locally during the build.
+            platforms[0]="Win32"
+        else
+            platforms[0]="ARM"
+        fi
    ;;
    *) die "Unsupported target $target!"
    ;;
@@ -344,9 +350,6 @@ generate_vcxproj() {
                # has to enable AppContainerApplication as well.
                tag_content PlatformToolset v120
            fi
-            if [ "$vs_ver" = "14" ]; then
-                tag_content PlatformToolset v140
-            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
@@ -397,13 +400,23 @@ generate_vcxproj() {
                if [ "$hostplat" == "ARM" ]; then
                    hostplat=Win32
                fi
+                open_tag PreBuildEvent
+                tag_content Command "call obj_int_extract.bat &quot;$src_path_bare&quot; $hostplat\\\$(Configuration)"
+                close_tag PreBuildEvent
            fi
            open_tag ClCompile
            if [ "$config" = "Debug" ]; then
                opt=Disabled
                runtime=$debug_runtime
                curlibs=$debug_libs
-                debug=_DEBUG
+                case "$name" in
+                obj_int_extract)
+                    debug=DEBUG
+                    ;;
+                *)
+                    debug=_DEBUG
+                    ;;
+                esac
            else
                opt=MaxSpeed
                runtime=$release_runtime
@@ -411,7 +424,14 @@ generate_vcxproj() {
                tag_content FavorSizeOrSpeed Speed
                debug=NDEBUG
            fi
-            extradefines=";$defines"
+            case "$name" in
+            obj_int_extract)
+                extradefines=";_CONSOLE"
+                ;;
+            *)
+                extradefines=";$defines"
+                ;;
+            esac
            tag_content Optimization $opt
            tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
            tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
@@ -431,6 +451,10 @@ generate_vcxproj() {
            case "$proj_kind" in
            exe)
                open_tag Link
+                if [ "$name" != "obj_int_extract" ]; then
+                    tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
+                    tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
+                fi
                tag_content GenerateDebugInformation true
                # Console is the default normally, but if
                # AppContainerApplication is set, we need to override it.
--- a/build/make/ios-Info.plist
+++ b/build/make/ios-Info.plist
@@ -1,37 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
-<plist version="1.0">
-<dict>
-	<key>CFBundleDevelopmentRegion</key>
-	<string>en</string>
-	<key>CFBundleExecutable</key>
-	<string>VPX</string>
-	<key>CFBundleIdentifier</key>
-	<string>org.webmproject.VPX</string>
-	<key>CFBundleInfoDictionaryVersion</key>
-	<string>6.0</string>
-	<key>CFBundleName</key>
-	<string>VPX</string>
-	<key>CFBundlePackageType</key>
-	<string>FMWK</string>
-	<key>CFBundleShortVersionString</key>
-	<string>${VERSION}</string>
-	<key>CFBundleSignature</key>
-	<string>????</string>
-	<key>CFBundleSupportedPlatforms</key>
-	<array>
-		<string>iPhoneOS</string>
-	</array>
-	<key>CFBundleVersion</key>
-	<string>${VERSION}</string>
-	<key>MinimumOSVersion</key>
-	<string>${IOS_VERSION_MIN}</string>
-	<key>UIDeviceFamily</key>
-	<array>
-		<integer>1</integer>
-		<integer>2</integer>
-	</array>
-	<key>VPXFullVersion</key>
-	<string>${FULLVERSION}</string>
-</dict>
-</plist>
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -18,50 +18,34 @@ set -e
 devnull='> /dev/null 2>&1'

 BUILD_ROOT="_iosbuild"
-CONFIGURE_ARGS="--disable-docs
-                --disable-examples
-                --disable-libyuv
-                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
-FRAMEWORK_LIB="VPX.framework/VPX"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-SCRIPT_DIR=$(dirname "$0")
-LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
+MAKE_JOBS=1
+LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
-ARM_TARGETS="arm64-darwin-gcc
-             armv7-darwin-gcc
-             armv7s-darwin-gcc"
-SIM_TARGETS="x86-iphonesimulator-gcc
-             x86_64-iphonesimulator-gcc"
-OSX_TARGETS="x86-darwin15-gcc
-             x86_64-darwin15-gcc"
-TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"
+TARGETS="arm64-darwin-gcc
+         armv6-darwin-gcc
+         armv7-darwin-gcc
+         armv7s-darwin-gcc
+         x86-iphonesimulator-gcc
+         x86_64-iphonesimulator-gcc"

 # Configures for the target specified by $1, and invokes make with the dist
 # target using $DIST_DIR as the distribution output directory.
 build_target() {
  local target="$1"
  local old_pwd="$(pwd)"
-  local target_specific_flags=""

  vlog "***Building target: ${target}***"

-  case "${target}" in
-    x86-*)
-      target_specific_flags="--enable-pic"
-      vlog "Enabled PIC for ${target}"
-      ;;
-  esac
-
  mkdir "${target}"
  cd "${target}"
-  eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
-    ${devnull}
+  eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
+      --disable-docs ${EXTRA_CONFIGURE_ARGS} ${devnull}
  export DIST_DIR
-  eval make dist ${devnull}
+  eval make -j ${MAKE_JOBS} dist ${devnull}
  cd "${old_pwd}"

  vlog "***Done building target: ${target}***"
@@ -74,6 +58,9 @@ target_to_preproc_symbol() {
    arm64-*)
      echo "__aarch64__"
      ;;
+    armv6-*)
+      echo "__ARM_ARCH_6__"
+      ;;
    armv7-*)
      echo "__ARM_ARCH_7A__"
      ;;
@@ -138,44 +125,6 @@ create_vpx_framework_config_shim() {
  printf "#endif  // ${include_guard}" >> "${config_file}"
 }

-# Verifies that $FRAMEWORK_LIB fat library contains requested builds.
-verify_framework_targets() {
-  local requested_cpus=""
-  local cpu=""
-
-  # Extract CPU from full target name.
-  for target; do
-    cpu="${target%%-*}"
-    if [ "${cpu}" = "x86" ]; then
-      # lipo -info outputs i386 for libvpx x86 targets.
-      cpu="i386"
-    fi
-    requested_cpus="${requested_cpus}${cpu} "
-  done
-
-  # Get target CPUs present in framework library.
-  local targets_built=$(${LIPO} -info ${FRAMEWORK_LIB})
-
-  # $LIPO -info outputs a string like the following:
-  #   Architectures in the fat file: $FRAMEWORK_LIB <architectures>
-  # Capture only the architecture strings.
-  targets_built=${targets_built##*: }
-
-  # Sort CPU strings to make the next step a simple string compare.
-  local actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ")
-  local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ")
-
-  vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}"
-  vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}"
-
-  if [ "${requested}" != "${actual}" ]; then
-    elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list."
-    elog "  Requested target CPUs: ${requested}"
-    elog "  Actual target CPUs: ${actual}"
-    return 1
-  fi
-}
-
 # Configures and builds each target specified by $1, and then builds
 # VPX.framework.
 build_framework() {
@@ -196,12 +145,7 @@ build_framework() {
  for target in ${targets}; do
    build_target "${target}"
    target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}"
-    if [ "${ENABLE_SHARED}" = "yes" ]; then
-      local suffix="dylib"
-    else
-      local suffix="a"
-    fi
-    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}"
+    lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.a"
  done

  cd "${ORIG_PWD}"
@@ -220,75 +164,37 @@ build_framework() {
  # Copy in vpx_version.h.
  cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}"

-  if [ "${ENABLE_SHARED}" = "yes" ]; then
-    # Adjust the dylib's name so dynamic linking in apps works as expected.
-    install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX
-
-    # Copy in Info.plist.
-    cat "${SCRIPT_DIR}/ios-Info.plist" \
-      | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \
-      | sed "s/\${VERSION}/${VERSION}/g" \
-      | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \
-      > "${FRAMEWORK_DIR}/Info.plist"
-  fi
-
-  # Confirm VPX.framework/VPX contains the targets requested.
-  verify_framework_targets ${targets}
-
-  vlog "Created fat library ${FRAMEWORK_LIB} containing:"
+  vlog "Created fat library ${FRAMEWORK_DIR}/VPX containing:"
  for lib in ${lib_list}; do
    vlog "  $(echo ${lib} | awk -F / '{print $2, $NF}')"
  done
+
+  # TODO(tomfinegan): Verify that expected targets are included within
+  # VPX.framework/VPX via lipo -info.
 }

 # Trap function. Cleans up the subtree used to build all targets contained in
 # $TARGETS.
 cleanup() {
-  local readonly res=$?
  cd "${ORIG_PWD}"

-  if [ $res -ne 0 ]; then
-    elog "build exited with error ($res)"
-  fi
-
  if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
    rm -rf "${BUILD_ROOT}"
  fi
 }

-print_list() {
-  local indent="$1"
-  shift
-  local list="$@"
-  for entry in ${list}; do
-    echo "${indent}${entry}"
-  done
-}
-
 iosbuild_usage() {
 cat << EOF
  Usage: ${0##*/} [arguments]
    --help: Display this message and exit.
-    --enable-shared: Build a dynamic framework for use on iOS 8 or later.
-    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
-    --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
-              and x86_64. Allows linking to framework when builds target MacOSX
-              instead of iOS.
+    --jobs: Number of make jobs.
    --preserve-build-output: Do not delete the build directory.
    --show-build-output: Show output from each library build.
-    --targets <targets>: Override default target list. Defaults:
-$(print_list "        " ${TARGETS})
-    --test-link: Confirms all targets can be linked. Functionally identical to
-                 passing --enable-examples via --extra-configure-args.
    --verbose: Output information about the environment and each stage of the
               build.
 EOF
 }

-elog() {
-  echo "${0##*/} failed because: $@" 1>&2
-}
-
 vlog() {
  if [ "${VERBOSE}" = "yes" ]; then
    echo "$@"
@@ -308,8 +214,9 @@ while [ -n "$1" ]; do
      iosbuild_usage
      exit
      ;;
-    --enable-shared)
-      ENABLE_SHARED=yes
+    --jobs)
+      MAKE_JOBS="$2"
+      shift
      ;;
    --preserve-build-output)
      PRESERVE_BUILD_OUTPUT=yes
@@ -317,16 +224,6 @@ while [ -n "$1" ]; do
    --show-build-output)
      devnull=
      ;;
-    --test-link)
-      EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples"
-      ;;
-    --targets)
-      TARGETS="$2"
-      shift
-      ;;
-    --macosx)
-      TARGETS="${ARM_TARGETS} ${OSX_TARGETS}"
-      ;;
    --verbose)
      VERBOSE=yes
      ;;
@@ -338,46 +235,20 @@ while [ -n "$1" ]; do
  shift
 done

-if [ "${ENABLE_SHARED}" = "yes" ]; then
-  CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}"
-fi
-
-FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}")
-VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/')
-
-if [ "$ENABLE_SHARED" = "yes" ]; then
-  IOS_VERSION_OPTIONS="--enable-shared"
-  IOS_VERSION_MIN="8.0"
-else
-  IOS_VERSION_OPTIONS=""
-  IOS_VERSION_MIN="6.0"
-fi
-
 if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
  BUILD_ROOT=${BUILD_ROOT}
  DIST_DIR=${DIST_DIR}
-  CONFIGURE_ARGS=${CONFIGURE_ARGS}
  EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
  FRAMEWORK_DIR=${FRAMEWORK_DIR}
-  FRAMEWORK_LIB=${FRAMEWORK_LIB}
  HEADER_DIR=${HEADER_DIR}
+  MAKE_JOBS=${MAKE_JOBS}
+  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
  LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
  LIPO=${LIPO}
-  MAKEFLAGS=${MAKEFLAGS}
  ORIG_PWD=${ORIG_PWD}
-  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
-  TARGETS="$(print_list "" ${TARGETS})"
-  ENABLE_SHARED=${ENABLE_SHARED}
-  OSX_TARGETS="${OSX_TARGETS}"
-  SIM_TARGETS="${SIM_TARGETS}"
-  SCRIPT_DIR="${SCRIPT_DIR}"
-  FULLVERSION="${FULLVERSION}"
-  VERSION="${VERSION}"
-  IOS_VERSION_MIN="${IOS_VERSION_MIN}"
+  TARGETS="${TARGETS}"
 EOF
 fi

 build_framework "${TARGETS}"
-echo "Successfully built '${FRAMEWORK_DIR}' for:"
-print_list "" ${TARGETS}
--- a/build/make/msvs_common.sh
+++ b/build/make/msvs_common.sh
@@ -39,12 +39,11 @@ fix_path() {
 }

 # Corrects the paths in file_list in one pass for efficiency.
-# $1 is the name of the array to be modified.
 fix_file_list() {
-    declare -n array_ref=$1
-    files=$(fix_path "${array_ref[@]}")
+    # TODO(jzern): this could be more generic and take the array as a param.
+    files=$(fix_path "${file_list[@]}")
    local IFS=$'\n'
-    array_ref=($files)
+    file_list=($files)
 }

 generate_uuid() {
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -0,0 +1,857 @@
+/*
+ *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx_config.h"
+#include "vpx/vpx_integer.h"
+
+typedef enum {
+  OUTPUT_FMT_PLAIN,
+  OUTPUT_FMT_RVDS,
+  OUTPUT_FMT_GAS,
+  OUTPUT_FMT_C_HEADER,
+} output_fmt_t;
+
+int log_msg(const char *fmt, ...) {
+  int res;
+  va_list ap;
+  va_start(ap, fmt);
+  res = vfprintf(stderr, fmt, ap);
+  va_end(ap);
+  return res;
+}
+
+#if defined(__GNUC__) && __GNUC__
+
+#if defined(FORCE_PARSE_ELF)
+
+#if defined(__MACH__)
+#undef __MACH__
+#endif
+
+#if !defined(__ELF__)
+#define __ELF__
+#endif
+#endif
+
+#if defined(__MACH__)
+
+#include <mach-o/loader.h>
+#include <mach-o/nlist.h>
+
+int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
+  switch (mode) {
+    case OUTPUT_FMT_RVDS:
+      printf("%-40s EQU %5d\n", name, val);
+      return 0;
+    case OUTPUT_FMT_GAS:
+      printf(".set %-40s, %5d\n", name, val);
+      return 0;
+    case OUTPUT_FMT_C_HEADER:
+      printf("#define %-40s %5d\n", name, val);
+      return 0;
+    default:
+      log_msg("Unsupported mode: %d", mode);
+      return 1;
+  }
+}
+
+int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
+  int i, j;
+  struct mach_header header;
+  uint8_t *buf = base_buf;
+  int base_data_section = 0;
+  int bits = 0;
+
+  /* We can read in mach_header for 32 and 64 bit architectures
+   * because it's identical to mach_header_64 except for the last
+   * element (uint32_t reserved), which we don't use. Then, when
+   * we know which architecture we're looking at, increment buf
+   * appropriately.
+   */
+  memcpy(&header, buf, sizeof(struct mach_header));
+
+  if (header.magic == MH_MAGIC) {
+    if (header.cputype == CPU_TYPE_ARM
+        || header.cputype == CPU_TYPE_X86) {
+      bits = 32;
+      buf += sizeof(struct mach_header);
+    } else {
+      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
+      goto bail;
+    }
+  } else if (header.magic == MH_MAGIC_64) {
+    if (header.cputype == CPU_TYPE_X86_64) {
+      bits = 64;
+      buf += sizeof(struct mach_header_64);
+    } else {
+      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
+      goto bail;
+    }
+  } else {
+    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
+            MH_MAGIC, MH_MAGIC_64, header.magic);
+    goto bail;
+  }
+
+  if (header.filetype != MH_OBJECT) {
+    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
+    goto bail;
+  }
+
+  for (i = 0; i < header.ncmds; i++) {
+    struct load_command lc;
+
+    memcpy(&lc, buf, sizeof(struct load_command));
+
+    if (lc.cmd == LC_SEGMENT) {
+      uint8_t *seg_buf = buf;
+      struct section s;
+      struct segment_command seg_c;
+
+      memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
+      seg_buf += sizeof(struct segment_command);
+
+      /* Although each section is given its own offset, nlist.n_value
+       * references the offset of the first section. This isn't
+       * apparent without debug information because the offset of the
+       * data section is the same as the first section. However, with
+       * debug sections mixed in, the offset of the debug section
+       * increases but n_value still references the first section.
+       */
+      if (seg_c.nsects < 1) {
+        log_msg("Not enough sections\n");
+        goto bail;
+      }
+
+      memcpy(&s, seg_buf, sizeof(struct section));
+      base_data_section = s.offset;
+    } else if (lc.cmd == LC_SEGMENT_64) {
+      uint8_t *seg_buf = buf;
+      struct section_64 s;
+      struct segment_command_64 seg_c;
+
+      memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
+      seg_buf += sizeof(struct segment_command_64);
+
+      /* See the explanation in the LC_SEGMENT branch above. */
+      if (seg_c.nsects < 1) {
+        log_msg("Not enough sections\n");
+        goto bail;
+      }
+
+      memcpy(&s, seg_buf, sizeof(struct section_64));
+      base_data_section = s.offset;
+    } else if (lc.cmd == LC_SYMTAB) {
+      if (base_data_section != 0) {
+        struct symtab_command sc;
+        uint8_t *sym_buf = base_buf;
+        uint8_t *str_buf = base_buf;
+
+        memcpy(&sc, buf, sizeof(struct symtab_command));
+
+        if (sc.cmdsize != sizeof(struct symtab_command)) {
+          log_msg("Can't find symbol table!\n");
+          goto bail;
+        }
+
+        sym_buf += sc.symoff;
+        str_buf += sc.stroff;
+
+        for (j = 0; j < sc.nsyms; j++) {
+          /* Location of string is calculated each time from the
+           * start of the string buffer.  On darwin the symbols
+           * are prefixed by "_", so we bump the pointer by 1.
+           * The target value is defined as an int in *_asm_*_offsets.c,
+           * which is 4 bytes on all targets we currently use.
+           */
+          if (bits == 32) {
+            struct nlist nl;
+            int val;
+
+            memcpy(&nl, sym_buf, sizeof(struct nlist));
+            sym_buf += sizeof(struct nlist);
+
+            memcpy(&val, base_buf + base_data_section + nl.n_value,
+                   sizeof(val));
+            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
+          } else { /* if (bits == 64) */
+            struct nlist_64 nl;
+            int val;
+
+            memcpy(&nl, sym_buf, sizeof(struct nlist_64));
+            sym_buf += sizeof(struct nlist_64);
+
+            memcpy(&val, base_buf + base_data_section + nl.n_value,
+                   sizeof(val));
+            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
+          }
+        }
+      }
+    }
+
+    buf += lc.cmdsize;
+  }
+
+  return 0;
+bail:
+  return 1;
+
+}
+
+#elif defined(__ELF__)
+#include "elf.h"
+
+#define COPY_STRUCT(dst, buf, ofst, sz) do {\
+    if(ofst + sizeof((*(dst))) > sz) goto bail;\
+    memcpy(dst, buf+ofst, sizeof((*(dst))));\
+  } while(0)
+
+#define ENDIAN_ASSIGN(val, memb) do {\
+    if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
+    (val) = (memb);\
+  } while(0)
+
+#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
+    ENDIAN_ASSIGN(memb, memb);\
+  } while(0)
+
+typedef struct {
+  uint8_t      *buf; /* Buffer containing ELF data */
+  size_t        sz;  /* Buffer size */
+  int           le_data; /* Data is little-endian */
+  unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
+  int           bits; /* 32 or 64 */
+  Elf32_Ehdr    hdr32;
+  Elf64_Ehdr    hdr64;
+} elf_obj_t;
+
+int parse_elf_header(elf_obj_t *elf) {
+  int res;
+  /* Verify ELF Magic numbers */
+  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
+  res = elf->e_ident[EI_MAG0] == ELFMAG0;
+  res &= elf->e_ident[EI_MAG1] == ELFMAG1;
+  res &= elf->e_ident[EI_MAG2] == ELFMAG2;
+  res &= elf->e_ident[EI_MAG3] == ELFMAG3;
+  res &= elf->e_ident[EI_CLASS] == ELFCLASS32
+         || elf->e_ident[EI_CLASS] == ELFCLASS64;
+  res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
+
+  if (!res) goto bail;
+
+  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
+
+  /* Read in relevant values */
+  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
+    elf->bits = 32;
+    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
+
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
+  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
+    elf->bits = 64;
+    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
+
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
+    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
+  }
+
+  return 0;
+bail:
+  log_msg("Failed to parse ELF file header");
+  return 1;
+}
+
+int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
+  if (hdr32) {
+    if (idx >= elf->hdr32.e_shnum)
+      goto bail;
+
+    COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
+                elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
+    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
+  } else { /* if (hdr64) */
+    if (idx >= elf->hdr64.e_shnum)
+      goto bail;
+
+    COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
+                elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
+    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
+  }
+
+  return 0;
+bail:
+  return 1;
+}
+
+const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
+  if (elf->bits == 32) {
+    Elf32_Shdr shdr;
+
+    if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
+      log_msg("Failed to parse ELF string table: section %d, index %d\n",
+              s_idx, idx);
+      return "";
+    }
+
+    return (char *)(elf->buf + shdr.sh_offset + idx);
+  } else { /* if (elf->bits == 64) */
+    Elf64_Shdr shdr;
+
+    if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
+      log_msg("Failed to parse ELF string table: section %d, index %d\n",
+              s_idx, idx);
+      return "";
+    }
+
+    return (char *)(elf->buf + shdr.sh_offset + idx);
+  }
+}
+
+int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
+  if (sym32) {
+    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
+    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
+  } else { /* if (sym64) */
+    COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
+    ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
+  }
+  return 0;
+bail:
+  return 1;
+}
+
+int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
+  elf_obj_t    elf;
+  unsigned int ofst;
+  int          i;
+  Elf32_Off    strtab_off32;
+  Elf64_Off    strtab_off64; /* save String Table offset for later use */
+
+  memset(&elf, 0, sizeof(elf));
+  elf.buf = buf;
+  elf.sz = sz;
+
+  /* Parse Header */
+  if (parse_elf_header(&elf))
+    goto bail;
+
+  if (elf.bits == 32) {
+    Elf32_Shdr shdr;
+    for (i = 0; i < elf.hdr32.e_shnum; i++) {
+      parse_elf_section(&elf, i, &shdr, NULL);
+
+      if (shdr.sh_type == SHT_STRTAB) {
+        char strtsb_name[128];
+
+        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
+
+        if (!(strcmp(strtsb_name, ".shstrtab"))) {
+          /* log_msg("found section: %s\n", strtsb_name); */
+          strtab_off32 = shdr.sh_offset;
+          break;
+        }
+      }
+    }
+  } else { /* if (elf.bits == 64) */
+    Elf64_Shdr shdr;
+    for (i = 0; i < elf.hdr64.e_shnum; i++) {
+      parse_elf_section(&elf, i, NULL, &shdr);
+
+      if (shdr.sh_type == SHT_STRTAB) {
+        char strtsb_name[128];
+
+        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
+
+        if (!(strcmp(strtsb_name, ".shstrtab"))) {
+          /* log_msg("found section: %s\n", strtsb_name); */
+          strtab_off64 = shdr.sh_offset;
+          break;
+        }
+      }
+    }
+  }
+
+  /* Parse all Symbol Tables */
+  if (elf.bits == 32) {
+    Elf32_Shdr shdr;
+    for (i = 0; i < elf.hdr32.e_shnum; i++) {
+      parse_elf_section(&elf, i, &shdr, NULL);
+
+      if (shdr.sh_type == SHT_SYMTAB) {
+        for (ofst = shdr.sh_offset;
+             ofst < shdr.sh_offset + shdr.sh_size;
+             ofst += shdr.sh_entsize) {
+          Elf32_Sym sym;
+
+          parse_elf_symbol(&elf, ofst, &sym, NULL);
+
+          /* For all OBJECTS (data objects), extract the value from the
+           * proper data segment.
+           */
+          /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+              log_msg("found data object %s\n",
+                      parse_elf_string_table(&elf,
+                                             shdr.sh_link,
+                                             sym.st_name));
+           */
+
+          if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
+              && sym.st_size == 4) {
+            Elf32_Shdr dhdr;
+            int val = 0;
+            char section_name[128];
+
+            parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
+
+            /* For explanation - refer to _MSC_VER version of code */
+            strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
+            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+            if (strcmp(section_name, ".bss")) {
+              if (sizeof(val) != sym.st_size) {
+                /* The target value is declared as an int in
+                 * *_asm_*_offsets.c, which is 4 bytes on all
+                 * targets we currently use. Complain loudly if
+                 * this is not true.
+                 */
+                log_msg("Symbol size is wrong\n");
+                goto bail;
+              }
+
+              memcpy(&val,
+                     elf.buf + dhdr.sh_offset + sym.st_value,
+                     sym.st_size);
+            }
+
+            if (!elf.le_data) {
+              log_msg("Big Endian data not supported yet!\n");
+              goto bail;
+            }
+
+            switch (mode) {
+              case OUTPUT_FMT_RVDS:
+                printf("%-40s EQU %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_GAS:
+                printf(".equ %-40s, %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_C_HEADER:
+                printf("#define %-40s %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              default:
+                printf("%s = %d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+            }
+          }
+        }
+      }
+    }
+  } else { /* if (elf.bits == 64) */
+    Elf64_Shdr shdr;
+    for (i = 0; i < elf.hdr64.e_shnum; i++) {
+      parse_elf_section(&elf, i, NULL, &shdr);
+
+      if (shdr.sh_type == SHT_SYMTAB) {
+        for (ofst = shdr.sh_offset;
+             ofst < shdr.sh_offset + shdr.sh_size;
+             ofst += shdr.sh_entsize) {
+          Elf64_Sym sym;
+
+          parse_elf_symbol(&elf, ofst, NULL, &sym);
+
+          /* For all OBJECTS (data objects), extract the value from the
+           * proper data segment.
+           */
+          /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
+              log_msg("found data object %s\n",
+                      parse_elf_string_table(&elf,
+                                             shdr.sh_link,
+                                             sym.st_name));
+           */
+
+          if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
+              && sym.st_size == 4) {
+            Elf64_Shdr dhdr;
+            int val = 0;
+            char section_name[128];
+
+            parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
+
+            /* For explanation - refer to _MSC_VER version of code */
+            strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
+            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
+
+            if ((strcmp(section_name, ".bss"))) {
+              if (sizeof(val) != sym.st_size) {
+                /* The target value is declared as an int in
+                 * *_asm_*_offsets.c, which is 4 bytes on all
+                 * targets we currently use. Complain loudly if
+                 * this is not true.
+                 */
+                log_msg("Symbol size is wrong\n");
+                goto bail;
+              }
+
+              memcpy(&val,
+                     elf.buf + dhdr.sh_offset + sym.st_value,
+                     sym.st_size);
+            }
+
+            if (!elf.le_data) {
+              log_msg("Big Endian data not supported yet!\n");
+              goto bail;
+            }
+
+            switch (mode) {
+              case OUTPUT_FMT_RVDS:
+                printf("%-40s EQU %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              case OUTPUT_FMT_GAS:
+                printf(".equ %-40s, %5d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+                break;
+              default:
+                printf("%s = %d\n",
+                       parse_elf_string_table(&elf,
+                                              shdr.sh_link,
+                                              sym.st_name),
+                       val);
+            }
+          }
+        }
+      }
+    }
+  }
+
+  if (mode == OUTPUT_FMT_RVDS)
+    printf("    END\n");
+
+  return 0;
+bail:
+  log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
+  return 1;
+}
+
+#endif
+#endif /* defined(__GNUC__) && __GNUC__ */
+
+
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
+/*  See "Microsoft Portable Executable and Common Object File Format Specification"
+    for reference.
+*/
+/* Read a little-endian 32-bit / 16-bit value from the byte pointer x.
+ * The argument is parenthesized so that any pointer expression may be passed,
+ * and get_le32 widens each byte to uint32_t before shifting so that a top
+ * byte >= 0x80 is not shifted into the sign bit of an int.
+ * Note: x is evaluated more than once; do not pass expressions with side
+ * effects.
+ */
+#define get_le32(x) ((uint32_t)(x)[0] | (uint32_t)(x)[1] << 8 | \
+                     (uint32_t)(x)[2] << 16 | (uint32_t)(x)[3] << 24)
+#define get_le16(x) ((x)[0] | (x)[1] << 8)
+
+/* Extract integer constants from a COFF object file and print them to stdout
+ * as "<name> EQU <value>" lines, followed by a final "    END" line.
+ *
+ *   buf - the complete contents of the object file
+ *   sz  - number of valid bytes in buf
+ *
+ * Returns 0 on success and 1 on failure. Every offset taken from the file is
+ * checked against sz before it is dereferenced, so a truncated or malformed
+ * file is reported as an error.
+ *
+ * Only symbols with a positive section number and storage class 2
+ * (IMAGE_SYM_CLASS_EXTERNAL in the PE/COFF specification) are reported.
+ */
+int parse_coff(uint8_t *buf, size_t sz) {
+  unsigned int nsections, symtab_ptr, symtab_sz;
+  unsigned int sectionrawdata_ptr = 0;
+  size_t strtab_ptr;
+  int have_rdata = 0;  /* set once a non-COMDAT .rdata section is seen */
+  int status = 1;      /* pessimistic; cleared only after a full parse */
+  unsigned int i;
+  uint8_t *ptr;
+  uint32_t symoffset;
+
+  /* Section names in file order. A symbol's 1-based section number indexes
+   * this list so that .bss symbols can be told apart from the others.
+   */
+  char **sectionlist;
+
+  /* The COFF file header is 20 bytes long. */
+  if (sz < 20) {
+    log_msg("File is too small to contain a COFF header\n");
+    return 1;
+  }
+
+  nsections = get_le16(buf + 2);
+  symtab_ptr = get_le32(buf + 8);
+  symtab_sz = get_le32(buf + 12);
+
+  if (nsections > 96) {
+    log_msg("Too many sections\n");
+    return 1;
+  }
+
+  /* nsections <= 96 here, so the multiplication cannot overflow. */
+  if (nsections * 40 > sz - 20) {
+    log_msg("Section headers extend past the end of the file\n");
+    return 1;
+  }
+
+  /* Written as a division so that symtab_sz * 18 cannot overflow. */
+  if (symtab_ptr > sz || symtab_sz > (sz - symtab_ptr) / 18) {
+    log_msg("Symbol table extends past the end of the file\n");
+    return 1;
+  }
+
+  /* The string table immediately follows the 18-byte symbol records. */
+  strtab_ptr = (size_t)symtab_ptr + (size_t)symtab_sz * 18;
+
+  /* calloc so that every entry starts out NULL: the cleanup code below can
+   * then free the whole list even if filling it in stops half way.
+   */
+  sectionlist = calloc(nsections ? nsections : 1, sizeof(*sectionlist));
+
+  if (sectionlist == NULL) {
+    log_msg("Allocating first level of section list failed\n");
+    return 1;
+  }
+
+  /*
+  The size of optional header is always zero for an obj file. So, the section
+  header follows the file header immediately.
+  */
+  ptr = buf + 20;     /* first section header */
+
+  for (i = 0; i < nsections; i++) {
+    /* Section names occupy 8 bytes and need not be NUL terminated. */
+    char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+    strncpy(sectionname, (const char *)ptr, 8);
+
+    sectionlist[i] = malloc(strlen(sectionname) + 1);
+
+    if (sectionlist[i] == NULL) {
+      log_msg("Allocating storage for %s failed\n", sectionname);
+      goto cleanup;
+    }
+    strcpy(sectionlist[i], sectionname);
+
+    /* Remember the raw data offset of .rdata, unless it is a COMDAT section
+     * (flag 0x1000 in the characteristics field at offset 36).
+     */
+    if (!strcmp(sectionname, ".rdata") &&
+        (get_le32(ptr + 36) & 0x1000) == 0) {
+      sectionrawdata_ptr = get_le32(ptr + 20);
+      have_rdata = 1;
+    }
+
+    ptr += 40;
+  }
+
+  /*  The compiler puts the data with non-zero offset in .rdata section, but
+      puts the data with zero offset in .bss section. So, if the data is in
+      .bss section, set offset=0.
+      Note from Wiki: In an object module compiled from C, the bss section
+      contains the local variables (but not functions) that were declared with
+      the static keyword, except for those with non-zero initial values. (In
+      C, static variables are initialized to zero by default.) It also
+      contains the non-local (both extern and static) variables that are also
+      initialized to zero (either explicitly or by default).
+      */
+  /* COFF symbol table:
+      offset      field
+      0           Name(*)
+      8           Value
+      12          SectionNumber
+      14          Type
+      16          StorageClass
+      17          NumberOfAuxSymbols
+      */
+  ptr = buf + symtab_ptr;
+
+  /* Every 18-byte record is examined; NumberOfAuxSymbols is not used to skip
+   * auxiliary records.
+   */
+  for (i = 0; i < symtab_sz; i++) {
+    int16_t section = get_le16(ptr + 12); /* section number, 1-based */
+
+    if (section > 0 && ptr[16] == 2) {
+      char shortname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
+      const char *name;
+
+      if ((unsigned int)section > nsections) {
+        log_msg("Symbol refers to section %d but only %u sections exist\n",
+                section, nsections);
+        goto cleanup;
+      }
+
+      if (get_le32(ptr)) {
+        /* Short name stored inline in the first 8 bytes of the record. */
+        strncpy(shortname, (const char *)ptr, 8);
+        name = shortname;
+      } else {
+        /* Long name: bytes 4..7 hold an offset into the string table. */
+        uint32_t name_ofs = get_le32(ptr + 4);
+        size_t avail = sz - strtab_ptr;
+
+        if (name_ofs >= avail ||
+            memchr(buf + strtab_ptr + name_ofs, 0, avail - name_ofs) == NULL) {
+          log_msg("Symbol name lies outside of the string table\n");
+          goto cleanup;
+        }
+        name = (const char *)(buf + strtab_ptr + name_ofs);
+      }
+
+      if (!strcmp(sectionlist[section - 1], ".bss")) {
+        symoffset = 0;
+      } else {
+        uint32_t value = get_le32(ptr + 8);
+
+        if (!have_rdata) {
+          log_msg("Symbol %s needs a .rdata section but none was found\n",
+                  name);
+          goto cleanup;
+        }
+
+        /* Need 4 readable bytes at sectionrawdata_ptr + value. */
+        if (sectionrawdata_ptr > sz ||
+            value > sz - sectionrawdata_ptr ||
+            sz - sectionrawdata_ptr - value < 4) {
+          log_msg("Data for symbol %s lies outside of the file\n", name);
+          goto cleanup;
+        }
+        symoffset = get_le32(buf + sectionrawdata_ptr + value);
+      }
+
+      /* The 64bit Windows compiler doesn't prefix with an _.
+       * Check what's there, and bump if necessary
+       */
+      if (name[0] == '_')
+        name++;
+
+      printf("%-40s EQU %5d\n", name, (int)symoffset);
+    }
+
+    ptr += 18;
+  }
+
+  printf("    END\n");
+  status = 0;
+
+cleanup:
+  /* Entries that were never allocated are NULL, which free() accepts. */
+  for (i = 0; i < nsections; i++) {
+    free(sectionlist[i]);
+  }
+
+  free(sectionlist);
+
+  return status;
+}
+#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
+
+/* Entry point: obj_int_extract [output format] <obj file>
+ *
+ * Reads the whole object file into memory and hands it to the parser that
+ * matches the build platform (Mach-O, ELF or COFF). The optional first
+ * argument selects the output format ("rvds", "gas" or "cheader"); without
+ * it the plain format is used.
+ *
+ * Returns EXIT_SUCCESS when the parser reports success, EXIT_FAILURE on a
+ * usage error, an I/O error or a parse error. The file handle and the file
+ * buffer are released on every path.
+ */
+int main(int argc, char **argv) {
+  output_fmt_t mode = OUTPUT_FMT_PLAIN;
+  const char *f;
+  uint8_t *file_buf = NULL;
+  int res = 1;  /* failure unless a parser runs and reports success */
+  FILE *fp = NULL;
+  long int file_size;
+
+  if (argc < 2 || argc > 3) {
+    fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
+    fprintf(stderr, "  <obj file>\tobject file to parse\n");
+    fprintf(stderr, "Output Formats:\n");
+    fprintf(stderr, "  gas  - compatible with GNU assembler\n");
+    fprintf(stderr, "  rvds - compatible with armasm\n");
+    fprintf(stderr, "  cheader - c/c++ header file\n");
+    goto bail;
+  }
+
+  /* With a single argument it can only be the object file. A format keyword
+   * is looked for only when two arguments are given, so argv[2] is never
+   * used when it is NULL.
+   */
+  f = argv[1];
+
+  if (argc == 3) {
+    f = argv[2];
+
+    if (!strcmp(argv[1], "rvds"))
+      mode = OUTPUT_FMT_RVDS;
+    else if (!strcmp(argv[1], "gas"))
+      mode = OUTPUT_FMT_GAS;
+    else if (!strcmp(argv[1], "cheader"))
+      mode = OUTPUT_FMT_C_HEADER;
+    else
+      f = argv[1];  /* unrecognized format: treat argv[1] as the file */
+  }
+
+  fp = fopen(f, "rb");
+
+  if (!fp) {
+    perror("Unable to open file");
+    goto bail;
+  }
+
+  if (fseek(fp, 0, SEEK_END)) {
+    perror("stat");
+    goto bail;
+  }
+
+  file_size = ftell(fp);
+
+  /* ftell reports failure as -1, which must not reach malloc or fread. */
+  if (file_size < 0) {
+    perror("ftell");
+    goto bail;
+  }
+
+  /* Never request zero bytes: malloc(0) may legitimately return NULL. */
+  file_buf = malloc(file_size ? (size_t)file_size : 1);
+
+  if (!file_buf) {
+    perror("malloc");
+    goto bail;
+  }
+
+  rewind(fp);
+
+  if (fread(file_buf, sizeof(char), (size_t)file_size, fp) !=
+      (size_t)file_size) {
+    perror("read");
+    goto bail;
+  }
+
+  if (fclose(fp)) {
+    fp = NULL;  /* the stream is gone even when fclose fails */
+    perror("close");
+    goto bail;
+  }
+  fp = NULL;
+
+#if defined(__GNUC__) && __GNUC__
+#if defined(__MACH__)
+  res = parse_macho(file_buf, file_size, mode);
+#elif defined(__ELF__)
+  res = parse_elf(file_buf, file_size, mode);
+#endif
+#endif
+#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
+  res = parse_coff(file_buf, file_size);
+#endif
+
+bail:
+  if (fp)
+    fclose(fp);
+
+  free(file_buf);
+
+  return res ? EXIT_FAILURE : EXIT_SUCCESS;
+}
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -319,15 +319,14 @@ EOF

  print <<EOF;
 #if HAVE_DSPR2
-void vpx_dsputil_static_init();
 #if CONFIG_VP8
 void dsputil_static_init();
-#endif
-
-vpx_dsputil_static_init();
-#if CONFIG_VP8
 dsputil_static_init();
 #endif
+#if CONFIG_VP9
+void vp9_dsputil_static_init();
+vp9_dsputil_static_init();
+#endif
 #endif
 }
 #endif
@@ -377,18 +376,17 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
      last;
    }
-    if (/HAVE_MSA=yes/) {
-      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
-      last;
-    }
  }
  close CONFIG_FILE;
  mips;
-} elsif ($opts{arch} eq 'armv6') {
-  @ALL_ARCHS = filter(qw/media/);
+} elsif ($opts{arch} eq 'armv5te') {
+  @ALL_ARCHS = filter(qw/edsp/);
  arm;
-} elsif ($opts{arch} =~ /armv7\w?/) {
-  @ALL_ARCHS = filter(qw/media neon_asm neon/);
+} elsif ($opts{arch} eq 'armv6') {
+  @ALL_ARCHS = filter(qw/edsp media/);
+  arm;
+} elsif ($opts{arch} eq 'armv7') {
+  @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
  @REQUIRES = filter(keys %required ? keys %required : qw/media/);
  &require(@REQUIRES);
  arm;
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -24,9 +24,8 @@ out_file=${2}
 id=${3:-VERSION_STRING}

 git_version_id=""
-if [ -e "${source_path}/.git" ]; then
+if [ -d "${source_path}/.git" ]; then
    # Source Path is a git working copy. Check for local modifications.
-    # Note that git submodules may have a file as .git, not a directory.
    export GIT_DIR="${source_path}/.git"
    git_version_id=`git describe --match=v[0-9]* 2>/dev/null`
 fi
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -0,0 +1,15 @@
+REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+REM
+REM   Use of this source code is governed by a BSD-style license
+REM   that can be found in the LICENSE file in the root of the source
+REM   tree. An additional intellectual property rights grant can be found
+REM   in the file PATENTS.  All contributing project authors may
+REM   be found in the AUTHORS file in the root of the source tree.
+echo on
+
+REM Arguments:
+REM   %1 - Relative path to the directory containing the vp8 source directory.
+REM   %2 - Path to the directory containing obj_int_extract.exe.
+cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
+%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
+
--- a/codereview.settings
+++ b/codereview.settings
@@ -1,4 +0,0 @@
-# This file is used by gcl to get repository specific information.
-GERRIT_HOST: chromium-review.googlesource.com
-GERRIT_PORT: 29418
-CODE_REVIEW_SERVER: chromium-review.googlesource.com
--- a/247
+++ b/247
@@ -31,16 +31,15 @@ Advanced options:
  --size-limit=WxH                max size to allow in the decoder
  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
  --sdk-path=PATH                 path to root of sdk (android builds only)
+  ${toggle_fast_unaligned}        don't use unaligned accesses, even when
+                                  supported by hardware [auto]
  ${toggle_codec_srcs}            in/exclude codec library source code
  ${toggle_debug_libs}            in/exclude debug version of libraries
  ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
-  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
-  ${toggle_better_hw_compatibility}
-                                  enable encoder to produce streams with better
-                                  hardware decoder compatibility
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
+  ${toggle_mem_tracker}           track memory usage
  ${toggle_postproc}              postprocessing
  ${toggle_vp9_postproc}          vp9 specific postprocessing
  ${toggle_multithread}           multithreaded encoding and decoding
@@ -58,8 +57,6 @@ Advanced options:
  ${toggle_postproc_visualizer}   macro block / block level visualizers
  ${toggle_multi_res_encoding}    enable multiple-resolution encoding
  ${toggle_temporal_denoising}    enable temporal denoising and disable the spatial denoiser
-  ${toggle_vp9_temporal_denoising}
-                                  enable vp9 temporal denoising
  ${toggle_webm_io}               enable input from and output to WebM container
  ${toggle_libyuv}                enable libyuv

@@ -97,11 +94,15 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} arm64-darwin-gcc"
-all_platforms="${all_platforms} arm64-linux-gcc"
+all_platforms="${all_platforms} armv5te-android-gcc"
+all_platforms="${all_platforms} armv5te-linux-rvct"
+all_platforms="${all_platforms} armv5te-linux-gcc"
+all_platforms="${all_platforms} armv5te-none-rvct"
+all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
 all_platforms="${all_platforms} armv6-none-rvct"
+all_platforms="${all_platforms} arm64-darwin-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
 all_platforms="${all_platforms} armv7-darwin-gcc"    #neon Cortex-A8
 all_platforms="${all_platforms} armv7-linux-rvct"    #neon Cortex-A8
@@ -109,11 +110,15 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
-all_platforms="${all_platforms} armv7-win32-vs14"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
-all_platforms="${all_platforms} armv8-linux-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
+all_platforms="${all_platforms} ppc32-darwin8-gcc"
+all_platforms="${all_platforms} ppc32-darwin9-gcc"
+all_platforms="${all_platforms} ppc32-linux-gcc"
+all_platforms="${all_platforms} ppc64-darwin8-gcc"
+all_platforms="${all_platforms} ppc64-darwin9-gcc"
+all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
@@ -124,8 +129,6 @@ all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
-all_platforms="${all_platforms} x86-darwin14-gcc"
-all_platforms="${all_platforms} x86-darwin15-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -138,15 +141,11 @@ all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
-all_platforms="${all_platforms} x86-win32-vs14"
-all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
-all_platforms="${all_platforms} x86_64-darwin14-gcc"
-all_platforms="${all_platforms} x86_64-darwin15-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -157,7 +156,12 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
-all_platforms="${all_platforms} x86_64-win64-vs14"
+all_platforms="${all_platforms} universal-darwin8-gcc"
+all_platforms="${all_platforms} universal-darwin9-gcc"
+all_platforms="${all_platforms} universal-darwin10-gcc"
+all_platforms="${all_platforms} universal-darwin11-gcc"
+all_platforms="${all_platforms} universal-darwin12-gcc"
+all_platforms="${all_platforms} universal-darwin13-gcc"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -194,10 +198,6 @@ if [ ${doxy_major:-0} -ge 1 ]; then
    [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
 fi

-# disable codecs when their source directory does not exist
-[ -d "${source_path}/vp8" ] || disable_codec vp8
-[ -d "${source_path}/vp9" ] || disable_codec vp9
-
 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
 # case.
@@ -207,38 +207,45 @@ enable_feature install_libs

 enable_feature static
 enable_feature optimizations
-enable_feature dependency_tracking
+enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
 enable_feature spatial_resampling
 enable_feature multithread
 enable_feature os_support
 enable_feature temporal_denoising

-CODECS="
-    vp8_encoder
-    vp8_decoder
-    vp9_encoder
-    vp9_decoder
-"
-CODEC_FAMILIES="
-    vp8
-    vp9
-"
+[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
+for d in vp8 vp9; do
+    [ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
+done
+
+if ! enabled alt_tree_layout; then
+# development environment
+[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
+[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
+else
+# customer environment
+[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
+[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
+[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
+[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
+[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
+[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
+[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
+[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
+
+[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
+fi
+
+CODECS="$(echo ${CODECS} | tr ' ' '\n')"
+CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"

 ARCH_LIST="
    arm
    mips
    x86
    x86_64
-"
-ARCH_EXT_LIST_X86="
-    mmx
-    sse
-    sse2
-    sse3
-    ssse3
-    sse4_1
-    avx
-    avx2
+    ppc32
+    ppc64
 "
 ARCH_EXT_LIST="
    edsp
@@ -248,25 +255,60 @@ ARCH_EXT_LIST="

    mips32
    dspr2
-    msa
+
    mips64

-    ${ARCH_EXT_LIST_X86}
+    mmx
+    sse
+    sse2
+    sse3
+    ssse3
+    sse4_1
+    avx
+    avx2
+
+    altivec
 "
 HAVE_LIST="
    ${ARCH_EXT_LIST}
    vpx_ports
+    stdint_h
+    alt_tree_layout
    pthread_h
+    sys_mman_h
    unistd_h
 "
 EXPERIMENT_LIST="
    spatial_svc
+    vp9_temporal_denoising
    fp_mb_stats
    emulate_hardware
-    misc_fixes
+    tx64x64
+    filterintra
+    ext_tx
+    tx_skip
+    supertx
+    copy_mode
+    interintra
+    wedge_partition
+    global_motion
+    palette
+    new_quant
+    intrabc
+    loop_postfilter
+    row_tile
+    new_inter
+    bitstream_fixes
+    newmvref
+    misc_entropy
+    wavelets
+    ext_partition
+    qctx_tprobs
+    sr_mode
+    multi_ref
+    ext_coding_unit_size
 "
 CONFIG_LIST="
-    dependency_tracking
    external_build
    install_docs
    install_bins
@@ -284,6 +326,10 @@ CONFIG_LIST="

    codec_srcs
    debug_libs
+    fast_unaligned
+    mem_manager
+    mem_tracker
+    mem_checks

    dequant_tokens
    dc_recon
@@ -313,16 +359,13 @@ CONFIG_LIST="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
-    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
-    better_hw_compatibility
    experimental
    size_limit
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
-    dependency_tracking
    external_build
    extra_warnings
    werror
@@ -346,6 +389,7 @@ CMDLINE_SELECT="
    libc
    as
    size_limit
+    fast_unaligned
    codec_srcs
    debug_libs

@@ -358,6 +402,7 @@ CMDLINE_SELECT="
    ${CODECS}
    ${CODEC_FAMILIES}
    static_msvcrt
+    mem_tracker
    spatial_resampling
    realtime_only
    onthefly_bitpacking
@@ -373,9 +418,7 @@ CMDLINE_SELECT="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
-    vp9_temporal_denoising
    coefficient_range_checking
-    better_hw_compatibility
    vp9_highbitdepth
    experimental
 "
@@ -384,19 +427,15 @@ process_cmdline() {
    for opt do
        optval="${opt#*=}"
        case "$opt" in
-        --disable-codecs)
-          for c in ${CODEC_FAMILIES}; do disable_codec $c; done
-          ;;
+        --disable-codecs) for c in ${CODECS}; do disable_feature $c; done ;;
        --enable-?*|--disable-?*)
        eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'`
-        if is_in ${option} ${EXPERIMENT_LIST}; then
+        if echo "${EXPERIMENT_LIST}" | grep "^ *$option\$" >/dev/null; then
            if enabled experimental; then
                ${action}_feature $option
            else
                log_echo "Ignoring $opt -- not in experimental mode."
            fi
-        elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then
-            ${action}_codec ${option}
        else
            process_common_cmdline $opt
        fi
@@ -410,6 +449,14 @@ process_cmdline() {
 post_process_cmdline() {
    c=""

+    # If the codec family is disabled, disable all components of that family.
+    # If the codec family is enabled, enable all components of that family.
+    log_echo "Configuring selected codecs"
+    for c in ${CODECS}; do
+        disabled ${c%%_*} && disable_feature ${c}
+        enabled ${c%%_*} && enable_feature ${c}
+    done
+
    # Enable all detected codecs, if they haven't been disabled
    for c in ${CODECS}; do soft_enable $c; done

@@ -427,8 +474,24 @@ post_process_cmdline() {

 process_targets() {
    enabled child || write_common_config_banner
-    write_common_target_config_h ${BUILD_PFX}vpx_config.h
+    enabled universal || write_common_target_config_h  ${BUILD_PFX}vpx_config.h
+
+    # TODO: add host tools target (obj_int_extract, etc)
+
+    # For fat binaries, call configure recursively to configure for each
+    # binary architecture to be included.
+    if enabled universal; then
+        # Call configure (ourselves) for each subarchitecture
+        for arch in $fat_bin_archs; do
+            BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
+        done
+    fi
+
+    # The write_common_config (config.mk) logic is deferred until after the
+    # recursive calls to configure complete, because we want our universal
+    # targets to be executed last.
    write_common_config_targets
+    enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk

    # Calculate the default distribution name, based on the enabled features
    cf=""
@@ -504,18 +567,13 @@ process_detect() {
        # Can only build shared libs on a subset of platforms. Doing this check
        # here rather than at option parse time because the target auto-detect
        # magic happens after the command line has been parsed.
-        case "${tgt_os}" in
-        linux|os2|darwin*|iphonesimulator*)
-            # Supported platforms
-            ;;
-        *)
+        if ! enabled linux; then
            if enabled gnu; then
                echo "--enable-shared is only supported on ELF; assuming this is OK"
            else
-                die "--enable-shared only supported on ELF, OS/2, and Darwin for now"
+                die "--enable-shared only supported on ELF for now"
            fi
-            ;;
-        esac
+        fi
    fi
    if [ -z "$CC" ] || enabled external_build; then
        echo "Bypassing toolchain for environment detection."
@@ -542,12 +600,16 @@ process_detect() {
            # Specialize windows and POSIX environments.
            case $toolchain in
                *-win*-*)
-                    # Don't check for any headers in Windows builds.
-                    false
-                ;;
+                    case $header-$toolchain in
+                        stdint*-gcc) true;;
+                        *) false;;
+                    esac && enable_feature $var
+                    ;;
                *)
                    case $header in
+                        stdint.h) true;;
                        pthread.h) true;;
+                        sys/mman.h) true;;
                        unistd.h) true;;
                        *) false;;
                    esac && enable_feature $var
@@ -563,7 +625,9 @@ process_detect() {
 int main(void) {return 0;}
 EOF
    # check system headers
+    check_header stdint.h
    check_header pthread.h
+    check_header sys/mman.h
    check_header unistd.h # for sysconf(3) and friends.

    check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports
@@ -572,6 +636,30 @@ EOF
 process_toolchain() {
    process_common_toolchain

+    # Handle universal binaries for this architecture
+    case $toolchain in
+        universal-darwin*)
+            darwin_ver=${tgt_os##darwin}
+
+            # Snow Leopard (10.6/darwin10) dropped support for PPC
+            # Include PPC support for all prior versions
+            if [ $darwin_ver -lt 10 ]; then
+                fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
+            fi
+
+            # Tiger (10.4/darwin8) brought support for x86
+            if [ $darwin_ver -ge 8 ]; then
+                fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
+            fi
+
+            # Leopard (10.5/darwin9) brought 64 bit support
+            if [ $darwin_ver -ge 9 ]; then
+                fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
+            fi
+            ;;
+    esac
+
+
    # Enable some useful compiler flags
    if enabled gcc; then
        enabled werror && check_add_cflags -Werror
@@ -594,11 +682,7 @@ process_toolchain() {
          ;;
          *) check_add_cflags -Wunused-but-set-variable ;;
        esac
-        if enabled mips || [ -z "${INLINE}" ]; then
-          enabled extra_warnings || check_add_cflags -Wno-unused-function
-        else
-          check_add_cflags -Wunused-function
-        fi
+        enabled extra_warnings || check_add_cflags -Wno-unused-function
    fi

    if enabled icc; then
@@ -651,7 +735,7 @@ process_toolchain() {
                 VCPROJ_SFX=vcproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
                 ;;
-             10|11|12|14)
+             10|11|12)
                 VCPROJ_SFX=vcxproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
                 enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
@@ -663,7 +747,7 @@ process_toolchain() {
    esac

    # Other toolchain specific defaults
-    case $toolchain in x86*) soft_enable postproc;; esac
+    case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac

    if enabled postproc_visualizer; then
        enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -717,16 +801,6 @@ EOF
    esac
    # libwebm needs to be linked with C++ standard library
    enabled webm_io && LD=${CXX}
-
-    # append any user defined extra cflags
-    if [ -n "${extra_cflags}" ] ; then
-        check_add_cflags ${extra_cflags} || \
-        die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
-    fi
-    if [ -n "${extra_cxxflags}" ]; then
-        check_add_cxxflags ${extra_cxxflags} || \
-        die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler"
-    fi
 }


@@ -737,7 +811,6 @@ CONFIGURE_ARGS="$@"
 process "$@"
 print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
 cat <<EOF >> ${BUILD_PFX}vpx_config.c
-#include "vpx/vpx_codec.h"
 static const char* const cfg = "$CONFIGURE_ARGS";
 const char *vpx_codec_build_config(void) {return cfg;}
 EOF
--- a/examples.mk
+++ b/examples.mk
@@ -22,18 +22,17 @@ LIBYUV_SRCS +=  third_party/libyuv/include/libyuv/basic_types.h  \
                third_party/libyuv/source/planar_functions.cc \
                third_party/libyuv/source/row_any.cc \
                third_party/libyuv/source/row_common.cc \
-                third_party/libyuv/source/row_gcc.cc \
                third_party/libyuv/source/row_mips.cc \
                third_party/libyuv/source/row_neon.cc \
                third_party/libyuv/source/row_neon64.cc \
+                third_party/libyuv/source/row_posix.cc \
                third_party/libyuv/source/row_win.cc \
                third_party/libyuv/source/scale.cc \
-                third_party/libyuv/source/scale_any.cc \
                third_party/libyuv/source/scale_common.cc \
-                third_party/libyuv/source/scale_gcc.cc \
                third_party/libyuv/source/scale_mips.cc \
                third_party/libyuv/source/scale_neon.cc \
                third_party/libyuv/source/scale_neon64.cc \
+                third_party/libyuv/source/scale_posix.cc \
                third_party/libyuv/source/scale_win.cc \

 LIBWEBM_COMMON_SRCS += third_party/libwebm/common/hdr_util.cc \
@@ -57,17 +56,16 @@ LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser/mkvparser.cc \
 # Add compile flags and include path for libwebm sources.
 ifeq ($(CONFIG_WEBM_IO),yes)
  CXXFLAGS     += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS
+  CXXFLAGS     += -I$(SRC_PATH_BARE)/third_party/libwebm
  INC_PATH-yes += $(SRC_PATH_BARE)/third_party/libwebm
 endif

-
 # List of examples to build. UTILS are tools meant for distribution
 # while EXAMPLES demonstrate specific portions of the API.
 UTILS-$(CONFIG_DECODERS)    += vpxdec.c
 vpxdec.SRCS                 += md5_utils.c md5_utils.h
 vpxdec.SRCS                 += vpx_ports/mem_ops.h
 vpxdec.SRCS                 += vpx_ports/mem_ops_aligned.h
-vpxdec.SRCS                 += vpx_ports/msvc.h
 vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
 vpxdec.SRCS                 += args.c args.h
@@ -94,7 +92,6 @@ vpxenc.SRCS                 += tools_common.c tools_common.h
 vpxenc.SRCS                 += warnings.c warnings.h
 vpxenc.SRCS                 += vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
-vpxenc.SRCS                 += vpx_ports/msvc.h
 vpxenc.SRCS                 += vpx_ports/vpx_timer.h
 vpxenc.SRCS                 += vpxstats.c vpxstats.h
 ifeq ($(CONFIG_LIBYUV),yes)
@@ -108,31 +105,11 @@ ifeq ($(CONFIG_WEBM_IO),yes)
 endif
 vpxenc.GUID                  = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1
 vpxenc.DESCRIPTION           = Full featured encoder
-ifeq ($(CONFIG_SPATIAL_SVC),yes)
-  EXAMPLES-$(CONFIG_VP9_ENCODER)      += vp9_spatial_svc_encoder.c
-  vp9_spatial_svc_encoder.SRCS        += args.c args.h
-  vp9_spatial_svc_encoder.SRCS        += ivfenc.c ivfenc.h
-  vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
-  vp9_spatial_svc_encoder.SRCS        += video_common.h
-  vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
-  vp9_spatial_svc_encoder.SRCS        += vpx_ports/msvc.h
-  vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
-  vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
-  vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
-endif

 ifneq ($(CONFIG_SHARED),yes)
 EXAMPLES-$(CONFIG_VP9_ENCODER)    += resize_util.c
 endif

-EXAMPLES-$(CONFIG_ENCODERS)          += vpx_temporal_svc_encoder.c
-vpx_temporal_svc_encoder.SRCS        += ivfenc.c ivfenc.h
-vpx_temporal_svc_encoder.SRCS        += tools_common.c tools_common.h
-vpx_temporal_svc_encoder.SRCS        += video_common.h
-vpx_temporal_svc_encoder.SRCS        += video_writer.h video_writer.c
-vpx_temporal_svc_encoder.SRCS        += vpx_ports/msvc.h
-vpx_temporal_svc_encoder.GUID        = B18C08F2-A439-4502-A78E-849BE3D60947
-vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
 EXAMPLES-$(CONFIG_DECODERS)        += simple_decoder.c
 simple_decoder.GUID                 = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC
 simple_decoder.SRCS                += ivfdec.h ivfdec.c
@@ -141,7 +118,6 @@ simple_decoder.SRCS                += video_common.h
 simple_decoder.SRCS                += video_reader.h video_reader.c
 simple_decoder.SRCS                += vpx_ports/mem_ops.h
 simple_decoder.SRCS                += vpx_ports/mem_ops_aligned.h
-simple_decoder.SRCS                += vpx_ports/msvc.h
 simple_decoder.DESCRIPTION          = Simplified decoder loop
 EXAMPLES-$(CONFIG_DECODERS)        += postproc.c
 postproc.SRCS                      += ivfdec.h ivfdec.c
@@ -150,7 +126,6 @@ postproc.SRCS                      += video_common.h
 postproc.SRCS                      += video_reader.h video_reader.c
 postproc.SRCS                      += vpx_ports/mem_ops.h
 postproc.SRCS                      += vpx_ports/mem_ops_aligned.h
-postproc.SRCS                      += vpx_ports/msvc.h
 postproc.GUID                       = 65E33355-F35E-4088-884D-3FD4905881D7
 postproc.DESCRIPTION                = Decoder postprocessor control
 EXAMPLES-$(CONFIG_DECODERS)        += decode_to_md5.c
@@ -161,7 +136,6 @@ decode_to_md5.SRCS                 += video_common.h
 decode_to_md5.SRCS                 += video_reader.h video_reader.c
 decode_to_md5.SRCS                 += vpx_ports/mem_ops.h
 decode_to_md5.SRCS                 += vpx_ports/mem_ops_aligned.h
-decode_to_md5.SRCS                 += vpx_ports/msvc.h
 decode_to_md5.GUID                  = 59120B9B-2735-4BFE-B022-146CA340FE42
 decode_to_md5.DESCRIPTION           = Frame by frame MD5 checksum
 EXAMPLES-$(CONFIG_ENCODERS)     += simple_encoder.c
@@ -169,7 +143,6 @@ simple_encoder.SRCS             += ivfenc.h ivfenc.c
 simple_encoder.SRCS             += tools_common.h tools_common.c
 simple_encoder.SRCS             += video_common.h
 simple_encoder.SRCS             += video_writer.h video_writer.c
-simple_encoder.SRCS             += vpx_ports/msvc.h
 simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
 simple_encoder.DESCRIPTION       = Simplified encoder loop
 EXAMPLES-$(CONFIG_VP9_ENCODER)  += vp9_lossless_encoder.c
@@ -177,7 +150,6 @@ vp9_lossless_encoder.SRCS       += ivfenc.h ivfenc.c
 vp9_lossless_encoder.SRCS       += tools_common.h tools_common.c
 vp9_lossless_encoder.SRCS       += video_common.h
 vp9_lossless_encoder.SRCS       += video_writer.h video_writer.c
-vp9_lossless_encoder.SRCS       += vpx_ports/msvc.h
 vp9_lossless_encoder.GUID        = B63C7C88-5348-46DC-A5A6-CC151EF93366
 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
 EXAMPLES-$(CONFIG_ENCODERS)     += twopass_encoder.c
@@ -185,7 +157,6 @@ twopass_encoder.SRCS            += ivfenc.h ivfenc.c
 twopass_encoder.SRCS            += tools_common.h tools_common.c
 twopass_encoder.SRCS            += video_common.h
 twopass_encoder.SRCS            += video_writer.h video_writer.c
-twopass_encoder.SRCS            += vpx_ports/msvc.h
 twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
 twopass_encoder.DESCRIPTION      = Two-pass encoder loop
 EXAMPLES-$(CONFIG_DECODERS)     += decode_with_drops.c
@@ -195,7 +166,6 @@ decode_with_drops.SRCS          += video_common.h
 decode_with_drops.SRCS          += video_reader.h video_reader.c
 decode_with_drops.SRCS          += vpx_ports/mem_ops.h
 decode_with_drops.SRCS          += vpx_ports/mem_ops_aligned.h
-decode_with_drops.SRCS          += vpx_ports/msvc.h
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
 EXAMPLES-$(CONFIG_ENCODERS)        += set_maps.c
@@ -203,7 +173,6 @@ set_maps.SRCS                      += ivfenc.h ivfenc.c
 set_maps.SRCS                      += tools_common.h tools_common.c
 set_maps.SRCS                      += video_common.h
 set_maps.SRCS                      += video_writer.h video_writer.c
-set_maps.SRCS                      += vpx_ports/msvc.h
 set_maps.GUID                       = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
 set_maps.DESCRIPTION                = Set active and ROI maps
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
@@ -211,10 +180,15 @@ vp8cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
 vp8cx_set_ref.SRCS                 += tools_common.h tools_common.c
 vp8cx_set_ref.SRCS                 += video_common.h
 vp8cx_set_ref.SRCS                 += video_writer.h video_writer.c
-vp8cx_set_ref.SRCS                 += vpx_ports/msvc.h
 vp8cx_set_ref.GUID                  = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
 vp8cx_set_ref.DESCRIPTION           = VP8 set encoder reference frame
-
+EXAMPLES-$(CONFIG_VP9_ENCODER)     += vp9cx_set_ref.c
+vp9cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
+vp9cx_set_ref.SRCS                 += tools_common.h tools_common.c
+vp9cx_set_ref.SRCS                 += video_common.h
+vp9cx_set_ref.SRCS                 += video_writer.h video_writer.c
+vp9cx_set_ref.GUID                  = 65D7F14A-2EE6-4293-B958-AB5107A03B55
+vp9cx_set_ref.DESCRIPTION           = VP9 set encoder reference frame

 ifeq ($(CONFIG_MULTI_RES_ENCODING),yes)
 ifeq ($(CONFIG_LIBYUV),yes)
@@ -222,7 +196,6 @@ EXAMPLES-$(CONFIG_VP8_ENCODER)          += vp8_multi_resolution_encoder.c
 vp8_multi_resolution_encoder.SRCS       += ivfenc.h ivfenc.c
 vp8_multi_resolution_encoder.SRCS       += tools_common.h tools_common.c
 vp8_multi_resolution_encoder.SRCS       += video_writer.h video_writer.c
-vp8_multi_resolution_encoder.SRCS       += vpx_ports/msvc.h
 vp8_multi_resolution_encoder.SRCS       += $(LIBYUV_SRCS)
 vp8_multi_resolution_encoder.GUID        = 04f8738e-63c8-423b-90fa-7c2703a374de
 vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
@@ -283,6 +256,14 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
 $(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))


+# If this is a universal (fat) binary, then all the subarchitectures have
+# already been built and our job is to stitch them together. The
+# BUILD_OBJS variable indicates whether we should be building
+# (compiling, linking) the library. The LIPO_OBJS variable indicates
+# that we're stitching.
+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
+
+
 # Create build/install dependencies for all examples. The common case
 # is handled here. The MSVS case is handled below.
 NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
@@ -290,28 +271,24 @@ DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
 INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
 DIST-SRCS-yes              += $(ALL_SRCS)
 INSTALL-SRCS-yes           += $(UTIL_SRCS)
-OBJS-$(NOT_MSVS)           += $(call objs,$(ALL_SRCS))
+OBJS-$(NOT_MSVS)           += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
 BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))


 # Instantiate linker template for all examples.
 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-ifneq ($(filter darwin%,$(TGT_OS)),)
-SHARED_LIB_SUF=.dylib
-else
-ifneq ($(filter os2%,$(TGT_OS)),)
-SHARED_LIB_SUF=_dll.a
-else
-SHARED_LIB_SUF=.so
-endif
-endif
+SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
 CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
 $(foreach bin,$(BINS-yes),\
-    $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
-    $(eval $(call linker_template,$(bin),\
+    $(if $(BUILD_OBJS),$(eval $(bin):\
+        $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
+    $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
        $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
        -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
-        )))
+        )))\
+    $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
+    )
+

 # The following pairs define a mapping of locations in the distribution
 # tree to locations in the source/build trees.
@@ -339,8 +316,8 @@ endif
 # the makefiles). We may want to revisit this.
 define vcproj_template
 $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX)
-	$(if $(quiet),@echo "    [vcproj] $$@")
-	$(qexec)$$(GEN_VCPROJ)\
+	@echo "    [vcproj] $$@"
+	$$(GEN_VCPROJ)\
            --exe\
            --target=$$(TOOLCHAIN)\
            --name=$$(@:.$(VCPROJ_SFX)=)\
@@ -363,7 +340,6 @@ $(foreach proj,$(call enabled,PROJECTS),\
 #
 %.dox: %.c
 	@echo "    [DOXY] $@"
-	@mkdir -p $(dir $@)
 	@echo "/*!\page example_$(@F:.dox=) $(@F:.dox=)" > $@
 	@echo "   \includelineno $(<F)" >> $@
 	@echo "*/" >> $@
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -36,9 +36,9 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../md5_utils.h"
-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./md5_utils.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -59,13 +59,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -46,13 +46,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/resize_util.c
+++ b/examples/resize_util.c
@@ -15,23 +15,15 @@
 #include <stdlib.h>
 #include <string.h>

-#include "../tools_common.h"
-#include "../vp9/encoder/vp9_resize.h"
+#include "./vp9/encoder/vp9_resize.h"

-static const char *exec_name = NULL;
-
-static void usage() {
+static void usage(char *progname) {
  printf("Usage:\n");
  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
-         exec_name);
+         progname);
  printf("<output_yuv> [<frames>]\n");
 }

-void usage_exit(void) {
-  usage();
-  exit(EXIT_FAILURE);
-}
-
 static int parse_dim(char *v, int *width, int *height) {
  char *x = strchr(v, 'x');
  if (x == NULL)
@@ -55,11 +47,9 @@ int main(int argc, char *argv[]) {
  int f, frames;
  int width, height, target_width, target_height;

-  exec_name = argv[0];
-
  if (argc < 5) {
    printf("Incorrect parameters:\n");
-    usage();
+    usage(argv[0]);
    return 1;
  }

@@ -67,25 +57,25 @@ int main(int argc, char *argv[]) {
  fout = argv[4];
  if (!parse_dim(argv[2], &width, &height)) {
    printf("Incorrect parameters: %s\n", argv[2]);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  if (!parse_dim(argv[3], &target_width, &target_height)) {
    printf("Incorrect parameters: %s\n", argv[3]);
-    usage();
+    usage(argv[0]);
    return 1;
  }

  fpin = fopen(fin, "rb");
  if (fpin == NULL) {
    printf("Can't open file %s to read\n", fin);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  fpout = fopen(fout, "wb");
  if (fpout == NULL) {
    printf("Can't open file %s to write\n", fout);
-    usage();
+    usage(argv[0]);
    return 1;
  }
  if (argc >= 6)
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -50,12 +50,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -82,13 +82,13 @@

 #include "vpx/vpx_decoder.h"

-#include "../tools_common.h"
-#include "../video_reader.h"
+#include "./tools_common.h"
+#include "./video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -101,16 +101,16 @@

 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr,
          "Usage: %s <codec> <width> <height> <infile> <outfile> "
-              "<keyframe-interval> <error-resilient> <frames to encode>\n"
-              "See comments in simple_encoder.c for more information.\n",
+              "<keyframe-interval> [<error-resilient>]\nSee comments in "
+              "simple_encoder.c for more information.\n",
          exec_name);
  exit(EXIT_FAILURE);
 }
@@ -147,7 +147,6 @@ static int encode_frame(vpx_codec_ctx_t *codec,
  return got_pkts;
 }

-// TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps.
 int main(int argc, char **argv) {
  FILE *infile = NULL;
  vpx_codec_ctx_t codec;
@@ -158,11 +157,12 @@ int main(int argc, char **argv) {
  VpxVideoInfo info = {0};
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
-  const int fps = 30;
-  const int bitrate = 200;
+  const int fps = 30;        // TODO(dkovalev) add command line argument
+  const int bitrate = 200;   // kbit/s TODO(dkovalev) add command line argument
  int keyframe_interval = 0;
-  int max_frames = 0;
-  int frames_encoded = 0;
+
+  // TODO(dkovalev): Add some simple command line parsing code to make the
+  // command line more flexible.
  const char *codec_arg = NULL;
  const char *width_arg = NULL;
  const char *height_arg = NULL;
@@ -172,7 +172,7 @@ int main(int argc, char **argv) {

  exec_name = argv[0];

-  if (argc != 9)
+  if (argc < 7)
    die("Invalid number of arguments");

  codec_arg = argv[1];
@@ -181,7 +181,6 @@ int main(int argc, char **argv) {
  infile_arg = argv[4];
  outfile_arg = argv[5];
  keyframe_interval_arg = argv[6];
-  max_frames = strtol(argv[8], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder)
@@ -220,7 +219,7 @@ int main(int argc, char **argv) {
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = strtol(argv[7], NULL, 0);
+  cfg.g_error_resilient = argc > 7 ? strtol(argv[7], NULL, 0) : 0;

  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
  if (!writer)
@@ -238,9 +237,6 @@ int main(int argc, char **argv) {
    if (keyframe_interval > 0 && frame_count % keyframe_interval == 0)
      flags |= VPX_EFLAG_FORCE_KF;
    encode_frame(&codec, &raw, frame_count++, flags, writer);
-    frames_encoded++;
-    if (max_frames > 0 && frames_encoded >= max_frames)
-      break;
  }

  // Flush encoder.
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -53,15 +53,13 @@

 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

-void usage_exit(void) {
-  fprintf(stderr,
-          "Usage: %s <codec> <width> <height> <infile> <outfile> "
-              "<frame limit>\n",
+void usage_exit() {
+  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
 }
@@ -131,8 +129,7 @@ static int encode_frame(vpx_codec_ctx_t *ctx,
 static vpx_fixed_buf_t pass0(vpx_image_t *raw,
                             FILE *infile,
                             const VpxInterface *encoder,
-                             const vpx_codec_enc_cfg_t *cfg,
-                             int max_frames) {
+                             const vpx_codec_enc_cfg_t *cfg) {
  vpx_codec_ctx_t codec;
  int frame_count = 0;
  vpx_fixed_buf_t stats = {NULL, 0};
@@ -145,8 +142,6 @@ static vpx_fixed_buf_t pass0(vpx_image_t *raw,
    ++frame_count;
    get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY,
                    &stats);
-    if (max_frames > 0 && frame_count >= max_frames)
-      break;
  }

  // Flush encoder.
@@ -164,8 +159,7 @@ static void pass1(vpx_image_t *raw,
                  FILE *infile,
                  const char *outfile_name,
                  const VpxInterface *encoder,
-                  const vpx_codec_enc_cfg_t *cfg,
-                  int max_frames) {
+                  const vpx_codec_enc_cfg_t *cfg) {
  VpxVideoInfo info = {
    encoder->fourcc,
    cfg->g_w,
@@ -187,9 +181,6 @@ static void pass1(vpx_image_t *raw,
  while (vpx_img_read(raw, infile)) {
    ++frame_count;
    encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer);
-
-    if (max_frames > 0 && frame_count >= max_frames)
-      break;
  }

  // Flush encoder.
@@ -222,14 +213,11 @@ int main(int argc, char **argv) {
  const char *const height_arg = argv[3];
  const char *const infile_arg = argv[4];
  const char *const outfile_arg = argv[5];
-  int max_frames = 0;
  exec_name = argv[0];

-  if (argc != 7)
+  if (argc != 6)
    die("Invalid number of arguments.");

-  max_frames = strtol(argv[6], NULL, 0);
-
  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder)
    die("Unsupported codec.");
@@ -261,13 +249,13 @@ int main(int argc, char **argv) {

  // Pass 0
  cfg.g_pass = VPX_RC_FIRST_PASS;
-  stats = pass0(&raw, infile, encoder, &cfg, max_frames);
+  stats = pass0(&raw, infile, encoder, &cfg);

  // Pass 1
  rewind(infile);
  cfg.g_pass = VPX_RC_LAST_PASS;
  cfg.rc_twopass_stats_in = stats;
-  pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);
+  pass1(&raw, infile, outfile_arg, encoder, &cfg);
  free(stats.buf);

  vpx_img_free(&raw);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -8,724 +8,292 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-/*
- * This is an example demonstrating multi-resolution encoding in VP8.
- * High-resolution input video is down-sampled to lower-resolutions. The
- * encoder then encodes the video and outputs multiple bitstreams with
- * different resolutions.
- *
- * This test also allows for settings temporal layers for each spatial layer.
- * Different number of temporal layers per spatial stream may be used.
- * Currently up to 3 temporal layers per spatial stream (encoder) are supported
- * in this test.
- */

-#include "./vpx_config.h"
+// This is an example demonstrating multi-resolution encoding in VP8.
+// High-resolution input video is down-sampled to lower-resolutions. The
+// encoder then encodes the video and outputs multiple bitstreams with
+// different resolutions.
+//
+// Configure with --enable-multi-res-encoding flag to enable this example.

 #include <stdio.h>
 #include <stdlib.h>
-#include <stdarg.h>
 #include <string.h>
-#include <math.h>
-#include <assert.h>
-#include <sys/time.h>
-#include "vpx_ports/vpx_timer.h"
-#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
-#include "vpx_ports/mem_ops.h"
-#include "../tools_common.h"
-#define interface (vpx_codec_vp8_cx())
-#define fourcc    0x30385056

-void usage_exit(void) {
-  exit(EXIT_FAILURE);
-}
-
-/*
- * The input video frame is downsampled several times to generate a multi-level
- * hierarchical structure. NUM_ENCODERS is defined as the number of encoding
- * levels required. For example, if the size of input video is 1280x720,
- * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3
- * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and
- * 320x180(level 2) respectively.
- */
-
-/* Number of encoders (spatial resolutions) used in this test. */
-#define NUM_ENCODERS 3
-
-/* Maximum number of temporal layers allowed for this test. */
-#define MAX_NUM_TEMPORAL_LAYERS 3
-
-/* This example uses the scaler function in libyuv. */
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 #include "third_party/libyuv/include/libyuv/scale.h"
 #include "third_party/libyuv/include/libyuv/cpu_id.h"

-int (*read_frame_p)(FILE *f, vpx_image_t *img);
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"

-static int read_frame(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
+#include "./tools_common.h"
+#include "./video_writer.h"

-    to_read = img->w*img->h*3/2;
-    nbytes = fread(img->planes[0], 1, to_read, f);
-    if(nbytes != to_read) {
-        res = 0;
-        if(nbytes > 0)
-            printf("Warning: Read partial frame. Check your width & height!\n");
-    }
-    return res;
+// The input video frame is downsampled several times to generate a
+// multi-level hierarchical structure. kNumEncoders is defined as the number
+// of encoding levels required. For example, if the size of input video is
+// 1280x720, kNumEncoders is 3, and down-sampling factor is 2, the encoder
+// outputs 3 bitstreams with resolution of 1280x720(level 0),
+// 640x360(level 1), and 320x180(level 2) respectively.
+#define kNumEncoders 3
+
+static const char *exec_name;
+
+void usage_exit() {
+  fprintf(stderr,
+          "Usage: %s <width> <height> <infile> <outfile(s)> <output psnr?>\n",
+          exec_name);
+  exit(EXIT_FAILURE);
 }

-static int read_frame_by_row(FILE *f, vpx_image_t *img) {
-    size_t nbytes, to_read;
-    int    res = 1;
-    int plane;
+int main(int argc, char *argv[]) {
+  int frame_cnt = 0;
+  FILE *infile = NULL;
+  VpxVideoWriter *writers[kNumEncoders];
+  vpx_codec_ctx_t codec[kNumEncoders];
+  vpx_codec_enc_cfg_t cfg[kNumEncoders];
+  vpx_image_t raw[kNumEncoders];
+  const VpxInterface *const encoder = get_vpx_encoder_by_name("vp8");
+  // Currently, only realtime mode is supported in multi-resolution encoding.
+  const int arg_deadline = VPX_DL_REALTIME;
+  int i;
+  int width = 0;
+  int height = 0;
+  int frame_avail = 0;
+  int got_data = 0;

-    for (plane = 0; plane < 3; plane++)
+  // Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
+  // don't need to know PSNR, which will skip PSNR calculation and save
+  // encoding time.
+  int show_psnr = 0;
+  uint64_t psnr_sse_total[kNumEncoders] = {0};
+  uint64_t psnr_samples_total[kNumEncoders] = {0};
+  double psnr_totals[kNumEncoders][4] = {{0, 0}};
+  int psnr_count[kNumEncoders] = {0};
+
+  // Set the required target bitrates for each resolution level.
+  // If target bitrate for highest-resolution level is set to 0,
+  // (i.e. target_bitrate[0]=0), we skip encoding at that level.
+  unsigned int target_bitrate[kNumEncoders] = {1000, 500, 100};
+
+  // Enter the frame rate of the input video.
+  const int framerate = 30;
+  // Set down-sampling factor for each resolution level.
+  //   dsf[0] controls down sampling from level 0 to level 1;
+  //   dsf[1] controls down sampling from level 1 to level 2;
+  //   dsf[2] is not used.
+  vpx_rational_t dsf[kNumEncoders] = {{2, 1}, {2, 1}, {1, 1}};
+
+  exec_name = argv[0];
+
+  if (!encoder)
+    die("Unsupported codec.");
+
+  // exe_name, input width, input height, input file,
+  // output file 1, output file 2, output file 3, psnr on/off
+  if (argc != (5 + kNumEncoders))
+    die("Invalid number of input options.");
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+
+  width = strtol(argv[1], NULL, 0);
+  height = strtol(argv[2], NULL, 0);
+
+  if (width < 16 || width % 2 || height < 16 || height % 2)
+    die("Invalid resolution: %ldx%ld", width, height);
+
+  // Open input video file for encoding
+  if (!(infile = fopen(argv[3], "rb")))
+    die("Failed to open %s for reading", argv[3]);
+
+  show_psnr = strtol(argv[kNumEncoders + 4], NULL, 0);
+
+  // Populate default encoder configuration
+  for (i = 0; i < kNumEncoders; ++i) {
+    vpx_codec_err_t res =
+        vpx_codec_enc_config_default(encoder->codec_interface(), &cfg[i], 0);
+    if (res != VPX_CODEC_OK) {
+      printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
+      return EXIT_FAILURE;
+    }
+  }
+
+  // Update the default configuration according to needs of the application.
+  // Highest-resolution encoder settings
+  cfg[0].g_w = width;
+  cfg[0].g_h = height;
+  cfg[0].g_threads = 1;
+  cfg[0].rc_dropframe_thresh = 30;
+  cfg[0].rc_end_usage = VPX_CBR;
+  cfg[0].rc_resize_allowed = 0;
+  cfg[0].rc_min_quantizer = 4;
+  cfg[0].rc_max_quantizer = 56;
+  cfg[0].rc_undershoot_pct = 98;
+  cfg[0].rc_overshoot_pct = 100;
+  cfg[0].rc_buf_initial_sz = 500;
+  cfg[0].rc_buf_optimal_sz = 600;
+  cfg[0].rc_buf_sz = 1000;
+  cfg[0].g_error_resilient = 1;
+  cfg[0].g_lag_in_frames = 0;
+  cfg[0].kf_mode = VPX_KF_AUTO;  // VPX_KF_DISABLED
+  cfg[0].kf_min_dist = 3000;
+  cfg[0].kf_max_dist = 3000;
+  cfg[0].rc_target_bitrate = target_bitrate[0];
+  cfg[0].g_timebase.num = 1;
+  cfg[0].g_timebase.den = framerate;
+
+  // Other-resolution encoder settings
+  for (i = 1; i < kNumEncoders; ++i) {
+    cfg[i] = cfg[0];
+    cfg[i].g_threads = 1;
+    cfg[i].rc_target_bitrate = target_bitrate[i];
+
+    // Note: Width & height of other-resolution encoders are calculated
+    // from the highest-resolution encoder's size and the corresponding
+    // down_sampling_factor.
    {
-        unsigned char *ptr;
-        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
-        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
-        int r;
+      unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
+      unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
+      cfg[i].g_w = iw / dsf[i - 1].num;
+      cfg[i].g_h = ih / dsf[i - 1].num;
+    }

-        /* Determine the correct plane based on the image format. The for-loop
-         * always counts in Y,U,V order, but this may not match the order of
-         * the data on disk.
-         */
-        switch (plane)
-        {
-        case 1:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
+    // Make width & height multiples of 2.
+    if ((cfg[i].g_w) % 2)
+      cfg[i].g_w++;
+
+    if ((cfg[i].g_h) % 2)
+      cfg[i].g_h++;
+  }
+
+  // Open output file for each encoder to output bitstreams
+  for (i = 0; i < kNumEncoders; ++i) {
+    VpxVideoInfo info = {
+      encoder->fourcc,
+      cfg[i].g_w,
+      cfg[i].g_h,
+      {cfg[i].g_timebase.num, cfg[i].g_timebase.den}
+    };
+
+    if (!(writers[i] = vpx_video_writer_open(argv[i+4], kContainerIVF, &info)))
+      die("Failed to open %s for writing", argv[i+4]);
+  }
+
+  // Allocate image for each encoder
+  for (i = 0; i < kNumEncoders; ++i)
+    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
+      die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
+
+  // Initialize multi-encoder
+  if (vpx_codec_enc_init_multi(&codec[0], encoder->codec_interface(), &cfg[0],
+                               kNumEncoders,
+                               show_psnr ? VPX_CODEC_USE_PSNR : 0, &dsf[0]))
+    die_codec(&codec[0], "Failed to initialize encoder");
+
+  // The extra encoding configuration parameters can be set as follows.
+  for (i = 0; i < kNumEncoders; i++) {
+    // Set encoding speed
+    if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, -6))
+      die_codec(&codec[i], "Failed to set cpu_used");
+
+    // Set static threshold.
+    if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
+      die_codec(&codec[i], "Failed to set static threshold");
+
+    // Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING
+    // Enable denoising for the highest-resolution encoder.
+    if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, i == 0))
+      die_codec(&codec[0], "Failed to set noise_sensitivity");
+  }
+
+  frame_avail = 1;
+  got_data = 0;
+
+  while (frame_avail || got_data) {
+    vpx_codec_iter_t iter[kNumEncoders] = {NULL};
+    const vpx_codec_cx_pkt_t *pkt[kNumEncoders];
+
+    frame_avail = vpx_img_read(&raw[0], infile);
+
+    if (frame_avail) {
+      for (i = 1; i < kNumEncoders; ++i) {
+        vpx_image_t *const prev = &raw[i - 1];
+
+        // Scale the image down a number of times by downsampling factor
+        // FilterMode 1 or 2 give better psnr than FilterMode 0.
+        I420Scale(prev->planes[VPX_PLANE_Y], prev->stride[VPX_PLANE_Y],
+                  prev->planes[VPX_PLANE_U], prev->stride[VPX_PLANE_U],
+                  prev->planes[VPX_PLANE_V], prev->stride[VPX_PLANE_V],
+                  prev->d_w, prev->d_h,
+                  raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
+                  raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
+                  raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
+                  raw[i].d_w, raw[i].d_h, 1);
+      }
+    }
+
+    // Encode frame.
+    if (vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
+                         frame_cnt, 1, 0, arg_deadline)) {
+      die_codec(&codec[0], "Failed to encode frame");
+    }
+
+    for (i = kNumEncoders - 1; i >= 0; i--) {
+      got_data = 0;
+
+      while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
+        got_data = 1;
+        switch (pkt[i]->kind) {
+          case VPX_CODEC_CX_FRAME_PKT:
+            vpx_video_writer_write_frame(writers[i], pkt[i]->data.frame.buf,
+                                         pkt[i]->data.frame.sz, frame_cnt - 1);
+          break;
+          case VPX_CODEC_PSNR_PKT:
+            if (show_psnr) {
+              int j;
+              psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
+              psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
+              for (j = 0; j < 4; j++)
+                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
+              psnr_count[i]++;
+            }
            break;
-        case 2:
-            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
+          default:
            break;
-        default:
-            ptr = img->planes[plane];
        }
+        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
+               (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
+        fflush(stdout);
+      }
+    }
+    frame_cnt++;
+  }
+  printf("\n");

-        for (r = 0; r < h; r++)
-        {
-            to_read = w;
+  fclose(infile);

-            nbytes = fread(ptr, 1, to_read, f);
-            if(nbytes != to_read) {
-                res = 0;
-                if(nbytes > 0)
-                    printf("Warning: Read partial frame. Check your width & height!\n");
-                break;
-            }
+  printf("Processed %d frames.\n", frame_cnt - 1);
+  for (i = 0; i < kNumEncoders; ++i) {
+    // Calculate PSNR and print it out
+    if (show_psnr && psnr_count[i] > 0) {
+      int j;
+      double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
+                                  psnr_sse_total[i]);

-            ptr += img->stride[plane];
-        }
-        if (!res)
-            break;
+      fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
+      fprintf(stderr, " %.3lf", ovpsnr);
+      for (j = 0; j < 4; j++)
+        fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
    }

-    return res;
-}
-
-static void write_ivf_file_header(FILE *outfile,
-                                  const vpx_codec_enc_cfg_t *cfg,
-                                  int frame_cnt) {
-    char header[32];
-
-    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
-        return;
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header+4,  0);                   /* version */
-    mem_put_le16(header+6,  32);                  /* headersize */
-    mem_put_le32(header+8,  fourcc);              /* headersize */
-    mem_put_le16(header+12, cfg->g_w);            /* width */
-    mem_put_le16(header+14, cfg->g_h);            /* height */
-    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
-    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
-    mem_put_le32(header+24, frame_cnt);           /* length */
-    mem_put_le32(header+28, 0);                   /* unused */
-
-    (void) fwrite(header, 1, 32, outfile);
-}
-
-static void write_ivf_frame_header(FILE *outfile,
-                                   const vpx_codec_cx_pkt_t *pkt)
-{
-    char             header[12];
-    vpx_codec_pts_t  pts;
-
-    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
-        return;
-
-    pts = pkt->data.frame.pts;
-    mem_put_le32(header, pkt->data.frame.sz);
-    mem_put_le32(header+4, pts&0xFFFFFFFF);
-    mem_put_le32(header+8, pts >> 32);
-
-    (void) fwrite(header, 1, 12, outfile);
-}
-
-/* Temporal scaling parameters */
-/* This sets all the temporal layer parameters given |num_temporal_layers|,
- * including the target bit allocation across temporal layers. Bit allocation
- * parameters will be passed in as user parameters in another version.
- */
-static void set_temporal_layer_pattern(int num_temporal_layers,
-                                       vpx_codec_enc_cfg_t *cfg,
-                                       int bitrate,
-                                       int *layer_flags)
-{
-    assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS);
-    switch (num_temporal_layers)
-    {
-    case 1:
-    {
-        /* 1-layer */
-        cfg->ts_number_layers     = 1;
-        cfg->ts_periodicity       = 1;
-        cfg->ts_rate_decimator[0] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_target_bitrate[0] = bitrate;
-
-        // Update L only.
-        layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-        break;
-    }
-
-    case 2:
-    {
-        /* 2-layers, with sync point at first frame of layer 1. */
-        cfg->ts_number_layers     = 2;
-        cfg->ts_periodicity       = 2;
-        cfg->ts_rate_decimator[0] = 2;
-        cfg->ts_rate_decimator[1] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_layer_id[1] = 1;
-        // Use 60/40 bit allocation as example.
-        cfg->ts_target_bitrate[0] = 0.6f * bitrate;
-        cfg->ts_target_bitrate[1] = bitrate;
-
-        /* 0=L, 1=GF */
-        // ARF is used as predictor for all frames, and is only updated on
-        // key frame. Sync point every 8 frames.
-
-        // Layer 0: predict from L and ARF, update L and G.
-        layer_flags[0] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 1: sync point: predict from L and ARF, and update G.
-        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 0, predict from L and ARF, update L.
-        layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
-                         VP8_EFLAG_NO_UPD_GF  |
-                         VP8_EFLAG_NO_UPD_ARF;
-
-        // Layer 1: predict from L, G and ARF, and update G.
-        layer_flags[3] = VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 0
-        layer_flags[4] = layer_flags[2];
-
-        // Layer 1
-        layer_flags[5] = layer_flags[3];
-
-        // Layer 0
-        layer_flags[6] = layer_flags[4];
-
-        // Layer 1
-        layer_flags[7] = layer_flags[5];
-        break;
-    }
-
-    case 3:
-    default:
-    {
-        // 3-layers structure where ARF is used as predictor for all frames,
-        // and is only updated on key frame.
-        // Sync points for layer 1 and 2 every 8 frames.
-        cfg->ts_number_layers     = 3;
-        cfg->ts_periodicity       = 4;
-        cfg->ts_rate_decimator[0] = 4;
-        cfg->ts_rate_decimator[1] = 2;
-        cfg->ts_rate_decimator[2] = 1;
-        cfg->ts_layer_id[0] = 0;
-        cfg->ts_layer_id[1] = 2;
-        cfg->ts_layer_id[2] = 1;
-        cfg->ts_layer_id[3] = 2;
-        // Use 40/20/40 bit allocation as example.
-        cfg->ts_target_bitrate[0] = 0.4f * bitrate;
-        cfg->ts_target_bitrate[1] = 0.6f * bitrate;
-        cfg->ts_target_bitrate[2] = bitrate;
-
-        /* 0=L, 1=GF, 2=ARF */
-
-        // Layer 0: predict from L and ARF; update L and G.
-        layer_flags[0] =  VP8_EFLAG_NO_UPD_ARF |
-                          VP8_EFLAG_NO_REF_GF;
-
-        // Layer 2: sync point: predict from L and ARF; update none.
-        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 1: sync point: predict from L and ARF; update G.
-        layer_flags[2] = VP8_EFLAG_NO_REF_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[3] = VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST |
-                         VP8_EFLAG_NO_UPD_ENTROPY;
-
-        // Layer 0: predict from L and ARF; update L.
-        layer_flags[4] = VP8_EFLAG_NO_UPD_GF |
-                         VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_REF_GF;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[5] = layer_flags[3];
-
-        // Layer 1: predict from L, G, ARF; update G.
-        layer_flags[6] = VP8_EFLAG_NO_UPD_ARF |
-                         VP8_EFLAG_NO_UPD_LAST;
-
-        // Layer 2: predict from L, G, ARF; update none.
-        layer_flags[7] = layer_flags[3];
-        break;
-    }
-    }
-}
-
-/* The periodicity of the pattern given the number of temporal layers. */
-static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = {1, 8, 8};
-
-int main(int argc, char **argv)
-{
-    FILE                 *infile, *outfile[NUM_ENCODERS];
-    FILE                 *downsampled_input[NUM_ENCODERS - 1];
-    char                 filename[50];
-    vpx_codec_ctx_t      codec[NUM_ENCODERS];
-    vpx_codec_enc_cfg_t  cfg[NUM_ENCODERS];
-    int                  frame_cnt = 0;
-    vpx_image_t          raw[NUM_ENCODERS];
-    vpx_codec_err_t      res[NUM_ENCODERS];
-
-    int                  i;
-    long                 width;
-    long                 height;
-    int                  length_frame;
-    int                  frame_avail;
-    int                  got_data;
-    int                  flags = 0;
-    int                  layer_id = 0;
-
-    int                  layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS]
-                                     = {0};
-    int                  flag_periodicity;
-
-    /*Currently, only realtime mode is supported in multi-resolution encoding.*/
-    int                  arg_deadline = VPX_DL_REALTIME;
-
-    /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
-       don't need to know PSNR, which will skip PSNR calculation and save
-       encoding time. */
-    int                  show_psnr = 0;
-    int                  key_frame_insert = 0;
-    uint64_t             psnr_sse_total[NUM_ENCODERS] = {0};
-    uint64_t             psnr_samples_total[NUM_ENCODERS] = {0};
-    double               psnr_totals[NUM_ENCODERS][4] = {{0,0}};
-    int                  psnr_count[NUM_ENCODERS] = {0};
-
-    int64_t              cx_time = 0;
-
-    /* Set the required target bitrates for each resolution level.
-     * If target bitrate for highest-resolution level is set to 0,
-     * (i.e. target_bitrate[0]=0), we skip encoding at that level.
-     */
-    unsigned int         target_bitrate[NUM_ENCODERS]={1000, 500, 100};
-
-    /* Enter the frame rate of the input video */
-    int                  framerate = 30;
-
-    /* Set down-sampling factor for each resolution level.
-       dsf[0] controls down sampling from level 0 to level 1;
-       dsf[1] controls down sampling from level 1 to level 2;
-       dsf[2] is not used. */
-    vpx_rational_t dsf[NUM_ENCODERS] = {{2, 1}, {2, 1}, {1, 1}};
-
-    /* Set the number of temporal layers for each encoder/resolution level,
-     * starting from highest resoln down to lowest resoln. */
-    unsigned int         num_temporal_layers[NUM_ENCODERS] = {3, 3, 3};
-
-    if(argc!= (7 + 3 * NUM_ENCODERS))
-        die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
-            "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output psnr?> \n",
-            argv[0]);
-
-    printf("Using %s\n",vpx_codec_iface_name(interface));
-
-    width = strtol(argv[1], NULL, 0);
-    height = strtol(argv[2], NULL, 0);
-    framerate = strtol(argv[3], NULL, 0);
-
-    if(width < 16 || width%2 || height <16 || height%2)
-        die("Invalid resolution: %ldx%ld", width, height);
-
-    /* Open input video file for encoding */
-    if(!(infile = fopen(argv[4], "rb")))
-        die("Failed to open %s for reading", argv[4]);
-
-    /* Open output file for each encoder to output bitstreams */
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        if(!target_bitrate[i])
-        {
-            outfile[i] = NULL;
-            continue;
-        }
-
-        if(!(outfile[i] = fopen(argv[i+5], "wb")))
-            die("Failed to open %s for writing", argv[i+4]);
-    }
-
-    // Bitrates per spatial layer: overwrite default rates above.
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
-    }
-
-    // Temporal layers per spatial layers: overwrite default settings above.
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
-        if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
-          die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
-              num_temporal_layers);
-    }
-
-    /* Open file to write out each spatially downsampled input stream. */
-    for (i=0; i< NUM_ENCODERS - 1; i++)
-    {
-       // Highest resoln is encoder 0.
-        if (sprintf(filename,"ds%d.yuv",NUM_ENCODERS - i) < 0)
-        {
-            return EXIT_FAILURE;
-        }
-        downsampled_input[i] = fopen(filename,"wb");
-    }
-
-    key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
-
-    show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
-
-
-    /* Populate default encoder configuration */
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
-        if(res[i]) {
-            printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
-            return EXIT_FAILURE;
-        }
-    }
-
-    /*
-     * Update the default configuration according to needs of the application.
-     */
-    /* Highest-resolution encoder settings */
-    cfg[0].g_w = width;
-    cfg[0].g_h = height;
-    cfg[0].rc_dropframe_thresh = 0;
-    cfg[0].rc_end_usage = VPX_CBR;
-    cfg[0].rc_resize_allowed = 0;
-    cfg[0].rc_min_quantizer = 2;
-    cfg[0].rc_max_quantizer = 56;
-    cfg[0].rc_undershoot_pct = 100;
-    cfg[0].rc_overshoot_pct = 15;
-    cfg[0].rc_buf_initial_sz = 500;
-    cfg[0].rc_buf_optimal_sz = 600;
-    cfg[0].rc_buf_sz = 1000;
-    cfg[0].g_error_resilient = 1;              /* Enable error resilient mode */
-    cfg[0].g_lag_in_frames   = 0;
-
-    /* Disable automatic keyframe placement */
-    /* Note: These 3 settings are copied to all levels. But, except the lowest
-     * resolution level, all other levels are set to VPX_KF_DISABLED internally.
-     */
-    cfg[0].kf_mode           = VPX_KF_AUTO;
-    cfg[0].kf_min_dist = 3000;
-    cfg[0].kf_max_dist = 3000;
-
-    cfg[0].rc_target_bitrate = target_bitrate[0];       /* Set target bitrate */
-    cfg[0].g_timebase.num = 1;                          /* Set fps */
-    cfg[0].g_timebase.den = framerate;
-
-    /* Other-resolution encoder settings */
-    for (i=1; i< NUM_ENCODERS; i++)
-    {
-        memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));
-
-        cfg[i].rc_target_bitrate = target_bitrate[i];
-
-        /* Note: Width & height of other-resolution encoders are calculated
-         * from the highest-resolution encoder's size and the corresponding
-         * down_sampling_factor.
-         */
-        {
-            unsigned int iw = cfg[i-1].g_w*dsf[i-1].den + dsf[i-1].num - 1;
-            unsigned int ih = cfg[i-1].g_h*dsf[i-1].den + dsf[i-1].num - 1;
-            cfg[i].g_w = iw/dsf[i-1].num;
-            cfg[i].g_h = ih/dsf[i-1].num;
-        }
-
-        /* Make width & height to be multiplier of 2. */
-        // Should support odd size ???
-        if((cfg[i].g_w)%2)cfg[i].g_w++;
-        if((cfg[i].g_h)%2)cfg[i].g_h++;
-    }
-
-
-    // Set the number of threads per encode/spatial layer.
-    // (1, 1, 1) means no encoder threading.
-    cfg[0].g_threads = 2;
-    cfg[1].g_threads = 1;
-    cfg[2].g_threads = 1;
-
-    /* Allocate image for each encoder */
-    for (i=0; i< NUM_ENCODERS; i++)
-        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
-            die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
-
-    if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
-        read_frame_p = read_frame;
-    else
-        read_frame_p = read_frame_by_row;
-
-    for (i=0; i< NUM_ENCODERS; i++)
-        if(outfile[i])
-            write_ivf_file_header(outfile[i], &cfg[i], 0);
-
-    /* Temporal layers settings */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        set_temporal_layer_pattern(num_temporal_layers[i],
-                                   &cfg[i],
-                                   cfg[i].rc_target_bitrate,
-                                   &layer_flags[i * VPX_TS_MAX_PERIODICITY]);
-    }
-
-    /* Initialize multi-encoder */
-    if(vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS,
-                                (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0]))
-        die_codec(&codec[0], "Failed to initialize encoder");
-
-    /* The extra encoding configuration parameters can be set as follows. */
-    /* Set encoding speed */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        int speed = -6;
-        /* Lower speed for the lowest resolution. */
-        if (i == NUM_ENCODERS - 1) speed = -4;
-        if(vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed))
-            die_codec(&codec[i], "Failed to set cpu_used");
-    }
-
-    /* Set static threshold = 1 for all encoders */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
-            die_codec(&codec[i], "Failed to set static threshold");
-    }
-
-    /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
-    /* Enable denoising for the highest-resolution encoder. */
-    if(vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
-        die_codec(&codec[0], "Failed to set noise_sensitivity");
-    for ( i=1; i< NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
-            die_codec(&codec[i], "Failed to set noise_sensitivity");
-    }
-
-    /* Set the number of token partitions */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        if(vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1))
-            die_codec(&codec[i], "Failed to set static threshold");
-    }
-
-    /* Set the max intra target bitrate */
-    for ( i=0; i<NUM_ENCODERS; i++)
-    {
-        unsigned int max_intra_size_pct =
-            (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10);
-        if(vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT,
-                             max_intra_size_pct))
-            die_codec(&codec[i], "Failed to set static threshold");
-       //printf("%d %d \n",i,max_intra_size_pct);
-    }
-
-    frame_avail = 1;
-    got_data = 0;
-
-    while(frame_avail || got_data)
-    {
-        struct vpx_usec_timer timer;
-        vpx_codec_iter_t iter[NUM_ENCODERS]={NULL};
-        const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
-
-        flags = 0;
-        frame_avail = read_frame_p(infile, &raw[0]);
-
-        if(frame_avail)
-        {
-            for ( i=1; i<NUM_ENCODERS; i++)
-            {
-                /*Scale the image down a number of times by downsampling factor*/
-                /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
-                I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
-                          raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
-                          raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
-                          raw[i-1].d_w, raw[i-1].d_h,
-                          raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
-                          raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
-                          raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
-                          raw[i].d_w, raw[i].d_h, 1);
-                /* Write out down-sampled input. */
-                length_frame = cfg[i].g_w *  cfg[i].g_h *3/2;
-                if (fwrite(raw[i].planes[0], 1, length_frame,
-                           downsampled_input[NUM_ENCODERS - i - 1]) !=
-                               length_frame)
-                {
-                    return EXIT_FAILURE;
-                }
-            }
-        }
-
-        /* Set the flags (reference and update) for all the encoders.*/
-        for ( i=0; i<NUM_ENCODERS; i++)
-        {
-            layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity];
-            flags = 0;
-            flag_periodicity = periodicity_to_num_layers
-                [num_temporal_layers[i] - 1];
-            flags = layer_flags[i * VPX_TS_MAX_PERIODICITY +
-                                frame_cnt % flag_periodicity];
-            // Key frame flag for first frame.
-            if (frame_cnt == 0)
-            {
-                flags |= VPX_EFLAG_FORCE_KF;
-            }
-            if (frame_cnt > 0 && frame_cnt == key_frame_insert)
-            {
-                flags = VPX_EFLAG_FORCE_KF;
-            }
-
-            vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags);
-            vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
-        }
-
-        /* Encode each frame at multi-levels */
-        /* Note the flags must be set to 0 in the encode call if they are set
-           for each frame with the vpx_codec_control(), as done above. */
-        vpx_usec_timer_start(&timer);
-        if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
-            frame_cnt, 1, 0, arg_deadline))
-        {
-            die_codec(&codec[0], "Failed to encode frame");
-        }
-        vpx_usec_timer_mark(&timer);
-        cx_time += vpx_usec_timer_elapsed(&timer);
-
-        for (i=NUM_ENCODERS-1; i>=0 ; i--)
-        {
-            got_data = 0;
-            while( (pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i])) )
-            {
-                got_data = 1;
-                switch(pkt[i]->kind) {
-                    case VPX_CODEC_CX_FRAME_PKT:
-                        write_ivf_frame_header(outfile[i], pkt[i]);
-                        (void) fwrite(pkt[i]->data.frame.buf, 1,
-                                      pkt[i]->data.frame.sz, outfile[i]);
-                    break;
-                    case VPX_CODEC_PSNR_PKT:
-                        if (show_psnr)
-                        {
-                            int j;
-
-                            psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
-                            psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
-                            for (j = 0; j < 4; j++)
-                            {
-                                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
-                            }
-                            psnr_count[i]++;
-                        }
-
-                        break;
-                    default:
-                        break;
-                }
-                printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT
-                       && (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"");
-                fflush(stdout);
-            }
-        }
-        frame_cnt++;
-    }
-    printf("\n");
-    printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
-            frame_cnt,
-            1000 * (float)cx_time / (double)(frame_cnt * 1000000),
-            1000000 * (double)frame_cnt / (double)cx_time);
-
-    fclose(infile);
-
-    printf("Processed %ld frames.\n",(long int)frame_cnt-1);
-    for (i=0; i< NUM_ENCODERS; i++)
-    {
-        /* Calculate PSNR and print it out */
-        if ( (show_psnr) && (psnr_count[i]>0) )
-        {
-            int j;
-            double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
-                                        psnr_sse_total[i]);
-
-            fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
-
-            fprintf(stderr, " %.3lf", ovpsnr);
-            for (j = 0; j < 4; j++)
-            {
-                fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
-            }
-        }
-
-        if(vpx_codec_destroy(&codec[i]))
-            die_codec(&codec[i], "Failed to destroy codec");
-
-        vpx_img_free(&raw[i]);
-
-        if(!outfile[i])
-            continue;
-
-        /* Try to rewrite the file header with the actual frame count */
-        if(!fseek(outfile[i], 0, SEEK_SET))
-            write_ivf_file_header(outfile[i], &cfg[i], frame_cnt-1);
-        fclose(outfile[i]);
-    }
-    printf("\n");
-
-    return EXIT_SUCCESS;
+    if (vpx_codec_destroy(&codec[i]))
+      die_codec(&codec[i], "Failed to destroy codec");
+
+    vpx_img_free(&raw[i]);
+    vpx_video_writer_close(writers[i]);
+  }
+  printf("\n");
+
+  return EXIT_SUCCESS;
 }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -53,12 +53,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -15,12 +15,12 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"

-#include "../tools_common.h"
-#include "../video_writer.h"
+#include "./tools_common.h"
+#include "./video_writer.h"

 static const char *exec_name;

-void usage_exit(void) {
+void usage_exit() {
  fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
                  "encoding feature. Supports raw input only.\n");
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -1,919 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-/*
- * This is an example demonstrating how to implement a multi-layer
- * VP9 encoding scheme based on spatial scalability for video applications
- * that benefit from a scalable bitstream.
- */
-
-#include <math.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-
-
-#include "../args.h"
-#include "../tools_common.h"
-#include "../video_writer.h"
-
-#include "../vpx_ports/vpx_timer.h"
-#include "vpx/svc_context.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
-#include "../vpxstats.h"
-#include "vp9/encoder/vp9_encoder.h"
-#define OUTPUT_RC_STATS 1
-
-static const arg_def_t skip_frames_arg =
-    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
-static const arg_def_t frames_arg =
-    ARG_DEF("f", "frames", 1, "number of frames to encode");
-static const arg_def_t threads_arg =
-    ARG_DEF("th", "threads", 1, "number of threads to use");
-#if OUTPUT_RC_STATS
-static const arg_def_t output_rc_stats_arg =
-    ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
-#endif
-static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
-static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
-static const arg_def_t timebase_arg =
-    ARG_DEF("t", "timebase", 1, "timebase (num/den)");
-static const arg_def_t bitrate_arg = ARG_DEF(
-    "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
-static const arg_def_t spatial_layers_arg =
-    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
-static const arg_def_t temporal_layers_arg =
-    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
-static const arg_def_t temporal_layering_mode_arg =
-    ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
-        "VP9E_TEMPORAL_LAYERING_MODE");
-static const arg_def_t kf_dist_arg =
-    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
-static const arg_def_t scale_factors_arg =
-    ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)");
-static const arg_def_t passes_arg =
-    ARG_DEF("p", "passes", 1, "Number of passes (1/2)");
-static const arg_def_t pass_arg =
-    ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)");
-static const arg_def_t fpf_name_arg =
-    ARG_DEF(NULL, "fpf", 1, "First pass statistics file name");
-static const arg_def_t min_q_arg =
-    ARG_DEF(NULL, "min-q", 1, "Minimum quantizer");
-static const arg_def_t max_q_arg =
-    ARG_DEF(NULL, "max-q", 1, "Maximum quantizer");
-static const arg_def_t min_bitrate_arg =
-    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
-static const arg_def_t max_bitrate_arg =
-    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
-static const arg_def_t lag_in_frame_arg =
-    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
-        "generating any outputs");
-static const arg_def_t rc_end_usage_arg =
-    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
-static const arg_def_t speed_arg =
-    ARG_DEF("sp", "speed", 1, "speed configuration");
-static const arg_def_t aqmode_arg =
-    ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
-
-#if CONFIG_VP9_HIGHBITDEPTH
-static const struct arg_enum_list bitdepth_enum[] = {
-  {"8",  VPX_BITS_8},
-  {"10", VPX_BITS_10},
-  {"12", VPX_BITS_12},
-  {NULL, 0}
-};
-
-static const arg_def_t bitdepth_arg =
-    ARG_DEF_ENUM("d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ",
-                 bitdepth_enum);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-
-static const arg_def_t *svc_args[] = {
-  &frames_arg,        &width_arg,         &height_arg,
-  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
-  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
-  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
-  &max_bitrate_arg,   &temporal_layers_arg, &temporal_layering_mode_arg,
-  &lag_in_frame_arg,  &threads_arg,       &aqmode_arg,
-#if OUTPUT_RC_STATS
-  &output_rc_stats_arg,
-#endif
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  &bitdepth_arg,
-#endif
-  &speed_arg,
-  &rc_end_usage_arg,  NULL
-};
-
-static const uint32_t default_frames_to_skip = 0;
-static const uint32_t default_frames_to_code = 60 * 60;
-static const uint32_t default_width = 1920;
-static const uint32_t default_height = 1080;
-static const uint32_t default_timebase_num = 1;
-static const uint32_t default_timebase_den = 60;
-static const uint32_t default_bitrate = 1000;
-static const uint32_t default_spatial_layers = 5;
-static const uint32_t default_temporal_layers = 1;
-static const uint32_t default_kf_dist = 100;
-static const uint32_t default_temporal_layering_mode = 0;
-static const uint32_t default_output_rc_stats = 0;
-static const int32_t default_speed = -1;  // -1 means use library default.
-static const uint32_t default_threads = 0;  // zero means use library default.
-
-typedef struct {
-  const char *input_filename;
-  const char *output_filename;
-  uint32_t frames_to_code;
-  uint32_t frames_to_skip;
-  struct VpxInputContext input_ctx;
-  stats_io_t rc_stats;
-  int passes;
-  int pass;
-} AppInput;
-
-static const char *exec_name;
-
-void usage_exit(void) {
-  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
-          exec_name);
-  fprintf(stderr, "Options:\n");
-  arg_show_usage(stderr, svc_args);
-  exit(EXIT_FAILURE);
-}
-
-static void parse_command_line(int argc, const char **argv_,
-                               AppInput *app_input, SvcContext *svc_ctx,
-                               vpx_codec_enc_cfg_t *enc_cfg) {
-  struct arg arg = {0};
-  char **argv = NULL;
-  char **argi = NULL;
-  char **argj = NULL;
-  vpx_codec_err_t res;
-  int passes = 0;
-  int pass = 0;
-  const char *fpf_file_name = NULL;
-  unsigned int min_bitrate = 0;
-  unsigned int max_bitrate = 0;
-  char string_options[1024] = {0};
-
-  // initialize SvcContext with parameters that will be passed to vpx_svc_init
-  svc_ctx->log_level = SVC_LOG_DEBUG;
-  svc_ctx->spatial_layers = default_spatial_layers;
-  svc_ctx->temporal_layers = default_temporal_layers;
-  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
-#if OUTPUT_RC_STATS
-  svc_ctx->output_rc_stat = default_output_rc_stats;
-#endif
-  svc_ctx->speed = default_speed;
-  svc_ctx->threads = default_threads;
-
-  // start with default encoder configuration
-  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
-  if (res) {
-    die("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-  }
-  // update enc_cfg with app default values
-  enc_cfg->g_w = default_width;
-  enc_cfg->g_h = default_height;
-  enc_cfg->g_timebase.num = default_timebase_num;
-  enc_cfg->g_timebase.den = default_timebase_den;
-  enc_cfg->rc_target_bitrate = default_bitrate;
-  enc_cfg->kf_min_dist = default_kf_dist;
-  enc_cfg->kf_max_dist = default_kf_dist;
-  enc_cfg->rc_end_usage = VPX_CQ;
-
-  // initialize AppInput with default values
-  app_input->frames_to_code = default_frames_to_code;
-  app_input->frames_to_skip = default_frames_to_skip;
-
-  // process command line options
-  argv = argv_dup(argc - 1, argv_ + 1);
-  for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) {
-    arg.argv_step = 1;
-
-    if (arg_match(&arg, &frames_arg, argi)) {
-      app_input->frames_to_code = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &width_arg, argi)) {
-      enc_cfg->g_w = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &height_arg, argi)) {
-      enc_cfg->g_h = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &timebase_arg, argi)) {
-      enc_cfg->g_timebase = arg_parse_rational(&arg);
-    } else if (arg_match(&arg, &bitrate_arg, argi)) {
-      enc_cfg->rc_target_bitrate = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &skip_frames_arg, argi)) {
-      app_input->frames_to_skip = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &spatial_layers_arg, argi)) {
-      svc_ctx->spatial_layers = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
-      svc_ctx->temporal_layers = arg_parse_uint(&arg);
-#if OUTPUT_RC_STATS
-    } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
-      svc_ctx->output_rc_stat = arg_parse_uint(&arg);
-#endif
-    } else if (arg_match(&arg, &speed_arg, argi)) {
-      svc_ctx->speed = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &aqmode_arg, argi)) {
-      svc_ctx->aqmode = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &threads_arg, argi)) {
-      svc_ctx->threads = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
-      svc_ctx->temporal_layering_mode =
-          enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
-      if (svc_ctx->temporal_layering_mode) {
-        enc_cfg->g_error_resilient = 1;
-      }
-    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
-      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
-      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
-    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
-      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
-               string_options, arg.val);
-    } else if (arg_match(&arg, &passes_arg, argi)) {
-      passes = arg_parse_uint(&arg);
-      if (passes < 1 || passes > 2) {
-        die("Error: Invalid number of passes (%d)\n", passes);
-      }
-    } else if (arg_match(&arg, &pass_arg, argi)) {
-      pass = arg_parse_uint(&arg);
-      if (pass < 1 || pass > 2) {
-        die("Error: Invalid pass selected (%d)\n", pass);
-      }
-    } else if (arg_match(&arg, &fpf_name_arg, argi)) {
-      fpf_file_name = arg.val;
-    } else if (arg_match(&arg, &min_q_arg, argi)) {
-      snprintf(string_options, sizeof(string_options), "%s min-quantizers=%s",
-               string_options, arg.val);
-    } else if (arg_match(&arg, &max_q_arg, argi)) {
-      snprintf(string_options, sizeof(string_options), "%s max-quantizers=%s",
-               string_options, arg.val);
-    } else if (arg_match(&arg, &min_bitrate_arg, argi)) {
-      min_bitrate = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
-      max_bitrate = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
-      enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
-    } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
-      enc_cfg->rc_end_usage = arg_parse_uint(&arg);
-#if CONFIG_VP9_HIGHBITDEPTH
-    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
-      enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
-      switch (enc_cfg->g_bit_depth) {
-        case VPX_BITS_8:
-          enc_cfg->g_input_bit_depth = 8;
-          enc_cfg->g_profile = 0;
-          break;
-        case VPX_BITS_10:
-          enc_cfg->g_input_bit_depth = 10;
-          enc_cfg->g_profile = 2;
-          break;
-         case VPX_BITS_12:
-          enc_cfg->g_input_bit_depth = 12;
-          enc_cfg->g_profile = 2;
-          break;
-        default:
-          die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth);
-          break;
-      }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    } else {
-      ++argj;
-    }
-  }
-
-  // There will be a space in front of the string options
-  if (strlen(string_options) > 0)
-    vpx_svc_set_options(svc_ctx, string_options + 1);
-
-  if (passes == 0 || passes == 1) {
-    if (pass) {
-      fprintf(stderr, "pass is ignored since there's only one pass\n");
-    }
-    enc_cfg->g_pass = VPX_RC_ONE_PASS;
-  } else {
-    if (pass == 0) {
-      die("pass must be specified when passes is 2\n");
-    }
-
-    if (fpf_file_name == NULL) {
-      die("fpf must be specified when passes is 2\n");
-    }
-
-    if (pass == 1) {
-      enc_cfg->g_pass = VPX_RC_FIRST_PASS;
-      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) {
-        fatal("Failed to open statistics store");
-      }
-    } else {
-      enc_cfg->g_pass = VPX_RC_LAST_PASS;
-      if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) {
-        fatal("Failed to open statistics store");
-      }
-      enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats);
-    }
-    app_input->passes = passes;
-    app_input->pass = pass;
-  }
-
-  if (enc_cfg->rc_target_bitrate > 0) {
-    if (min_bitrate > 0) {
-      enc_cfg->rc_2pass_vbr_minsection_pct =
-          min_bitrate * 100 / enc_cfg->rc_target_bitrate;
-    }
-    if (max_bitrate > 0) {
-      enc_cfg->rc_2pass_vbr_maxsection_pct =
-          max_bitrate * 100 / enc_cfg->rc_target_bitrate;
-    }
-  }
-
-  // Check for unrecognized options
-  for (argi = argv; *argi; ++argi)
-    if (argi[0][0] == '-' && strlen(argi[0]) > 1)
-      die("Error: Unrecognized option %s\n", *argi);
-
-  if (argv[0] == NULL || argv[1] == 0) {
-    usage_exit();
-  }
-  app_input->input_filename = argv[0];
-  app_input->output_filename = argv[1];
-  free(argv);
-
-  if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 ||
-      enc_cfg->g_h % 2)
-    die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h);
-
-  printf(
-      "Codec %s\nframes: %d, skip: %d\n"
-      "layers: %d\n"
-      "width %d, height: %d,\n"
-      "num: %d, den: %d, bitrate: %d,\n"
-      "gop size: %d\n",
-      vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code,
-      app_input->frames_to_skip,
-      svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h,
-      enc_cfg->g_timebase.num, enc_cfg->g_timebase.den,
-      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
-}
-
-#if OUTPUT_RC_STATS
-// For rate control encoding stats.
-struct RateControlStats {
-  // Number of input frames per layer.
-  int layer_input_frames[VPX_MAX_LAYERS];
-  // Total (cumulative) number of encoded frames per layer.
-  int layer_tot_enc_frames[VPX_MAX_LAYERS];
-  // Number of encoded non-key frames per layer.
-  int layer_enc_frames[VPX_MAX_LAYERS];
-  // Framerate per layer (cumulative).
-  double layer_framerate[VPX_MAX_LAYERS];
-  // Target average frame size per layer (per-frame-bandwidth per layer).
-  double layer_pfb[VPX_MAX_LAYERS];
-  // Actual average frame size per layer.
-  double layer_avg_frame_size[VPX_MAX_LAYERS];
-  // Average rate mismatch per layer (|target - actual| / target).
-  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
-  // Actual encoding bitrate per layer (cumulative).
-  double layer_encoding_bitrate[VPX_MAX_LAYERS];
-  // Average of the short-time encoder actual bitrate.
-  // TODO(marpan): Should we add these short-time stats for each layer?
-  double avg_st_encoding_bitrate;
-  // Variance of the short-time encoder actual bitrate.
-  double variance_st_encoding_bitrate;
-  // Window (number of frames) for computing short-time encoding bitrate.
-  int window_size;
-  // Number of window measurements.
-  int window_count;
-};
-
-// Note: these rate control stats assume only 1 key frame in the
-// sequence (i.e., first frame only).
-static void set_rate_control_stats(struct RateControlStats *rc,
-                                     vpx_codec_enc_cfg_t *cfg) {
-  unsigned int sl, tl;
-  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
-  // per-frame-bandwidth, for the rate control encoding stats below.
-  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
-
-  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
-    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
-      const int layer = sl * cfg->ts_number_layers + tl;
-      const int tlayer0 = sl * cfg->ts_number_layers;
-      if (cfg->ts_number_layers == 1)
-        rc->layer_framerate[layer] = framerate;
-      else
-        rc->layer_framerate[layer] =
-          framerate / cfg->ts_rate_decimator[tl];
-      if (tl > 0) {
-        rc->layer_pfb[layer] = 1000.0 *
-            (cfg->layer_target_bitrate[layer] -
-                cfg->layer_target_bitrate[layer - 1]) /
-            (rc->layer_framerate[layer] -
-                rc->layer_framerate[layer - 1]);
-      } else {
-        rc->layer_pfb[tlayer0] = 1000.0 *
-            cfg->layer_target_bitrate[tlayer0] /
-            rc->layer_framerate[tlayer0];
-      }
-      rc->layer_input_frames[layer] = 0;
-      rc->layer_enc_frames[layer] = 0;
-      rc->layer_tot_enc_frames[layer] = 0;
-      rc->layer_encoding_bitrate[layer] = 0.0;
-      rc->layer_avg_frame_size[layer] = 0.0;
-      rc->layer_avg_rate_mismatch[layer] = 0.0;
-    }
-  }
-  rc->window_count = 0;
-  rc->window_size = 15;
-  rc->avg_st_encoding_bitrate = 0.0;
-  rc->variance_st_encoding_bitrate = 0.0;
-}
-
-static void printout_rate_control_summary(struct RateControlStats *rc,
-                                          vpx_codec_enc_cfg_t *cfg,
-                                          int frame_cnt) {
-  unsigned int sl, tl;
-  int tot_num_frames = 0;
-  double perc_fluctuation = 0.0;
-  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
-  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
-      cfg->ss_number_layers, cfg->ts_number_layers);
-  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
-    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
-      const int layer = sl * cfg->ts_number_layers + tl;
-      const int num_dropped = (tl > 0) ?
-          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
-          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
-      if (!sl)
-        tot_num_frames += rc->layer_input_frames[layer];
-      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
-          rc->layer_encoding_bitrate[layer] / tot_num_frames;
-      rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
-          rc->layer_enc_frames[layer];
-      rc->layer_avg_rate_mismatch[layer] =
-          100.0 * rc->layer_avg_rate_mismatch[layer] /
-          rc->layer_enc_frames[layer];
-      printf("For layer#: sl%d tl%d \n", sl, tl);
-      printf("Bitrate (target vs actual): %d %f.0 kbps\n",
-             cfg->layer_target_bitrate[layer],
-             rc->layer_encoding_bitrate[layer]);
-      printf("Average frame size (target vs actual): %f %f bits\n",
-             rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
-      printf("Average rate_mismatch: %f\n",
-             rc->layer_avg_rate_mismatch[layer]);
-      printf("Number of input frames, encoded (non-key) frames, "
-          "and percent dropped frames: %d %d %f.0 \n",
-          rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
-          100.0 * num_dropped / rc->layer_input_frames[layer]);
-      printf("\n");
-    }
-  }
-  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
-  rc->variance_st_encoding_bitrate =
-      rc->variance_st_encoding_bitrate / rc->window_count -
-      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
-  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
-      rc->avg_st_encoding_bitrate;
-  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
-  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
-         rc->avg_st_encoding_bitrate,
-         sqrt(rc->variance_st_encoding_bitrate),
-         perc_fluctuation);
-  if (frame_cnt != tot_num_frames)
-    die("Error: Number of input frames not equal to output encoded frames != "
-        "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
-}
-
-vpx_codec_err_t parse_superframe_index(const uint8_t *data,
-                                       size_t data_sz,
-                                       uint32_t sizes[8], int *count) {
-  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
-  // it is a super frame index. If the last byte of real video compression
-  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
-  // not the associated matching marker byte at the front of the index we have
-  // an invalid bitstream and need to return an error.
-
-  uint8_t marker;
-
-  marker = *(data + data_sz - 1);
-  *count = 0;
-
-
-  if ((marker & 0xe0) == 0xc0) {
-    const uint32_t frames = (marker & 0x7) + 1;
-    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
-    const size_t index_sz = 2 + mag * frames;
-
-    // This chunk is marked as having a superframe index but doesn't have
-    // enough data for it, thus it's an invalid superframe index.
-    if (data_sz < index_sz)
-      return VPX_CODEC_CORRUPT_FRAME;
-
-    {
-      const uint8_t marker2 = *(data + data_sz - index_sz);
-
-      // This chunk is marked as having a superframe index but doesn't have
-      // the matching marker byte at the front of the index therefore it's an
-      // invalid chunk.
-      if (marker != marker2)
-        return VPX_CODEC_CORRUPT_FRAME;
-    }
-
-    {
-      // Found a valid superframe index.
-      uint32_t i, j;
-      const uint8_t *x = &data[data_sz - index_sz + 1];
-
-      for (i = 0; i < frames; ++i) {
-        uint32_t this_sz = 0;
-
-        for (j = 0; j < mag; ++j)
-          this_sz |= (*x++) << (j * 8);
-        sizes[i] = this_sz;
-      }
-      *count = frames;
-    }
-  }
-  return VPX_CODEC_OK;
-}
-#endif
-
-// Example pattern for spatial layers and 2 temporal layers used in the
-// bypass/flexible mode. The pattern corresponds to the pattern
-// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
-// non-flexible mode.
-void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
-                                 int is_key_frame,
-                                 vpx_svc_ref_frame_config_t *ref_frame_config) {
-  for (sl = 0; sl < num_spatial_layers; ++sl) {
-    if (!tl) {
-      if (!sl) {
-        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
-                                            VP8_EFLAG_NO_REF_ARF |
-                                            VP8_EFLAG_NO_UPD_GF |
-                                            VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        if (is_key_frame) {
-          ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST |
-                                              VP8_EFLAG_NO_REF_ARF |
-                                              VP8_EFLAG_NO_UPD_GF |
-                                              VP8_EFLAG_NO_UPD_ARF;
-        } else {
-        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
-                                            VP8_EFLAG_NO_UPD_GF |
-                                            VP8_EFLAG_NO_UPD_ARF;
-        }
-      }
-    } else if (tl == 1) {
-      if (!sl) {
-        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
-                                            VP8_EFLAG_NO_REF_ARF |
-                                            VP8_EFLAG_NO_UPD_LAST |
-                                            VP8_EFLAG_NO_UPD_GF;
-      } else {
-        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
-                                            VP8_EFLAG_NO_UPD_LAST |
-                                            VP8_EFLAG_NO_UPD_GF;
-      }
-    }
-    if (tl == 0) {
-      ref_frame_config->lst_fb_idx[sl] = sl;
-      if (sl)
-        ref_frame_config->gld_fb_idx[sl] = sl - 1;
-      else
-        ref_frame_config->gld_fb_idx[sl] = 0;
-      ref_frame_config->alt_fb_idx[sl] = 0;
-    } else if (tl == 1) {
-      ref_frame_config->lst_fb_idx[sl] = sl;
-      ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
-      ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
-    }
-  }
-}
-
-int main(int argc, const char **argv) {
-  AppInput app_input = {0};
-  VpxVideoWriter *writer = NULL;
-  VpxVideoInfo info = {0};
-  vpx_codec_ctx_t codec;
-  vpx_codec_enc_cfg_t enc_cfg;
-  SvcContext svc_ctx;
-  uint32_t i;
-  uint32_t frame_cnt = 0;
-  vpx_image_t raw;
-  vpx_codec_err_t res;
-  int pts = 0;            /* PTS starts at 0 */
-  int frame_duration = 1; /* 1 timebase tick per frame */
-  FILE *infile = NULL;
-  int end_of_stream = 0;
-  int frames_received = 0;
-#if OUTPUT_RC_STATS
-  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
-  struct RateControlStats rc;
-  vpx_svc_layer_id_t layer_id;
-  vpx_svc_ref_frame_config_t ref_frame_config;
-  int sl, tl;
-  double sum_bitrate = 0.0;
-  double sum_bitrate2 = 0.0;
-  double framerate  = 30.0;
-#endif
-  struct vpx_usec_timer timer;
-  int64_t cx_time = 0;
-  memset(&svc_ctx, 0, sizeof(svc_ctx));
-  svc_ctx.log_print = 1;
-  exec_name = argv[0];
-  parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg);
-
-  // Allocate image buffer
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ?
-                         VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016,
-                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
-    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
-  }
-#else
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) {
-    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  if (!(infile = fopen(app_input.input_filename, "rb")))
-    die("Failed to open %s for reading\n", app_input.input_filename);
-
-  // Initialize codec
-  if (vpx_svc_init(&svc_ctx, &codec, vpx_codec_vp9_cx(), &enc_cfg) !=
-      VPX_CODEC_OK)
-    die("Failed to initialize encoder\n");
-
-#if OUTPUT_RC_STATS
-  if (svc_ctx.output_rc_stat) {
-    set_rate_control_stats(&rc, &enc_cfg);
-    framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
-  }
-#endif
-
-  info.codec_fourcc = VP9_FOURCC;
-  info.time_base.numerator = enc_cfg.g_timebase.num;
-  info.time_base.denominator = enc_cfg.g_timebase.den;
-
-  if (!(app_input.passes == 2 && app_input.pass == 1)) {
-    // We don't save the bitstream for the 1st pass on two pass rate control
-    writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF,
-                                   &info);
-    if (!writer)
-      die("Failed to open %s for writing\n", app_input.output_filename);
-  }
-#if OUTPUT_RC_STATS
-  // For now, just write temporal layer streams.
-  // TODO(wonkap): do spatial by re-writing superframe.
-  if (svc_ctx.output_rc_stat) {
-    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
-      char file_name[PATH_MAX];
-
-      snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
-               app_input.output_filename, tl);
-      outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
-      if (!outfile[tl])
-        die("Failed to open %s for writing", file_name);
-    }
-  }
-#endif
-
-  // skip initial frames
-  for (i = 0; i < app_input.frames_to_skip; ++i)
-    vpx_img_read(&raw, infile);
-
-  if (svc_ctx.speed != -1)
-    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
-  if (svc_ctx.threads)
-    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
-  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
-    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-
-
-  // Encode frames
-  while (!end_of_stream) {
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *cx_pkt;
-    if (frame_cnt >= app_input.frames_to_code || !vpx_img_read(&raw, infile)) {
-      // We need one extra vpx_svc_encode call at end of stream to flush
-      // encoder and get remaining data
-      end_of_stream = 1;
-    }
-
-    // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
-    // and the buffer indices for each spatial layer of the current
-    // (super)frame to be encoded. The temporal layer_id for the current frame
-    // also needs to be set.
-    // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
-    // mode to "VP9E_LAYERING_MODE_BYPASS".
-    if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
-      layer_id.spatial_layer_id = 0;
-      // Example for 2 temporal layers.
-      if (frame_cnt % 2 == 0)
-        layer_id.temporal_layer_id = 0;
-      else
-        layer_id.temporal_layer_id = 1;
-      // Note that we only set the temporal layer_id, since we are calling
-      // the encode for the whole superframe. The encoder will internally loop
-      // over all the spatial layers for the current superframe.
-      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
-      set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
-                                  svc_ctx.spatial_layers,
-                                  frame_cnt == 0,
-                                  &ref_frame_config);
-      vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
-                        &ref_frame_config);
-      // Keep track of input frames, to account for frame drops in rate control
-      // stats/metrics.
-      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
-                                layer_id.temporal_layer_id];
-      }
-    }
-
-    vpx_usec_timer_start(&timer);
-    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
-                         pts, frame_duration, svc_ctx.speed >= 5 ?
-                         VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
-    vpx_usec_timer_mark(&timer);
-    cx_time += vpx_usec_timer_elapsed(&timer);
-
-    printf("%s", vpx_svc_get_message(&svc_ctx));
-    fflush(stdout);
-    if (res != VPX_CODEC_OK) {
-      die_codec(&codec, "Failed to encode frame");
-    }
-
-    while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
-      switch (cx_pkt->kind) {
-        case VPX_CODEC_CX_FRAME_PKT: {
-          SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
-          if (cx_pkt->data.frame.sz > 0) {
-#if OUTPUT_RC_STATS
-            uint32_t sizes[8];
-            int count = 0;
-#endif
-            vpx_video_writer_write_frame(writer,
-                                         cx_pkt->data.frame.buf,
-                                         cx_pkt->data.frame.sz,
-                                         cx_pkt->data.frame.pts);
-#if OUTPUT_RC_STATS
-            // TODO(marpan/wonkap): Put this (to line728) in separate function.
-            if (svc_ctx.output_rc_stat) {
-              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
-              parse_superframe_index(cx_pkt->data.frame.buf,
-                                     cx_pkt->data.frame.sz, sizes, &count);
-              // Note computing input_layer_frames here won't account for frame
-              // drops in rate control stats.
-              // TODO(marpan): Fix this for non-bypass mode so we can get stats
-              // for dropped frames.
-              if (svc_ctx.temporal_layering_mode !=
-                  VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
-                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                  ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
-                                         layer_id.temporal_layer_id];
-                }
-              }
-              for (tl = layer_id.temporal_layer_id;
-                  tl < enc_cfg.ts_number_layers; ++tl) {
-                vpx_video_writer_write_frame(outfile[tl],
-                                             cx_pkt->data.frame.buf,
-                                             cx_pkt->data.frame.sz,
-                                             cx_pkt->data.frame.pts);
-              }
-
-              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                for (tl = layer_id.temporal_layer_id;
-                    tl < enc_cfg.ts_number_layers; ++tl) {
-                  const int layer = sl * enc_cfg.ts_number_layers + tl;
-                  ++rc.layer_tot_enc_frames[layer];
-                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
-                  // Keep count of rate control stats per layer, for non-key
-                  // frames.
-                  if (tl == layer_id.temporal_layer_id &&
-                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
-                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
-                    rc.layer_avg_rate_mismatch[layer] +=
-                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
-                        rc.layer_pfb[layer];
-                    ++rc.layer_enc_frames[layer];
-                  }
-                }
-              }
-
-              // Update for short-time encoding bitrate states, for moving
-              // window of size rc->window, shifted by rc->window / 2.
-              // Ignore first window segment, due to key frame.
-              if (frame_cnt > rc.window_size) {
-                tl = layer_id.temporal_layer_id;
-                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
-                }
-                if (frame_cnt % rc.window_size == 0) {
-                  rc.window_count += 1;
-                  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
-                  rc.variance_st_encoding_bitrate +=
-                      (sum_bitrate / rc.window_size) *
-                      (sum_bitrate / rc.window_size);
-                  sum_bitrate = 0.0;
-                }
-              }
-
-              // Second shifted window.
-              if (frame_cnt > rc.window_size + rc.window_size / 2) {
-               tl = layer_id.temporal_layer_id;
-               for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-                 sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
-               }
-
-               if (frame_cnt > 2 * rc.window_size &&
-                  frame_cnt % rc.window_size == 0) {
-                 rc.window_count += 1;
-                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
-                 rc.variance_st_encoding_bitrate +=
-                    (sum_bitrate2 / rc.window_size) *
-                    (sum_bitrate2 / rc.window_size);
-                 sum_bitrate2 = 0.0;
-               }
-              }
-            }
-#endif
-          }
-
-          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
-                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
-                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
-          if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
-            si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
-          ++frames_received;
-          break;
-        }
-        case VPX_CODEC_STATS_PKT: {
-          stats_write(&app_input.rc_stats,
-                      cx_pkt->data.twopass_stats.buf,
-                      cx_pkt->data.twopass_stats.sz);
-          break;
-        }
-        default: {
-          break;
-        }
-      }
-    }
-
-    if (!end_of_stream) {
-      ++frame_cnt;
-      pts += frame_duration;
-    }
-  }
-
-  // Compensate for the extra frame count for the bypass mode.
-  if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
-    for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
-      const int layer = sl * enc_cfg.ts_number_layers +
-          layer_id.temporal_layer_id;
-      --rc.layer_input_frames[layer];
-    }
-  }
-
-  printf("Processed %d frames\n", frame_cnt);
-  fclose(infile);
-#if OUTPUT_RC_STATS
-  if (svc_ctx.output_rc_stat) {
-    printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
-    printf("\n");
-  }
-#endif
-  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-  if (app_input.passes == 2)
-    stats_close(&app_input.rc_stats, 1);
-  if (writer) {
-    vpx_video_writer_close(writer);
-  }
-#if OUTPUT_RC_STATS
-  if (svc_ctx.output_rc_stat) {
-    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
-      vpx_video_writer_close(outfile[tl]);
-    }
-  }
-#endif
-  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
-         frame_cnt,
-         1000 * (float)cx_time / (double)(frame_cnt * 1000000),
-         1000000 * (double)frame_cnt / (double)cx_time);
-  vpx_img_free(&raw);
-  // display average size, psnr
-  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
-  vpx_svc_release(&svc_ctx);
-  return EXIT_SUCCESS;
-}
--- a/examples/vp9cx_set_ref.c
+++ b/examples/vp9cx_set_ref.c
@@ -0,0 +1,441 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+// VP9 Set Reference Frame
+// =======================
+//
+// This is an example demonstrating how to overwrite the VP9 encoder's
+// internal reference frame. In the sample we set the last frame to the
+// current frame. This technique could be used to bounce between two cameras.
+//
+// The decoder would also have to set the reference frame to the same value
+// on the same frame, or the video will become corrupt. The 'test_decode'
+// variable is set to 1 in this example, which makes it check that the
+// encoder and decoder results are matching.
+//
+// Usage
+// -----
+//   <program> <width> <height> <infile> <outfile> <frame>
+//
+// This example encodes a raw video. The last argument specifies the frame
+// number to update the reference frame on. It is parsed with atoi() and must
+// be greater than zero. A value of 1 is accepted but has no effect, because
+// setting the reference for the first frame isn't supported.
+//
+//
+// Extra Variables
+// ---------------
+// This example maintains the frame number passed on the command line
+// in the `update_frame_num` variable.
+//
+//
+// Configuration
+// -------------
+//
+// The reference frame is updated on the frame specified on the command
+// line.
+//
+// Observing The Effects
+// ---------------------
+// The encoder and decoder results should be matching when the same reference
+// frame setting operation is done in both encoder and decoder. Otherwise,
+// the encoder/decoder mismatch would be seen.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "vpx/vp8cx.h"
+#include "vpx/vpx_decoder.h"
+#include "vpx/vpx_encoder.h"
+
+#include "./tools_common.h"
+#include "./video_writer.h"
+
+static const char *exec_name;
+
+// Prints the command-line synopsis to stderr and exits with EXIT_FAILURE.
+void usage_exit(void) {
+  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> "
+          "<frame>\n",
+          exec_name);
+  exit(EXIT_FAILURE);
+}
+
+// Returns 1 when both images have the same format and display size and
+// identical Y, U and V samples over the displayed area, 0 otherwise.
+static int compare_img(const vpx_image_t *const img1,
+                       const vpx_image_t *const img2) {
+  const uint32_t l_w = img1->d_w;
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+  uint32_t i;
+  int match = 1;
+
+  // Bail out before touching any samples: the row loops below size their
+  // reads from img1 and could run past the end of a smaller img2.
+  if (img1->fmt != img2->fmt ||
+      img1->d_w != img2->d_w ||
+      img1->d_h != img2->d_h)
+    return 0;
+
+  for (i = 0; i < img1->d_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y],
+                     img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y],
+                     l_w) == 0);
+
+  for (i = 0; i < c_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U],
+                     img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U],
+                     c_w) == 0);
+
+  for (i = 0; i < c_h; ++i)
+    match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V],
+                     img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V],
+                     c_w) == 0);
+
+  return match;
+}
+
+#define mmin(a, b)  ((a) < (b) ? (a) : (b))
+
+// Scans the w x h area of one plane in blocks of bw x bh samples and stops at
+// the first sample that differs between the two images. loc[] receives
+// {row, column, img1 sample, img2 sample} for that sample, or stays all -1
+// when the area is identical in both images.
+static void find_plane_mismatch(const vpx_image_t *const img1,
+                                const vpx_image_t *const img2,
+                                int plane, uint32_t w, uint32_t h,
+                                uint32_t bw, uint32_t bh, int loc[4]) {
+  uint32_t i, j;
+  int match = 1;
+
+  loc[0] = loc[1] = loc[2] = loc[3] = -1;
+  for (i = 0; match && i < h; i += bh) {
+    for (j = 0; match && j < w; j += bw) {
+      int k, l;
+      // Clip the block at the plane edge so that blocks never overlap.
+      const int si = mmin(i + bh, h) - i;
+      const int sj = mmin(j + bw, w) - j;
+      for (k = 0; match && k < si; ++k) {
+        for (l = 0; match && l < sj; ++l) {
+          const int v1 =
+              *(img1->planes[plane] + (i + k) * img1->stride[plane] + j + l);
+          const int v2 =
+              *(img2->planes[plane] + (i + k) * img2->stride[plane] + j + l);
+          if (v1 != v2) {
+            loc[0] = i + k;
+            loc[1] = j + l;
+            loc[2] = v1;
+            loc[3] = v2;
+            match = 0;
+          }
+        }
+      }
+    }
+  }
+}
+
+// Locates the first differing sample in each of the Y, U and V planes.
+// yloc, uloc and vloc each receive {row, column, img1 sample, img2 sample},
+// or all -1 when that plane matches. Y is scanned in 64x64 blocks; U and V
+// use that block size shifted down by the chroma shifts.
+static void find_mismatch(const vpx_image_t *const img1,
+                          const vpx_image_t *const img2,
+                          int yloc[4], int uloc[4], int vloc[4]) {
+  const uint32_t bsize = 64;
+  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
+  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
+  const uint32_t c_w =
+      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
+  const uint32_t c_h =
+      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
+
+  find_plane_mismatch(img1, img2, VPX_PLANE_Y, img1->d_w, img1->d_h,
+                      bsize, bsize, yloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_U, c_w, c_h,
+                      bsizex, bsizey, uloc);
+  find_plane_mismatch(img1, img2, VPX_PLANE_V, c_w, c_h,
+                      bsizex, bsizey, vloc);
+}
+
+// Fetches the reference frame with index 0 from both the encoder and the
+// decoder and compares the two images. On a difference, sets *mismatch_seen
+// and prints the first differing sample of each plane. Returns immediately
+// when *mismatch_seen is already set. cfg is not used.
+static void testing_decode(vpx_codec_ctx_t *encoder,
+                           vpx_codec_ctx_t *decoder,
+                           vpx_codec_enc_cfg_t *cfg,
+                           unsigned int frame_out,
+                           int *mismatch_seen) {
+  vpx_image_t enc_img, dec_img;
+  struct vp9_ref_frame ref_enc, ref_dec;
+
+  (void)cfg;
+
+  if (*mismatch_seen)
+    return;
+
+  ref_enc.idx = 0;
+  ref_dec.idx = 0;
+  if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc))
+    die_codec(encoder, "Failed to get encoder reference frame");
+  enc_img = ref_enc.img;
+  if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec))
+    die_codec(decoder, "Failed to get decoder reference frame");
+  dec_img = ref_dec.img;
+
+  if (!compare_img(&enc_img, &dec_img)) {
+    int y[4], u[4], v[4];
+
+    *mismatch_seen = 1;
+
+    find_mismatch(&enc_img, &dec_img, y, u, v);
+    printf("Encode/decode mismatch on frame %u at"
+           " Y[%d, %d] {%d/%d},"
+           " U[%d, %d] {%d/%d},"
+           " V[%d, %d] {%d/%d}",
+           frame_out,
+           y[0], y[1], y[2], y[3],
+           u[0], u[1], u[2], u[3],
+           v[0], v[1], v[2], v[3]);
+  }
+
+  vpx_img_free(&enc_img);
+  vpx_img_free(&dec_img);
+}
+
+// Encodes img (main() passes NULL to flush the encoder) and writes every
+// frame packet produced to writer. *frame_out is incremented for each packet
+// that is not flagged as a fragment. When test_decode is set, each packet is
+// also decoded with dcodec and, if any frame data was produced, the references
+// are compared via testing_decode(). Returns 1 if any packet was produced.
+static int encode_frame(vpx_codec_ctx_t *ecodec,
+                        vpx_codec_enc_cfg_t *cfg,
+                        vpx_image_t *img,
+                        unsigned int frame_in,
+                        VpxVideoWriter *writer,
+                        int test_decode,
+                        vpx_codec_ctx_t *dcodec,
+                        unsigned int *frame_out,
+                        int *mismatch_seen) {
+  int got_pkts = 0;
+  vpx_codec_iter_t iter = NULL;
+  const vpx_codec_cx_pkt_t *pkt = NULL;
+  int got_data;
+  const vpx_codec_err_t res = vpx_codec_encode(ecodec, img, frame_in, 1,
+                                               0, VPX_DL_GOOD_QUALITY);
+  if (res != VPX_CODEC_OK)
+    die_codec(ecodec, "Failed to encode frame");
+
+  got_data = 0;
+
+  while ((pkt = vpx_codec_get_cx_data(ecodec, &iter)) != NULL) {
+    got_pkts = 1;
+
+    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
+      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
+
+      if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
+        *frame_out += 1;
+      }
+
+      if (!vpx_video_writer_write_frame(writer,
+                                        pkt->data.frame.buf,
+                                        pkt->data.frame.sz,
+                                        pkt->data.frame.pts)) {
+        die_codec(ecodec, "Failed to write compressed frame");
+      }
+      printf(keyframe ? "K" : ".");
+      fflush(stdout);
+      got_data = 1;
+
+      // Decode 1 frame.
+      if (test_decode) {
+        if (vpx_codec_decode(dcodec, pkt->data.frame.buf,
+                             (unsigned int)pkt->data.frame.sz, NULL, 0))
+          die_codec(dcodec, "Failed to decode frame.");
+      }
+    }
+  }
+
+  // Mismatch checking
+  if (got_data && test_decode) {
+    testing_decode(ecodec, dcodec, cfg, *frame_out, mismatch_seen);
+  }
+
+  return got_pkts;
+}
+
+// Parses the command line, encodes the raw input with VP9 into an IVF file,
+// overwrites the LAST reference on the requested frame in both the encoder
+// and the decoder, and reports whether their results stayed in sync.
+int main(int argc, char **argv) {
+  FILE *infile = NULL;
+  // Encoder
+  vpx_codec_ctx_t ecodec = {0};
+  vpx_codec_enc_cfg_t cfg = {0};
+  unsigned int frame_in = 0;
+  vpx_image_t raw;
+  vpx_codec_err_t res;
+  VpxVideoInfo info = {0};
+  VpxVideoWriter *writer = NULL;
+  const VpxInterface *encoder = NULL;
+
+  // Test encoder/decoder mismatch.
+  int test_decode = 1;
+  // Decoder
+  vpx_codec_ctx_t dcodec;
+  unsigned int frame_out = 0;
+
+  // The frame number to set reference frame on
+  int update_frame_num = 0;
+  int mismatch_seen = 0;
+
+  const int fps = 30;
+  const int bitrate = 500;
+
+  const char *codec_used = "vp9";
+  const char *width_arg = NULL;
+  const char *height_arg = NULL;
+  const char *infile_arg = NULL;
+  const char *outfile_arg = NULL;
+  exec_name = argv[0];
+
+  if (argc != 6)
+    die("Invalid number of arguments");
+
+  width_arg = argv[1];
+  height_arg = argv[2];
+  infile_arg = argv[3];
+  outfile_arg = argv[4];
+
+  encoder = get_vpx_encoder_by_name(codec_used);
+  if (!encoder)
+    die("Unsupported codec.");
+
+  update_frame_num = atoi(argv[5]);
+  if (update_frame_num <= 0)
+    die("Couldn't parse frame number '%s'\n", argv[5]);
+
+  info.codec_fourcc = encoder->fourcc;
+  info.frame_width = strtol(width_arg, NULL, 0);
+  info.frame_height = strtol(height_arg, NULL, 0);
+  info.time_base.numerator = 1;
+  info.time_base.denominator = fps;
+
+  if (info.frame_width <= 0 ||
+      info.frame_height <= 0 ||
+      (info.frame_width % 2) != 0 ||
+      (info.frame_height % 2) != 0) {
+    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
+  }
+
+  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
+                                             info.frame_height, 1)) {
+    die("Failed to allocate image.");
+  }
+
+  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
+
+  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
+  if (res)
+    die_codec(&ecodec, "Failed to get default codec config.");
+
+  cfg.g_w = info.frame_width;
+  cfg.g_h = info.frame_height;
+  cfg.g_timebase.num = info.time_base.numerator;
+  cfg.g_timebase.den = info.time_base.denominator;
+  cfg.rc_target_bitrate = bitrate;
+  cfg.g_lag_in_frames = 25;
+
+  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
+  if (!writer)
+    die("Failed to open %s for writing.", outfile_arg);
+
+  if (!(infile = fopen(infile_arg, "rb")))
+    die("Failed to open %s for reading.", infile_arg);
+
+  if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0))
+    die_codec(&ecodec, "Failed to initialize encoder");
+
+  // Disable alt_ref.
+  if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0))
+    die_codec(&ecodec, "Failed to set enable auto alt ref");
+
+  if (test_decode) {
+    const VpxInterface *decoder = get_vpx_decoder_by_name(codec_used);
+    // Guard against a failed lookup, as is done for the encoder above.
+    if (!decoder)
+      die("Unsupported codec.");
+    if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0))
+      die_codec(&dcodec, "Failed to initialize decoder.");
+  }
+
+  // Encode frames.
+  while (vpx_img_read(&raw, infile)) {
+    // In VP9, the reference buffers (cm->frame_buffs[i].buf) are allocated
+    // while calling vpx_codec_encode(), thus, setting reference for 1st frame
+    // isn't supported.
+    if (update_frame_num > 1 &&
+        frame_out + 1 == (unsigned int)update_frame_num) {
+      vpx_ref_frame_t ref;
+      ref.frame_type = VP8_LAST_FRAME;
+      ref.img = raw;
+      // Set reference frame in encoder.
+      if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref))
+        die_codec(&ecodec, "Failed to set reference frame");
+
+      // If set_reference in decoder is commented out, the enc/dec mismatch
+      // would be seen.
+      if (test_decode) {
+        if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref))
+          die_codec(&dcodec, "Failed to set reference frame");
+      }
+    }
+
+    encode_frame(&ecodec, &cfg, &raw, frame_in, writer, test_decode,
+                 &dcodec, &frame_out, &mismatch_seen);
+    frame_in++;
+    if (mismatch_seen)
+      break;
+  }
+
+  // Flush encoder.
+  if (!mismatch_seen)
+    while (encode_frame(&ecodec, &cfg, NULL, frame_in, writer, test_decode,
+                        &dcodec, &frame_out, &mismatch_seen)) {};
+
+  printf("\n");
+  fclose(infile);
+  printf("Processed %u frames.\n", frame_out);
+
+  if (test_decode) {
+    if (!mismatch_seen)
+      printf("Encoder/decoder results are matching.\n");
+    else
+      printf("Encoder/decoder results are NOT matching.\n");
+  }
+
+  if (test_decode)
+    if (vpx_codec_destroy(&dcodec))
+      die_codec(&dcodec, "Failed to destroy decoder");
+
+  vpx_img_free(&raw);
+  if (vpx_codec_destroy(&ecodec))
+    die_codec(&ecodec, "Failed to destroy encoder.");
+
+  vpx_video_writer_close(writer);
+
+  return EXIT_SUCCESS;
+}
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -1,852 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-//  This is an example demonstrating how to implement a multi-layer VPx
-//  encoding scheme based on temporal scalability for video applications
-//  that benefit from a scalable bitstream.
-
-#include <assert.h>
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "./vpx_config.h"
-#include "../vpx_ports/vpx_timer.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
-
-#include "../tools_common.h"
-#include "../video_writer.h"
-
-static const char *exec_name;
-
-void usage_exit(void) {
-  exit(EXIT_FAILURE);
-}
-
-// Denoiser states, for temporal denoising.
-enum denoiserState {
-  kDenoiserOff,
-  kDenoiserOnYOnly,
-  kDenoiserOnYUV,
-  kDenoiserOnYUVAggressive,
-  kDenoiserOnAdaptive
-};
-
-static int mode_to_num_layers[13] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3};
-
-// For rate control encoding stats.
-struct RateControlMetrics {
-  // Number of input frames per layer.
-  int layer_input_frames[VPX_TS_MAX_LAYERS];
-  // Total (cumulative) number of encoded frames per layer.
-  int layer_tot_enc_frames[VPX_TS_MAX_LAYERS];
-  // Number of encoded non-key frames per layer.
-  int layer_enc_frames[VPX_TS_MAX_LAYERS];
-  // Framerate per layer layer (cumulative).
-  double layer_framerate[VPX_TS_MAX_LAYERS];
-  // Target average frame size per layer (per-frame-bandwidth per layer).
-  double layer_pfb[VPX_TS_MAX_LAYERS];
-  // Actual average frame size per layer.
-  double layer_avg_frame_size[VPX_TS_MAX_LAYERS];
-  // Average rate mismatch per layer (|target - actual| / target).
-  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
-  // Actual encoding bitrate per layer (cumulative).
-  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
-  // Average of the short-time encoder actual bitrate.
-  // TODO(marpan): Should we add these short-time stats for each layer?
-  double avg_st_encoding_bitrate;
-  // Variance of the short-time encoder actual bitrate.
-  double variance_st_encoding_bitrate;
-  // Window (number of frames) for computing short-timee encoding bitrate.
-  int window_size;
-  // Number of window measurements.
-  int window_count;
-  int layer_target_bitrate[VPX_MAX_LAYERS];
-};
-
-// Note: these rate control metrics assume only 1 key frame in the
-// sequence (i.e., first frame only). So for temporal pattern# 7
-// (which has key frame for every frame on base layer), the metrics
-// computation will be off/wrong.
-// TODO(marpan): Update these metrics to account for multiple key frames
-// in the stream.
-static void set_rate_control_metrics(struct RateControlMetrics *rc,
-                                     vpx_codec_enc_cfg_t *cfg) {
-  unsigned int i = 0;
-  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
-  // per-frame-bandwidth, for the rate control encoding stats below.
-  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
-  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
-  rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
-      rc->layer_framerate[0];
-  for (i = 0; i < cfg->ts_number_layers; ++i) {
-    if (i > 0) {
-      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
-      rc->layer_pfb[i] = 1000.0 *
-          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
-          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
-    }
-    rc->layer_input_frames[i] = 0;
-    rc->layer_enc_frames[i] = 0;
-    rc->layer_tot_enc_frames[i] = 0;
-    rc->layer_encoding_bitrate[i] = 0.0;
-    rc->layer_avg_frame_size[i] = 0.0;
-    rc->layer_avg_rate_mismatch[i] = 0.0;
-  }
-  rc->window_count = 0;
-  rc->window_size = 15;
-  rc->avg_st_encoding_bitrate = 0.0;
-  rc->variance_st_encoding_bitrate = 0.0;
-}
-
-static void printout_rate_control_summary(struct RateControlMetrics *rc,
-                                          vpx_codec_enc_cfg_t *cfg,
-                                          int frame_cnt) {
-  unsigned int i = 0;
-  int tot_num_frames = 0;
-  double perc_fluctuation = 0.0;
-  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
-  printf("Rate control layer stats for %d layer(s):\n\n",
-      cfg->ts_number_layers);
-  for (i = 0; i < cfg->ts_number_layers; ++i) {
-    const int num_dropped = (i > 0) ?
-        (rc->layer_input_frames[i] - rc->layer_enc_frames[i]) :
-        (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
-    tot_num_frames += rc->layer_input_frames[i];
-    rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
-        rc->layer_encoding_bitrate[i] / tot_num_frames;
-    rc->layer_avg_frame_size[i] = rc->layer_avg_frame_size[i] /
-        rc->layer_enc_frames[i];
-    rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
-        rc->layer_enc_frames[i];
-    printf("For layer#: %d \n", i);
-    printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
-           rc->layer_encoding_bitrate[i]);
-    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
-           rc->layer_avg_frame_size[i]);
-    printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
-    printf("Number of input frames, encoded (non-key) frames, "
-        "and perc dropped frames: %d %d %f \n", rc->layer_input_frames[i],
-        rc->layer_enc_frames[i],
-        100.0 * num_dropped / rc->layer_input_frames[i]);
-    printf("\n");
-  }
-  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
-  rc->variance_st_encoding_bitrate =
-      rc->variance_st_encoding_bitrate / rc->window_count -
-      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
-  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
-      rc->avg_st_encoding_bitrate;
-  printf("Short-time stats, for window of %d frames: \n",rc->window_size);
-  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
-         rc->avg_st_encoding_bitrate,
-         sqrt(rc->variance_st_encoding_bitrate),
-         perc_fluctuation);
-  if ((frame_cnt - 1) != tot_num_frames)
-    die("Error: Number of input frames not equal to output! \n");
-}
-
-// Temporal scaling parameters:
-// NOTE: The 3 prediction frames cannot be used interchangeably due to
-// differences in the way they are handled throughout the code. The
-// frames should be allocated to layers in the order LAST, GF, ARF.
-// Other combinations work, but may produce slightly inferior results.
-static void set_temporal_layer_pattern(int layering_mode,
-                                       vpx_codec_enc_cfg_t *cfg,
-                                       int *layer_flags,
-                                       int *flag_periodicity) {
-  switch (layering_mode) {
-    case 0: {
-      // 1-layer.
-      int ids[1] = {0};
-      cfg->ts_periodicity = 1;
-      *flag_periodicity = 1;
-      cfg->ts_number_layers = 1;
-      cfg->ts_rate_decimator[0] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // Update L only.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 1: {
-      // 2-layers, 2-frame period.
-      int ids[2] = {0, 1};
-      cfg->ts_periodicity = 2;
-      *flag_periodicity = 2;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 2;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-#if 1
-      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF;
-#else
-       // 0=L, 1=GF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST;
-#endif
-      break;
-    }
-    case 2: {
-      // 2-layers, 3-frame period.
-      int ids[3] = {0, 1, 1};
-      cfg->ts_periodicity = 3;
-      *flag_periodicity = 3;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 3;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      break;
-    }
-    case 3: {
-      // 3-layers, 6-frame period.
-      int ids[6] = {0, 2, 2, 1, 2, 2};
-      cfg->ts_periodicity = 6;
-      *flag_periodicity = 6;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 6;
-      cfg->ts_rate_decimator[1] = 3;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[2] =
-      layer_flags[4] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
-      break;
-    }
-    case 4: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 5: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers     = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled
-      // in layer 2.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      break;
-    }
-    case 6: {
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] =
-      layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      break;
-    }
-    case 7: {
-      // NOTE: Probably of academic interest only.
-      // 5-layers, 16-frame period.
-      int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4};
-      cfg->ts_periodicity = 16;
-      *flag_periodicity = 16;
-      cfg->ts_number_layers = 5;
-      cfg->ts_rate_decimator[0] = 16;
-      cfg->ts_rate_decimator[1] = 8;
-      cfg->ts_rate_decimator[2] = 4;
-      cfg->ts_rate_decimator[3] = 2;
-      cfg->ts_rate_decimator[4] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      layer_flags[0]  = VPX_EFLAG_FORCE_KF;
-      layer_flags[1]  =
-      layer_flags[3]  =
-      layer_flags[5]  =
-      layer_flags[7]  =
-      layer_flags[9]  =
-      layer_flags[11] =
-      layer_flags[13] =
-      layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2]  =
-      layer_flags[6]  =
-      layer_flags[10] =
-      layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[4] =
-      layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[8]  = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF;
-      break;
-    }
-    case 8: {
-      // 2-layers, with sync point at first frame of layer 1.
-      int ids[2] = {0, 1};
-      cfg->ts_periodicity = 2;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 2;
-      cfg->ts_rate_decimator[0] = 2;
-      cfg->ts_rate_decimator[1] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF.
-      // ARF is used as predictor for all frames, and is only updated on
-      // key frame. Sync point every 8 frames.
-
-      // Layer 0: predict from L and ARF, update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 1: sync point: predict from L and ARF, and update G.
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 0, predict from L and ARF, update L.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF  | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF;
-      // Layer 1: predict from L, G and ARF, and update G.
-      layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 0.
-      layer_flags[4] = layer_flags[2];
-      // Layer 1.
-      layer_flags[5] = layer_flags[3];
-      // Layer 0.
-      layer_flags[6] = layer_flags[4];
-      // Layer 1.
-      layer_flags[7] = layer_flags[5];
-     break;
-    }
-    case 9: {
-      // 3-layers: Sync points for layer 1 and 2 every 8 frames.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF  | VP8_EFLAG_NO_REF_GF |
-          VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF   | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[3] =
-      layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY;
-      break;
-    }
-    case 10: {
-      // 3-layers structure where ARF is used as predictor for all frames,
-      // and is only updated on key frame.
-      // Sync points for layer 1 and 2 every 8 frames.
-
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      // Layer 0: predict from L and ARF; update L and G.
-      layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      // Layer 2: sync point: predict from L and ARF; update none.
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-          VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 1: sync point: predict from L and ARF; update G.
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
-      // Layer 0: predict from L and ARF; update L.
-      layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[5] = layer_flags[3];
-      // Layer 1: predict from L, G, ARF; update G.
-      layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[7] = layer_flags[3];
-      break;
-    }
-    case 11: {
-      // 3-layers structure with one reference frame.
-      // This works same as temporal_layering_mode 3.
-      // This was added to compare with vp9_spatial_svc_encoder.
-
-      // 3-layers, 4-frame period.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 4;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled.
-      layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
-      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
-      break;
-    }
-    case 12:
-    default: {
-      // 3-layers structure as in case 10, but no sync/refresh points for
-      // layer 1 and 2.
-      int ids[4] = {0, 2, 1, 2};
-      cfg->ts_periodicity = 4;
-      *flag_periodicity = 8;
-      cfg->ts_number_layers = 3;
-      cfg->ts_rate_decimator[0] = 4;
-      cfg->ts_rate_decimator[1] = 2;
-      cfg->ts_rate_decimator[2] = 1;
-      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
-      // 0=L, 1=GF, 2=ARF.
-      // Layer 0: predict from L and ARF; update L.
-      layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_REF_GF;
-      layer_flags[4] = layer_flags[0];
-      // Layer 1: predict from L, G, ARF; update G.
-      layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      layer_flags[6] = layer_flags[2];
-      // Layer 2: predict from L, G, ARF; update none.
-      layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-          VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
-      layer_flags[3] = layer_flags[1];
-      layer_flags[5] = layer_flags[1];
-      layer_flags[7] = layer_flags[1];
-      break;
-    }
-  }
-}
-
-int main(int argc, char **argv) {
-  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
-  vpx_codec_ctx_t codec;
-  vpx_codec_enc_cfg_t cfg;
-  int frame_cnt = 0;
-  vpx_image_t raw;
-  vpx_codec_err_t res;
-  unsigned int width;
-  unsigned int height;
-  int speed;
-  int frame_avail;
-  int got_data;
-  int flags = 0;
-  unsigned int i;
-  int pts = 0;  // PTS starts at 0.
-  int frame_duration = 1;  // 1 timebase tick per frame.
-  int layering_mode = 0;
-  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
-  int flag_periodicity = 1;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
-  vpx_svc_layer_id_t layer_id = {0, 0};
-#else
-  vpx_svc_layer_id_t layer_id = {0};
-#endif
-  const VpxInterface *encoder = NULL;
-  FILE *infile = NULL;
-  struct RateControlMetrics rc;
-  int64_t cx_time = 0;
-  const int min_args_base = 11;
-#if CONFIG_VP9_HIGHBITDEPTH
-  vpx_bit_depth_t bit_depth = VPX_BITS_8;
-  int input_bit_depth = 8;
-  const int min_args = min_args_base + 1;
-#else
-  const int min_args = min_args_base;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  double sum_bitrate = 0.0;
-  double sum_bitrate2 = 0.0;
-  double framerate  = 30.0;
-
-  exec_name = argv[0];
-  // Check usage and arguments.
-  if (argc < min_args) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
-        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n", argv[0]);
-#else
-    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
-        "<Rate_0> ... <Rate_nlayers-1> \n", argv[0]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  }
-
-  encoder = get_vpx_encoder_by_name(argv[3]);
-  if (!encoder)
-    die("Unsupported codec.");
-
-  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
-
-  width = strtol(argv[4], NULL, 0);
-  height = strtol(argv[5], NULL, 0);
-  if (width < 16 || width % 2 || height < 16 || height % 2) {
-    die("Invalid resolution: %d x %d", width, height);
-  }
-
-  layering_mode = strtol(argv[10], NULL, 0);
-  if (layering_mode < 0 || layering_mode > 13) {
-    die("Invalid layering mode (0..12) %s", argv[10]);
-  }
-
-  if (argc != min_args + mode_to_num_layers[layering_mode]) {
-    die("Invalid number of arguments");
-  }
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  switch (strtol(argv[argc-1], NULL, 0)) {
-    case 8:
-      bit_depth = VPX_BITS_8;
-      input_bit_depth = 8;
-      break;
-    case 10:
-      bit_depth = VPX_BITS_10;
-      input_bit_depth = 10;
-      break;
-    case 12:
-      bit_depth = VPX_BITS_12;
-      input_bit_depth = 12;
-      break;
-    default:
-      die("Invalid bit depth (8, 10, 12) %s", argv[argc-1]);
-  }
-  if (!vpx_img_alloc(&raw,
-                     bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420 :
-                                               VPX_IMG_FMT_I42016,
-                     width, height, 32)) {
-    die("Failed to allocate image", width, height);
-  }
-#else
-  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
-    die("Failed to allocate image", width, height);
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  // Populate encoder configuration.
-  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
-  if (res) {
-    printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-    return EXIT_FAILURE;
-  }
-
-  // Update the default configuration with our settings.
-  cfg.g_w = width;
-  cfg.g_h = height;
-
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (bit_depth != VPX_BITS_8) {
-    cfg.g_bit_depth = bit_depth;
-    cfg.g_input_bit_depth = input_bit_depth;
-    cfg.g_profile = 2;
-  }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
-  cfg.g_timebase.num = strtol(argv[6], NULL, 0);
-  cfg.g_timebase.den = strtol(argv[7], NULL, 0);
-
-  speed = strtol(argv[8], NULL, 0);
-  if (speed < 0) {
-    die("Invalid speed setting: must be positive");
-  }
-
-  for (i = min_args_base;
-       (int)i < min_args_base + mode_to_num_layers[layering_mode];
-       ++i) {
-    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
-    if (strncmp(encoder->name, "vp8", 3) == 0)
-      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
-    else if (strncmp(encoder->name, "vp9", 3) == 0)
-      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
-  }
-
-  // Real time parameters.
-  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
-  cfg.rc_end_usage = VPX_CBR;
-  cfg.rc_min_quantizer = 2;
-  cfg.rc_max_quantizer = 56;
-  if (strncmp(encoder->name, "vp9", 3) == 0)
-    cfg.rc_max_quantizer = 52;
-  cfg.rc_undershoot_pct = 50;
-  cfg.rc_overshoot_pct = 50;
-  cfg.rc_buf_initial_sz = 500;
-  cfg.rc_buf_optimal_sz = 600;
-  cfg.rc_buf_sz = 1000;
-
-  // Disable dynamic resizing by default.
-  cfg.rc_resize_allowed = 0;
-
-  // Use 1 thread as default.
-  cfg.g_threads = 1;
-
-  // Enable error resilient mode.
-  cfg.g_error_resilient = 1;
-  cfg.g_lag_in_frames   = 0;
-  cfg.kf_mode = VPX_KF_AUTO;
-
-  // Disable automatic keyframe placement.
-  cfg.kf_min_dist = cfg.kf_max_dist = 3000;
-
-  cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
-
-  set_temporal_layer_pattern(layering_mode,
-                             &cfg,
-                             layer_flags,
-                             &flag_periodicity);
-
-  set_rate_control_metrics(&rc, &cfg);
-
-  // Target bandwidth for the whole stream.
-  // Set to layer_target_bitrate for highest layer (total bitrate).
-  cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];
-
-  // Open input file.
-  if (!(infile = fopen(argv[1], "rb"))) {
-    die("Failed to open %s for reading", argv[1]);
-  }
-
-  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
-  // Open an output file for each stream.
-  for (i = 0; i < cfg.ts_number_layers; ++i) {
-    char file_name[PATH_MAX];
-    VpxVideoInfo info;
-    info.codec_fourcc = encoder->fourcc;
-    info.frame_width = cfg.g_w;
-    info.frame_height = cfg.g_h;
-    info.time_base.numerator = cfg.g_timebase.num;
-    info.time_base.denominator = cfg.g_timebase.den;
-
-    snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i);
-    outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info);
-    if (!outfile[i])
-      die("Failed to open %s for writing", file_name);
-
-    assert(outfile[i] != NULL);
-  }
-  // No spatial layers in this encoder.
-  cfg.ss_number_layers = 1;
-
-  // Initialize codec.
-#if CONFIG_VP9_HIGHBITDEPTH
-  if (vpx_codec_enc_init(
-          &codec, encoder->codec_interface(), &cfg,
-          bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH))
-#else
-  if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    die_codec(&codec, "Failed to initialize encoder");
-
-  if (strncmp(encoder->name, "vp8", 3) == 0) {
-    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
-    vpx_svc_extra_cfg_t svc_params;
-    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
-    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
-    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
-    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
-      die_codec(&codec, "Failed to set SVC");
-    for (i = 0; i < cfg.ts_number_layers; ++i) {
-      svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
-      svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
-    }
-    svc_params.scaling_factor_num[0] = cfg.g_h;
-    svc_params.scaling_factor_den[0] = cfg.g_h;
-    vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
-  }
-  if (strncmp(encoder->name, "vp8", 3) == 0) {
-    vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
-  }
-  vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
-  // This controls the maximum target size of the key frame.
-  // For generating smaller key frames, use a smaller max_intra_size_pct
-  // value, like 100 or 200.
-  {
-    const int max_intra_size_pct = 900;
-    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
-                      max_intra_size_pct);
-  }
-
-  frame_avail = 1;
-  while (frame_avail || got_data) {
-    struct vpx_usec_timer timer;
-    vpx_codec_iter_t iter = NULL;
-    const vpx_codec_cx_pkt_t *pkt;
-#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
-    // Update the temporal layer_id. No spatial layers in this test.
-    layer_id.spatial_layer_id = 0;
-#endif
-    layer_id.temporal_layer_id =
-        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-    if (strncmp(encoder->name, "vp9", 3) == 0) {
-      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
-    } else if (strncmp(encoder->name, "vp8", 3) == 0) {
-      vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID,
-                        layer_id.temporal_layer_id);
-    }
-    flags = layer_flags[frame_cnt % flag_periodicity];
-    if (layering_mode == 0)
-      flags = 0;
-    frame_avail = vpx_img_read(&raw, infile);
-    if (frame_avail)
-      ++rc.layer_input_frames[layer_id.temporal_layer_id];
-    vpx_usec_timer_start(&timer);
-    if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags,
-        VPX_DL_REALTIME)) {
-      die_codec(&codec, "Failed to encode frame");
-    }
-    vpx_usec_timer_mark(&timer);
-    cx_time += vpx_usec_timer_elapsed(&timer);
-    // Reset KF flag.
-    if (layering_mode != 7) {
-      layer_flags[0] &= ~VPX_EFLAG_FORCE_KF;
-    }
-    got_data = 0;
-    while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) {
-      got_data = 1;
-      switch (pkt->kind) {
-        case VPX_CODEC_CX_FRAME_PKT:
-          for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
-              i < cfg.ts_number_layers; ++i) {
-            vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf,
-                                         pkt->data.frame.sz, pts);
-            ++rc.layer_tot_enc_frames[i];
-            rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz;
-            // Keep count of rate control stats per layer (for non-key frames).
-            if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] &&
-                !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
-              rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz;
-              rc.layer_avg_rate_mismatch[i] +=
-                  fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) /
-                  rc.layer_pfb[i];
-              ++rc.layer_enc_frames[i];
-            }
-          }
-          // Update for short-time encoding bitrate states, for moving window
-          // of size rc->window, shifted by rc->window / 2.
-          // Ignore first window segment, due to key frame.
-          if (frame_cnt > rc.window_size) {
-            sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
-            if (frame_cnt % rc.window_size == 0) {
-              rc.window_count += 1;
-              rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
-              rc.variance_st_encoding_bitrate +=
-                  (sum_bitrate / rc.window_size) *
-                  (sum_bitrate / rc.window_size);
-              sum_bitrate = 0.0;
-            }
-          }
-          // Second shifted window.
-          if (frame_cnt > rc.window_size + rc.window_size / 2) {
-            sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
-            if (frame_cnt > 2 * rc.window_size &&
-                frame_cnt % rc.window_size == 0) {
-              rc.window_count += 1;
-              rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
-              rc.variance_st_encoding_bitrate +=
-                  (sum_bitrate2 / rc.window_size) *
-                  (sum_bitrate2 / rc.window_size);
-              sum_bitrate2 = 0.0;
-            }
-          }
-          break;
-          default:
-            break;
-      }
-    }
-    ++frame_cnt;
-    pts += frame_duration;
-  }
-  fclose(infile);
-  printout_rate_control_summary(&rc, &cfg, frame_cnt);
-  printf("\n");
-  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
-          frame_cnt,
-          1000 * (float)cx_time / (double)(frame_cnt * 1000000),
-          1000000 * (double)frame_cnt / (double)cx_time);
-
-  if (vpx_codec_destroy(&codec))
-    die_codec(&codec, "Failed to destroy codec");
-
-  // Try to rewrite the output file headers with the actual frame count.
-  for (i = 0; i < cfg.ts_number_layers; ++i)
-    vpx_video_writer_close(outfile[i]);
-
-  vpx_img_free(&raw);
-  return EXIT_SUCCESS;
-}
--- a/ivfdec.c
+++ b/ivfdec.c
@@ -23,7 +23,7 @@ static void fix_framerate(int *num, int *den) {
  // we can guess the framerate using only the timebase in this
  // case. Other files would require reading ahead to guess the
  // timebase, like we do for webm.
-  if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) {
+  if (*num < 1000) {
    // Correct for the factor of 2 applied to the timebase in the encoder.
    if (*num & 1)
      *den *= 2;
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -36,7 +36,7 @@ DOXYFILE_ENCODING      = UTF-8
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
 # by quotes) that should identify the project.

-PROJECT_NAME           = "WebM Codec SDK"
+PROJECT_NAME           = "WebM VP8 Codec SDK"

 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
--- a/libs.mk
+++ b/libs.mk
@@ -17,6 +17,32 @@ else
  ASM:=.asm
 endif

+#
+# Calculate platform- and compiler-specific offsets for hand coded assembly
+#
+ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
+OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
+define asm_offsets_template
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
+	@echo "    [CREATE] $$@"
+	$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
+$$(BUILD_PFX)$(2).S: $(2)
+CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
+endef
+else
+  ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
+define asm_offsets_template
+$$(BUILD_PFX)$(1): obj_int_extract
+$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
+	@echo "    [CREATE] $$@"
+	$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
+OBJS-yes += $$(BUILD_PFX)$(2).o
+CLEAN-OBJS += $$(BUILD_PFX)$(1)
+$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
+endef
+endif # rvct
+endif # !gcc
+
 #
 # Rule to generate runtime cpu detection files
 #
@@ -25,7 +51,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
 	@echo "    [CREATE] $$@"
 	$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
          --sym=$(1) \
-          --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
+          --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
          $$(RTCD_OPTIONS) $$^ > $$@
 CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
@@ -34,6 +60,13 @@ endef
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk

+# If this is a universal (fat) binary, then all the subarchitectures have
+# already been built and our job is to stitch them together. The
+# BUILD_LIBVPX variable indicates whether we should be building
+# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
+# that we're stitching.
+$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
+
 include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
 CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
 CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
@@ -47,13 +80,7 @@ CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
 include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
 CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))

-include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
-CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
-
-include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
-CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
-
-ifeq ($(CONFIG_VP8),yes)
+ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
 endif
@@ -76,7 +103,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
  CODEC_DOC_SECTIONS += vp8 vp8_decoder
 endif

-ifeq ($(CONFIG_VP9),yes)
+ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
 endif
@@ -88,7 +115,6 @@ ifeq ($(CONFIG_VP9_ENCODER),yes)
  CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS))
  CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h
  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
-  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/%
  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
  CODEC_DOC_SECTIONS += vp9 vp9_encoder
@@ -136,22 +162,19 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Release/%)
 INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)
 endif

-CODEC_SRCS-yes += build/make/version.sh
-CODEC_SRCS-yes += build/make/rtcd.pl
-CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
-CODEC_SRCS-yes += vpx_ports/mem_ops.h
-CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
-CODEC_SRCS-yes += vpx_ports/vpx_once.h
-CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
+CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
+CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
+CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
+CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
 endif
-CODEC_EXPORTS-yes += vpx/exports_com
+CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
-ifeq ($(CONFIG_SPATIAL_SVC),yes)
-CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_spatial_svc
-endif
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec

 INSTALL-LIBS-yes += include/vpx/vpx_codec.h
@@ -181,13 +204,33 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
 # based build systems.
 libvpx_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
+	@echo $(CODEC_SRCS) | xargs -n1 echo | sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt


 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)

+obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
+	@cp $^ $@
+
+obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
+obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
+	@echo "    [CREATE] $@"
+	$(qexec)$(GEN_VCPROJ) \
+    --exe \
+    --target=$(TOOLCHAIN) \
+    --name=obj_int_extract \
+    --ver=$(CONFIG_VS_VERSION) \
+    --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
+    --src-path-bare="$(SRC_PATH_BARE)" \
+    $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+    --out=$@ $^ \
+    -I. \
+    -I"$(SRC_PATH_BARE)" \
+
+PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
+
 vpx.def: $(call enabled,CODEC_EXPORTS)
 	@echo "    [CREATE] $@"
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
@@ -202,7 +245,7 @@ ASM_INCLUDES := \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \

-vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
+vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
            $(if $(CONFIG_SHARED),--dll,--lib) \
@@ -217,7 +260,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
            $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
            --src-path-bare="$(SRC_PATH_BARE)" \

-PROJECTS-yes += vpx.$(VCPROJ_SFX)
+PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)

 vpx.$(VCPROJ_SFX): vpx_config.asm
 vpx.$(VCPROJ_SFX): $(RTCD)
@@ -225,49 +268,32 @@ vpx.$(VCPROJ_SFX): $(RTCD)
 endif
 else
 LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
-OBJS-yes += $(LIBVPX_OBJS)
-LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
+OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
+LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-SO_VERSION_MAJOR := 4
-SO_VERSION_MINOR := 0
-SO_VERSION_PATCH := 0
+
+BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
+
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
-LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
-SHARED_LIB_SUF          := .dylib
+LIBVPX_SO               := libvpx.$(VERSION_MAJOR).dylib
 EXPORT_FILE             := libvpx.syms
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
                             libvpx.dylib  )
 else
-ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS))
-LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
-SHARED_LIB_SUF          := .dylib
-EXPORT_FILE             := libvpx.syms
-LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, libvpx.dylib)
-else
-ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
-LIBVPX_SO               := libvpx$(SO_VERSION_MAJOR).dll
-SHARED_LIB_SUF          := _dll.a
-EXPORT_FILE             := libvpx.def
-LIBVPX_SO_SYMLINKS      :=
-LIBVPX_SO_IMPLIB        := libvpx_dll.a
-else
-LIBVPX_SO               := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
-SHARED_LIB_SUF          := .so
+LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
 EXPORT_FILE             := libvpx.ver
+SYM_LINK                := libvpx.so
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
-                             libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
-                             libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
-endif
-endif
+                             libvpx.so libvpx.so.$(VERSION_MAJOR) \
+                             libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
 endif

-LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
-                           $(notdir $(LIBVPX_SO_SYMLINKS)) \
-                           $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
+LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
+                           $(notdir $(LIBVPX_SO_SYMLINKS))
 $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
 $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
-$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
 $(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)

 libvpx.ver: $(call enabled,CODEC_EXPORTS)
@@ -282,19 +308,6 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
 	$(qexec)awk '{print "_"$$2}' $^ >$@
 CLEAN-OBJS += libvpx.syms

-libvpx.def: $(call enabled,CODEC_EXPORTS)
-	@echo "    [CREATE] $@"
-	$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
-	$(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
-	$(qexec)echo "EXPORTS" >> $@
-	$(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
-CLEAN-OBJS += libvpx.def
-
-libvpx_dll.a: $(LIBVPX_SO)
-	@echo "    [IMPLIB] $@"
-	$(qexec)emximp -o $@ $<
-CLEAN-OBJS += libvpx_dll.a
-
 define libvpx_symlink_template
 $(1): $(2)
 	@echo "    [LN]     $(2) $$@"
@@ -310,12 +323,11 @@ $(eval $(call libvpx_symlink_template,\
    $(LIBVPX_SO)))


-INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
-INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
-INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))
+INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)


-LIBS-yes += vpx.pc
+LIBS-$(BUILD_LIBVPX) += vpx.pc
 vpx.pc: config.mk libs.mk
 	@echo "    [CREATE] $@"
 	$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
@@ -341,6 +353,9 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
 CLEAN-OBJS += vpx.pc
 endif

+LIBS-$(LIPO_LIBVPX) += libvpx.a
+$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
+
 #
 # Rule to make assembler configuration file from C configuration file
 #
@@ -361,7 +376,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm
 endif

 #
-# Add assembler dependencies for configuration.
+# Add assembler dependencies for configuration and offsets
 #
 $(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
@@ -385,18 +400,14 @@ LIBVPX_TEST_DATA_PATH ?= .

 include $(SRC_PATH_BARE)/test/test.mk
 LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
-LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
+LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
 libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

-TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
-TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
-TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
-
 libvpx_test_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
+	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
 CLEAN-OBJS += libvpx_test_srcs.txt

 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
@@ -411,10 +422,12 @@ testdata:: $(LIBVPX_TEST_DATA)
          if [ -n "$${sha1sum}" ]; then\
            set -e;\
            echo "Checking test data:";\
-            for f in $(call enabled,LIBVPX_TEST_DATA); do\
-                grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
-                    (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
-            done; \
+            if [ -n "$(LIBVPX_TEST_DATA)" ]; then\
+                for f in $(call enabled,LIBVPX_TEST_DATA); do\
+                    grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\
+                        (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\
+                done; \
+            fi; \
        else\
            echo "Skipping test data integrity check, sha1sum not found.";\
        fi
@@ -456,25 +469,7 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_

 PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)

-LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
-
-ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
-PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
-test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
-	@echo "    [CREATE] $@"
-	$(qexec)$(GEN_VCPROJ) \
-            --exe \
-            --target=$(TOOLCHAIN) \
-            --name=test_intra_pred_speed \
-            -D_VARIADIC_MAX=10 \
-            --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
-            --ver=$(CONFIG_VS_VERSION) \
-            --src-path-bare="$(SRC_PATH_BARE)" \
-            $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
-            --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
-            -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
-            -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
-endif  # TEST_INTRA_PRED_SPEED
+LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
 endif
 else

@@ -485,54 +480,45 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
 # Disabling pthreads globally will cause issues on darwin and possibly elsewhere
 $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
 endif
-GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
-GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
-OBJS-yes += $(GTEST_OBJS)
-LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
+LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
 $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)

 LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
-OBJS-yes += $(LIBVPX_TEST_OBJS)
-BINS-yes += $(LIBVPX_TEST_BIN)
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
+BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)

 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
-TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
-$(LIBVPX_TEST_BIN): $(TEST_LIBS)
-$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
-              $(LIBVPX_TEST_OBJS) \
-              -L. -lvpx -lgtest $(extralibs) -lm))
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
+$(foreach bin,$(LIBVPX_TEST_BINS),\
+    $(if $(BUILD_LIBVPX),$(eval $(bin): \
+        lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
+    $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
+        $(LIBVPX_TEST_OBJS) \
+        -L. -lvpx -lgtest $(extralibs) -lm)\
+        )))\
+    $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\

-ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
-$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
-OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
-BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
-
-$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
-$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
-              $(TEST_INTRA_PRED_SPEED_OBJS) \
-              -L. -lvpx -lgtest $(extralibs) -lm))
-endif  # TEST_INTRA_PRED_SPEED
-
-endif  # CONFIG_UNIT_TESTS
+endif

 # Install test sources only if codec source is included
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
    $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
-INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)

 define test_shard_template
 test:: test_shard.$(1)
-test-no-data-check:: test_shard_ndc.$(1)
-test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
+test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
 	@set -e; \
-	 export GTEST_SHARD_INDEX=$(1); \
-	 export GTEST_TOTAL_SHARDS=$(2); \
-	 $(LIBVPX_TEST_BIN)
-test_shard.$(1): testdata
+	 for t in $(LIBVPX_TEST_BINS); do \
+	   export GTEST_SHARD_INDEX=$(1); \
+	   export GTEST_TOTAL_SHARDS=$(2); \
+	   $$$$t; \
+	 done
 .PHONY: test_shard.$(1)
 endef

@@ -555,11 +541,7 @@ libs.doxy: $(CODEC_DOC_SRCS)
 	@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@

 ## Generate rtcd.h for all objects
-ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
 $(OBJS-yes:.o=.d): $(RTCD)
-else
-$(OBJS-yes): $(RTCD)
-endif

 ## Update the global src list
 SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
@@ -577,16 +559,15 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
 endif
-utiltest utiltest-no-data-check:
+utiltest: testdata
 	$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
 	$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
-utiltest: testdata
 else
-utiltest utiltest-no-data-check:
+utiltest:
 	@echo Unit tests must be enabled to make the utiltest target.
 endif

@@ -604,12 +585,11 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
 endif
-exampletest exampletest-no-data-check: examples
+exampletest: examples testdata
 	$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(EXAMPLES_BIN_PATH)
-exampletest: testdata
 else
-exampletest exampletest-no-data-check:
+exampletest:
 	@echo Unit tests must be enabled to make the exampletest target.
 endif
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -1,4 +1,4 @@
-/*!\mainpage WebM Codec SDK
+/*!\mainpage WebM VP8 Codec SDK

  \section main_contents Page Contents
  - \ref main_intro
@@ -6,11 +6,11 @@
  - \ref main_support

  \section main_intro Introduction
-  Welcome to the WebM Codec SDK. This SDK allows you to integrate your
-  applications with the VP8 and VP9 video codecs, high quality, royalty free,
-  open source codecs deployed on billions of computers and devices worldwide.
+  Welcome to the WebM VP8 Codec SDK. This SDK allows you to integrate your
+  applications with the VP8 video codec, a high quality, royalty free, open
+  source codec deployed on millions of computers and devices worldwide.

-  This distribution of the WebM Codec SDK includes the following support:
+  This distribution of the WebM VP8 Codec SDK includes the following support:

  \if vp8_encoder
  - \ref vp8_encoder
@@ -28,12 +28,12 @@
  - Read the \ref samples "sample code" for examples of how to interact with the
    codec.
  - \ref codec reference
-  \if encoder
-  - \ref encoder reference
-  \endif
-  \if decoder
-  - \ref decoder reference
-  \endif
+    \if encoder
+    - \ref encoder reference
+    \endif
+    \if decoder
+    - \ref decoder reference
+    \endif

  \section main_support Support Options & FAQ
  The WebM project is an open source project supported by its community. For
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -24,7 +24,7 @@

 #include "md5_utils.h"

-static void
+void
 byteSwap(UWORD32 *buf, unsigned words) {
  md5byte *p;

@@ -150,23 +150,12 @@ MD5Final(md5byte digest[16], struct MD5Context *ctx) {
 #define MD5STEP(f,w,x,y,z,in,s) \
  (w += f(x,y,z) + in, w = (w<<s | w>>(32-s)) + x)

-#if defined(__clang__) && defined(__has_attribute)
-#if __has_attribute(no_sanitize)
-#define VPX_NO_UNSIGNED_OVERFLOW_CHECK \
-  __attribute__((no_sanitize("unsigned-integer-overflow")))
-#endif
-#endif
-
-#ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK
-#define VPX_NO_UNSIGNED_OVERFLOW_CHECK
-#endif
-
 /*
 * The core of the MD5 algorithm, this alters an existing MD5 hash to
 * reflect the addition of 16 longwords of new data.  MD5Update blocks
 * the data and converts bytes into longwords for this routine.
 */
-VPX_NO_UNSIGNED_OVERFLOW_CHECK void
+void
 MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
  register UWORD32 a, b, c, d;

@@ -249,6 +238,4 @@ MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) {
  buf[3] += d;
 }

-#undef VPX_NO_UNSIGNED_OVERFLOW_CHECK
-
 #endif
--- a/rate_hist.c
+++ b/rate_hist.c
@@ -88,9 +88,6 @@ void update_rate_histogram(struct rate_hist *hist,
  if (now < cfg->rc_buf_initial_sz)
    return;

-  if (!cfg->rc_target_bitrate)
-    return;
-
  then = now;

  /* Sum the size over the past rc_buf_sz ms */
--- a/solution.mk
+++ b/solution.mk
@@ -9,7 +9,7 @@
 ##

 # libvpx reverse dependencies (targets that depend on libvpx)
-VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
+VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
 VPX_RDEPS=$(foreach vcp,\
              $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)

@@ -17,6 +17,7 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX))
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
            $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
+            --dep=vpx:obj_int_extract \
            --dep=test_libvpx:gtest \
            --ver=$(CONFIG_VS_VERSION)\
            --out=$@ $^
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -29,20 +29,14 @@ class ACMRandom {
  uint16_t Rand16(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
-    return (value >> 15) & 0xffff;
-  }
-
-  int16_t Rand9Signed(void) {
-    // Use 9 bits: values between 255 (0x0FF) and -256 (0x100).
-    const uint32_t value = random_.Generate(512);
-    return static_cast<int16_t>(value) - 256;
+    return (value >> 16) & 0xffff;
  }

  uint8_t Rand8(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
    // There's a bit more entropy in the upper bits of this implementation.
-    return (value >> 23) & 0xff;
+    return (value >> 24) & 0xff;
  }

  uint8_t Rand8Extremes(void) {
--- a/test/active_map_refresh_test.cc
+++ b/test/active_map_refresh_test.cc
@@ -1,127 +0,0 @@
-/*
- *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include <algorithm>
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/y4m_video_source.h"
-
-namespace {
-
-// Check if any pixel in a 16x16 macroblock varies between frames.
-int CheckMb(const vpx_image_t &current, const vpx_image_t &previous,
-            int mb_r, int mb_c) {
-  for (int plane = 0; plane < 3; plane++) {
-    int r = 16 * mb_r;
-    int c0 = 16 * mb_c;
-    int r_top = std::min(r + 16, static_cast<int>(current.d_h));
-    int c_top = std::min(c0 + 16, static_cast<int>(current.d_w));
-    r = std::max(r, 0);
-    c0 = std::max(c0, 0);
-    if (plane > 0 && current.x_chroma_shift) {
-      c_top = (c_top + 1) >> 1;
-      c0 >>= 1;
-    }
-    if (plane > 0 && current.y_chroma_shift) {
-      r_top = (r_top + 1) >> 1;
-      r >>= 1;
-    }
-    for (; r < r_top; ++r) {
-      for (int c = c0; c < c_top; ++c) {
-        if (current.planes[plane][current.stride[plane] * r + c] !=
-            previous.planes[plane][previous.stride[plane] * r + c])
-          return 1;
-      }
-    }
-  }
-  return 0;
-}
-
-void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current,
-                 const vpx_image_t &previous, uint8_t *map) {
-  for (int mb_r = 0; mb_r < mb_rows; ++mb_r) {
-    for (int mb_c = 0; mb_c < mb_cols; ++mb_c) {
-      map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c);
-    }
-  }
-}
-
-const int kAqModeCyclicRefresh = 3;
-
-class ActiveMapRefreshTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~ActiveMapRefreshTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    cpu_used_ = GET_PARAM(2);
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    ::libvpx_test::Y4mVideoSource *y4m_video =
-        static_cast<libvpx_test::Y4mVideoSource *>(video);
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
-      encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh);
-    } else if (video->frame() >= 2 && video->img()) {
-      vpx_image_t *current = video->img();
-      vpx_image_t *previous = y4m_holder_->img();
-      ASSERT_TRUE(previous != NULL);
-      vpx_active_map_t map = vpx_active_map_t();
-      const int width = static_cast<int>(current->d_w);
-      const int height = static_cast<int>(current->d_h);
-      const int mb_width = (width + 15) / 16;
-      const int mb_height = (height + 15) / 16;
-      uint8_t *active_map = new uint8_t[mb_width * mb_height];
-      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
-      map.cols = mb_width;
-      map.rows = mb_height;
-      map.active_map = active_map;
-      encoder->Control(VP8E_SET_ACTIVEMAP, &map);
-      delete[] active_map;
-    }
-    if (video->img()) {
-      y4m_video->SwapBuffers(y4m_holder_);
-    }
-  }
-
-  int cpu_used_;
-  ::libvpx_test::Y4mVideoSource *y4m_holder_;
-};
-
-TEST_P(ActiveMapRefreshTest, Test) {
-  cfg_.g_lag_in_frames = 0;
-  cfg_.g_profile = 1;
-  cfg_.rc_target_bitrate = 600;
-  cfg_.rc_resize_allowed = 0;
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 30;
-  cfg_.g_pass = VPX_RC_ONE_PASS;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.kf_max_dist = 90000;
-
-  ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30);
-  ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30);
-  video_holder.Begin();
-  y4m_holder_ = &video_holder;
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(5, 6));
-}  // namespace
--- a/test/active_map_test.cc
+++ b/test/active_map_test.cc
@@ -85,5 +85,5 @@ TEST_P(ActiveMapTest, Test) {

 VP9_INSTANTIATE_TEST_CASE(ActiveMapTest,
                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(0, 9));
+                          ::testing::Range(0, 6));
 }  // namespace
--- a/test/add_noise_test.cc
+++ b/test/add_noise_test.cc
@@ -1,197 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include <math.h>
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_dsp_rtcd.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-// TODO(jimbankoski): make width and height integers not unsigned.
-typedef void (*AddNoiseFunc)(unsigned char *start, char *noise,
-                             char blackclamp[16], char whiteclamp[16],
-                             char bothclamp[16], unsigned int width,
-                             unsigned int height, int pitch);
-
-class AddNoiseTest
-    : public ::testing::TestWithParam<AddNoiseFunc> {
- public:
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-  virtual ~AddNoiseTest() {}
-};
-
-double stddev6(char a, char b, char c, char d, char e, char f) {
-  const double n = (a + b + c + d + e + f) / 6.0;
-  const double v = ((a - n) * (a - n) + (b - n) * (b - n) + (c - n) * (c - n) +
-                    (d - n) * (d - n) + (e - n) * (e - n) + (f - n) * (f - n)) /
-                   6.0;
-  return sqrt(v);
-}
-
-// TODO(jimbankoski): The following 2 functions are duplicated in each codec.
-// For now the vp9 one has been copied into the test as is. We should normalize
-// these in vpx_dsp and not have 3 copies of these unless there is different
-// noise we add for each codec.
-
-double gaussian(double sigma, double mu, double x) {
-  return 1 / (sigma * sqrt(2.0 * 3.14159265)) *
-         (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma)));
-}
-
-int setup_noise(int size_noise, char *noise) {
-  char char_dist[300];
-  const int ai = 4;
-  const int qi = 24;
-  const double sigma = ai + .5 + .6 * (63 - qi) / 63.0;
-
-  /* set up a lookup table of 256 entries that matches
-   * a gaussian distribution with sigma determined by q.
-   */
-  int next = 0;
-
-  for (int i = -32; i < 32; i++) {
-    int a_i = (int) (0.5 + 256 * gaussian(sigma, 0, i));
-
-    if (a_i) {
-      for (int j = 0; j < a_i; j++) {
-        char_dist[next + j] = (char)(i);
-      }
-
-      next = next + a_i;
-    }
-  }
-
-  for (; next < 256; next++)
-    char_dist[next] = 0;
-
-  for (int i = 0; i < size_noise; i++) {
-    noise[i] = char_dist[rand() & 0xff];  // NOLINT
-  }
-
-  // Returns the most negative value in distribution.
-  return char_dist[0];
-}
-
-TEST_P(AddNoiseTest, CheckNoiseAdded) {
-  DECLARE_ALIGNED(16, char, blackclamp[16]);
-  DECLARE_ALIGNED(16, char, whiteclamp[16]);
-  DECLARE_ALIGNED(16, char, bothclamp[16]);
-  const int width  = 64;
-  const int height = 64;
-  const int image_size = width * height;
-  char noise[3072];
-
-  const int clamp = setup_noise(3072, noise);
-  for (int i = 0; i < 16; i++) {
-    blackclamp[i] = -clamp;
-    whiteclamp[i] = -clamp;
-    bothclamp[i] = -2 * clamp;
-  }
-
-  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
-  memset(s, 99, image_size);
-
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
-
-  // Check to make sure we don't end up having either the same or no added
-  // noise either vertically or horizontally.
-  for (int i = 0; i < image_size - 6 * width - 6; ++i) {
-    const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99,
-                              s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99);
-    const double vd = stddev6(s[i] - 99, s[i + width] - 99,
-                              s[i + 2 * width] - 99, s[i + 3 * width] - 99,
-                              s[i + 4 * width] - 99, s[i + 5 * width] - 99);
-
-    EXPECT_NE(hd, 0);
-    EXPECT_NE(vd, 0);
-  }
-
-  // Initialize pixels in the image to 255 and check for roll over.
-  memset(s, 255, image_size);
-
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
-
-  // Check to make sure don't roll over.
-  for (int i = 0; i < image_size; ++i) {
-    EXPECT_GT((int)s[i], 10) << "i = " << i;
-  }
-
-  // Initialize pixels in the image to 0 and check for roll under.
-  memset(s, 0, image_size);
-
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
-
-  // Check to make sure don't roll under.
-  for (int i = 0; i < image_size; ++i) {
-    EXPECT_LT((int)s[i], 245) << "i = " << i;
-  }
-
-  vpx_free(s);
-}
-
-TEST_P(AddNoiseTest, CheckCvsAssembly) {
-  DECLARE_ALIGNED(16, char, blackclamp[16]);
-  DECLARE_ALIGNED(16, char, whiteclamp[16]);
-  DECLARE_ALIGNED(16, char, bothclamp[16]);
-  const int width  = 64;
-  const int height = 64;
-  const int image_size = width * height;
-  char noise[3072];
-
-  const int clamp = setup_noise(3072, noise);
-  for (int i = 0; i < 16; i++) {
-    blackclamp[i] = -clamp;
-    whiteclamp[i] = -clamp;
-    bothclamp[i] = -2 * clamp;
-  }
-
-  uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
-  uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1));
-
-  memset(s, 99, image_size);
-  memset(d, 99, image_size);
-
-  srand(0);
-  ASM_REGISTER_STATE_CHECK(GetParam()(s, noise, blackclamp, whiteclamp,
-                                      bothclamp, width, height, width));
-  srand(0);
-  ASM_REGISTER_STATE_CHECK(vpx_plane_add_noise_c(d, noise, blackclamp,
-                                                 whiteclamp, bothclamp,
-                                                 width, height, width));
-
-  for (int i = 0; i < image_size; ++i) {
-    EXPECT_EQ((int)s[i], (int)d[i]) << "i = " << i;
-  }
-
-  vpx_free(d);
-  vpx_free(s);
-}
-
-INSTANTIATE_TEST_CASE_P(C, AddNoiseTest,
-                        ::testing::Values(vpx_plane_add_noise_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AddNoiseTest,
-                        ::testing::Values(vpx_plane_add_noise_sse2));
-#endif
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, AddNoiseTest,
-                        ::testing::Values(vpx_plane_add_noise_msa));
-#endif
-}  // namespace
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -14,8 +14,6 @@
 #include "test/util.h"
 namespace {

-#if CONFIG_VP8_ENCODER
-
 // lookahead range: [kLookAheadMin, kLookAheadMax).
 const int kLookAheadMin = 5;
 const int kLookAheadMax = 26;
@@ -65,95 +63,7 @@ TEST_P(AltRefTest, MonotonicTimestamps) {
  EXPECT_GE(altref_count(), 1);
 }

+
 VP8_INSTANTIATE_TEST_CASE(AltRefTest,
                          ::testing::Range(kLookAheadMin, kLookAheadMax));
-
-#endif  // CONFIG_VP8_ENCODER
-
-class AltRefForcedKeyTestLarge
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  AltRefForcedKeyTestLarge()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        cpu_used_(GET_PARAM(2)),
-        forced_kf_frame_num_(1),
-        frame_num_(0) {}
-  virtual ~AltRefForcedKeyTestLarge() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-    cfg_.rc_end_usage = VPX_VBR;
-    cfg_.g_threads = 0;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-      // override test default for tile columns if necessary.
-#if CONFIG_VP9_ENCODER
-      if (GET_PARAM(0) == &libvpx_test::kVP9) {
-        encoder->Control(VP9E_SET_TILE_COLUMNS, 6);
-      }
-#endif
-    }
-    frame_flags_ =
-        (video->frame() == forced_kf_frame_num_) ? VPX_EFLAG_FORCE_KF : 0;
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (frame_num_ == forced_kf_frame_num_) {
-      ASSERT_TRUE(!!(pkt->data.frame.flags & VPX_FRAME_IS_KEY))
-          << "Frame #" << frame_num_ << " isn't a keyframe!";
-    }
-    ++frame_num_;
-  }
-
-  ::libvpx_test::TestMode encoding_mode_;
-  int cpu_used_;
-  unsigned int forced_kf_frame_num_;
-  unsigned int frame_num_;
-};
-
-TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) {
-  const vpx_rational timebase = { 1, 30 };
-  const int lag_values[] = { 3, 15, 25, -1 };
-
-  forced_kf_frame_num_ = 1;
-  for (int i = 0; lag_values[i] != -1; ++i) {
-    frame_num_ = 0;
-    cfg_.g_lag_in_frames = lag_values[i];
-    libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       timebase.den, timebase.num, 0, 30);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  }
-}
-
-TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) {
-  const vpx_rational timebase = { 1, 30 };
-  const int lag_values[] = { 3, 15, 25, -1 };
-
-  for (int i = 0; lag_values[i] != -1; ++i) {
-    frame_num_ = 0;
-    forced_kf_frame_num_ = lag_values[i] - 1;
-    cfg_.g_lag_in_frames = lag_values[i];
-    libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       timebase.den, timebase.num, 0, 30);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  }
-}
-
-VP8_INSTANTIATE_TEST_CASE(
-    AltRefForcedKeyTestLarge,
-    ::testing::Values(::libvpx_test::kOnePassGood),
-    ::testing::Range(0, 9));
-
-VP9_INSTANTIATE_TEST_CASE(
-    AltRefForcedKeyTestLarge,
-    ::testing::Values(::libvpx_test::kOnePassGood),
-    ::testing::Range(0, 9));
 }  // namespace
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -40,17 +40,9 @@ include $(CLEAR_VARS)
 LOCAL_ARM_MODE := arm
 LOCAL_MODULE := libvpx_test
 LOCAL_STATIC_LIBRARIES := gtest libwebm
-
-ifeq ($(ENABLE_SHARED),1)
-  LOCAL_SHARED_LIBRARIES := vpx
-else
-  LOCAL_STATIC_LIBRARIES += vpx
-endif
-
+LOCAL_SHARED_LIBRARIES := vpx
 include $(LOCAL_PATH)/test/test.mk
 LOCAL_C_INCLUDES := $(BINDINGS_DIR)
 FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
 LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
-# some test files depend on *_rtcd.h, ensure they're generated first.
-$(eval $(call rtcd_dep_template))
 include $(BUILD_EXECUTABLE)
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -1,411 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vpx_mem/vpx_mem.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-class AverageTestBase : public ::testing::Test {
- public:
-  AverageTestBase(int width, int height) : width_(width), height_(height) {}
-
-  static void SetUpTestCase() {
-    source_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBlockSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(source_data_);
-    source_data_ = NULL;
-  }
-
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  // Handle blocks up to 4 blocks 64x64 with stride up to 128
-  static const int kDataAlignment = 16;
-  static const int kDataBlockSize = 64 * 128;
-
-  virtual void SetUp() {
-    source_stride_ = (width_ + 31) & ~31;
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
-  // Sum Pixels
-  unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch) {
-    unsigned int average = 0;
-    for (int h = 0; h < 8; ++h)
-      for (int w = 0; w < 8; ++w)
-        average += source[h * pitch + w];
-    return ((average + 32) >> 6);
-  }
-
-  unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch) {
-    unsigned int average = 0;
-    for (int h = 0; h < 4; ++h)
-      for (int w = 0; w < 4; ++w)
-        average += source[h * pitch + w];
-    return ((average + 8) >> 4);
-  }
-
-  void FillConstant(uint8_t fill_constant) {
-    for (int i = 0; i < width_ * height_; ++i) {
-        source_data_[i] = fill_constant;
-    }
-  }
-
-  void FillRandom() {
-    for (int i = 0; i < width_ * height_; ++i) {
-        source_data_[i] = rnd_.Rand8();
-    }
-  }
-
-  int width_, height_;
-  static uint8_t* source_data_;
-  int source_stride_;
-
-  ACMRandom rnd_;
-};
-typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);
-
-typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;
-
-class AverageTest
-    : public AverageTestBase,
-      public ::testing::WithParamInterface<AvgFunc>{
- public:
-  AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {}
-
- protected:
-  void CheckAverages() {
-    unsigned int expected = 0;
-    if (GET_PARAM(3) == 8) {
-      expected = ReferenceAverage8x8(source_data_+ GET_PARAM(2),
-                                     source_stride_);
-    } else  if (GET_PARAM(3) == 4) {
-      expected = ReferenceAverage4x4(source_data_+ GET_PARAM(2),
-                                     source_stride_);
-    }
-
-    ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(source_data_+ GET_PARAM(2),
-                                          source_stride_));
-    unsigned int actual = GET_PARAM(4)(source_data_+ GET_PARAM(2),
-                                       source_stride_);
-
-    EXPECT_EQ(expected, actual);
-  }
-};
-
-typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
-                              const int ref_stride, const int height);
-
-typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
-
-class IntProRowTest
-    : public AverageTestBase,
-      public ::testing::WithParamInterface<IntProRowParam> {
- public:
-  IntProRowTest()
-    : AverageTestBase(16, GET_PARAM(0)),
-      hbuf_asm_(NULL),
-      hbuf_c_(NULL) {
-    asm_func_ = GET_PARAM(1);
-    c_func_ = GET_PARAM(2);
-  }
-
- protected:
-  virtual void SetUp() {
-    hbuf_asm_ = reinterpret_cast<int16_t*>(
-        vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
-    hbuf_c_ = reinterpret_cast<int16_t*>(
-        vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
-  }
-
-  virtual void TearDown() {
-    vpx_free(hbuf_c_);
-    hbuf_c_ = NULL;
-    vpx_free(hbuf_asm_);
-    hbuf_asm_ = NULL;
-  }
-
-  void RunComparison() {
-    ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
-    ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
-    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
-        << "Output mismatch";
-  }
-
- private:
-  IntProRowFunc asm_func_;
-  IntProRowFunc c_func_;
-  int16_t *hbuf_asm_;
-  int16_t *hbuf_c_;
-};
-
-typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
-
-typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
-
-class IntProColTest
-    : public AverageTestBase,
-      public ::testing::WithParamInterface<IntProColParam> {
- public:
-  IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
-    asm_func_ = GET_PARAM(1);
-    c_func_ = GET_PARAM(2);
-  }
-
- protected:
-  void RunComparison() {
-    ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
-    ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
-    EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
-  }
-
- private:
-  IntProColFunc asm_func_;
-  IntProColFunc c_func_;
-  int16_t sum_asm_;
-  int16_t sum_c_;
-};
-
-typedef int (*SatdFunc)(const int16_t *coeffs, int length);
-typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;
-
-class SatdTest
-    : public ::testing::Test,
-      public ::testing::WithParamInterface<SatdTestParam> {
- protected:
-  virtual void SetUp() {
-    satd_size_ = GET_PARAM(0);
-    satd_func_ = GET_PARAM(1);
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-    src_ = reinterpret_cast<int16_t*>(
-        vpx_memalign(16, sizeof(*src_) * satd_size_));
-    ASSERT_TRUE(src_ != NULL);
-  }
-
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-    vpx_free(src_);
-  }
-
-  void FillConstant(const int16_t val) {
-    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
-  }
-
-  void FillRandom() {
-    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
-  }
-
-  void Check(const int expected) {
-    int total;
-    ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_));
-    EXPECT_EQ(expected, total);
-  }
-
-  int satd_size_;
-
- private:
-  int16_t *src_;
-  SatdFunc satd_func_;
-  ACMRandom rnd_;
-};
-
-uint8_t* AverageTestBase::source_data_ = NULL;
-
-TEST_P(AverageTest, MinValue) {
-  FillConstant(0);
-  CheckAverages();
-}
-
-TEST_P(AverageTest, MaxValue) {
-  FillConstant(255);
-  CheckAverages();
-}
-
-TEST_P(AverageTest, Random) {
-  // The reference frame, but not the source frame, may be unaligned for
-  // certain types of searches.
-  for (int i = 0; i < 1000; i++) {
-    FillRandom();
-    CheckAverages();
-  }
-}
-
-TEST_P(IntProRowTest, MinValue) {
-  FillConstant(0);
-  RunComparison();
-}
-
-TEST_P(IntProRowTest, MaxValue) {
-  FillConstant(255);
-  RunComparison();
-}
-
-TEST_P(IntProRowTest, Random) {
-  FillRandom();
-  RunComparison();
-}
-
-TEST_P(IntProColTest, MinValue) {
-  FillConstant(0);
-  RunComparison();
-}
-
-TEST_P(IntProColTest, MaxValue) {
-  FillConstant(255);
-  RunComparison();
-}
-
-TEST_P(IntProColTest, Random) {
-  FillRandom();
-  RunComparison();
-}
-
-
-TEST_P(SatdTest, MinValue) {
-  const int kMin = -32640;
-  const int expected = -kMin * satd_size_;
-  FillConstant(kMin);
-  Check(expected);
-}
-
-TEST_P(SatdTest, MaxValue) {
-  const int kMax = 32640;
-  const int expected = kMax * satd_size_;
-  FillConstant(kMax);
-  Check(expected);
-}
-
-TEST_P(SatdTest, Random) {
-  int expected;
-  switch (satd_size_) {
-    case 16: expected = 205298; break;
-    case 64: expected = 1113950; break;
-    case 256: expected = 4268415; break;
-    case 1024: expected = 16954082; break;
-    default:
-      FAIL() << "Invalid satd size (" << satd_size_
-             << ") valid: 16/64/256/1024";
-  }
-  FillRandom();
-  Check(expected);
-}
-
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
-    C, AverageTest,
-    ::testing::Values(
-        make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c),
-        make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    C, SatdTest,
-    ::testing::Values(
-        make_tuple(16, &vpx_satd_c),
-        make_tuple(64, &vpx_satd_c),
-        make_tuple(256, &vpx_satd_c),
-        make_tuple(1024, &vpx_satd_c)));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, AverageTest,
-    ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2),
-        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2),
-        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2),
-        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2),
-        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2),
-        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
-    SSE2, IntProRowTest, ::testing::Values(
-        make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
-        make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c),
-        make_tuple(64, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    SSE2, IntProColTest, ::testing::Values(
-        make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
-        make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c),
-        make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    SSE2, SatdTest,
-    ::testing::Values(
-        make_tuple(16, &vpx_satd_sse2),
-        make_tuple(64, &vpx_satd_sse2),
-        make_tuple(256, &vpx_satd_sse2),
-        make_tuple(1024, &vpx_satd_sse2)));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, AverageTest,
-    ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon),
-        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon),
-        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon),
-        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon),
-        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon),
-        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon)));
-
-INSTANTIATE_TEST_CASE_P(
-    NEON, IntProRowTest, ::testing::Values(
-        make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
-        make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c),
-        make_tuple(64, &vpx_int_pro_row_neon, &vpx_int_pro_row_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    NEON, IntProColTest, ::testing::Values(
-        make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
-        make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c),
-        make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    NEON, SatdTest,
-    ::testing::Values(
-        make_tuple(16, &vpx_satd_neon),
-        make_tuple(64, &vpx_satd_neon),
-        make_tuple(256, &vpx_satd_neon),
-        make_tuple(1024, &vpx_satd_neon)));
-#endif
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, AverageTest,
-    ::testing::Values(
-        make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa),
-        make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa),
-        make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa),
-        make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
-        make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
-        make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
-#endif
-
-}  // namespace
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -1,229 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "vpx_mem/vpx_mem.h"
-
-
-extern "C"
-double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
-                          const unsigned char *img2, int img2_pitch,
-                          int width, int height);
-
-using libvpx_test::ACMRandom;
-
-namespace {
-class BlockinessTestBase : public ::testing::Test {
- public:
-  BlockinessTestBase(int width, int height) : width_(width), height_(height) {}
-
-  static void SetUpTestCase() {
-    source_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(source_data_);
-    source_data_ = NULL;
-    vpx_free(reference_data_);
-    reference_data_ = NULL;
-  }
-
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  // Handle frames up to 640x480
-  static const int kDataAlignment = 16;
-  static const int kDataBufferSize = 640*480;
-
-  virtual void SetUp() {
-    source_stride_ = (width_ + 31) & ~31;
-    reference_stride_ = width_ * 2;
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
-  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant,
-                    int width, int height) {
-    for (int h = 0; h < height; ++h) {
-      for (int w = 0; w < width; ++w) {
-        data[h * stride + w] = fill_constant;
-      }
-    }
-  }
-
-  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
-    FillConstant(data, stride, fill_constant, width_, height_);
-  }
-
-  void FillRandom(uint8_t *data, int stride, int width, int height) {
-    for (int h = 0; h < height; ++h) {
-      for (int w = 0; w < width; ++w) {
-        data[h * stride + w] = rnd_.Rand8();
-      }
-    }
-  }
-
-  void FillRandom(uint8_t *data, int stride) {
-    FillRandom(data, stride, width_, height_);
-  }
-
-  void FillRandomBlocky(uint8_t *data, int stride) {
-    for (int h = 0; h < height_; h += 4) {
-      for (int w = 0; w < width_; w += 4) {
-        FillRandom(data + h * stride + w, stride, 4, 4);
-      }
-    }
-  }
-
-  void FillCheckerboard(uint8_t *data, int stride) {
-    for (int h = 0; h < height_; h += 4) {
-      for (int w = 0; w < width_; w += 4) {
-        if (((h/4) ^ (w/4)) & 1)
-          FillConstant(data + h * stride + w, stride, 255, 4, 4);
-        else
-          FillConstant(data + h * stride + w, stride, 0, 4, 4);
-      }
-    }
-  }
-
-  void Blur(uint8_t *data, int stride, int taps) {
-    int sum = 0;
-    int half_taps = taps / 2;
-    for (int h = 0; h < height_; ++h) {
-      for (int w = 0; w < taps; ++w) {
-        sum += data[w + h * stride];
-      }
-      for (int w = taps; w < width_; ++w) {
-        sum += data[w + h * stride] - data[w - taps + h * stride];
-        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
-      }
-    }
-    for (int w = 0; w < width_; ++w) {
-      for (int h = 0; h < taps; ++h) {
-        sum += data[h + w * stride];
-      }
-      for (int h = taps; h < height_; ++h) {
-        sum += data[w + h * stride] - data[(h - taps) * stride + w];
-        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
-      }
-    }
-  }
-  int width_, height_;
-  static uint8_t* source_data_;
-  int source_stride_;
-  static uint8_t* reference_data_;
-  int reference_stride_;
-
-  ACMRandom rnd_;
-};
-
-#if CONFIG_VP9_ENCODER
-typedef std::tr1::tuple<int, int> BlockinessParam;
-class BlockinessVP9Test
-    : public BlockinessTestBase,
-      public ::testing::WithParamInterface<BlockinessParam> {
- public:
-  BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {}
-
- protected:
-  int CheckBlockiness() {
-    return vp9_get_blockiness(source_data_, source_stride_,
-                              reference_data_, reference_stride_,
-                              width_, height_);
-  }
-};
-#endif  // CONFIG_VP9_ENCODER
-
-uint8_t* BlockinessTestBase::source_data_ = NULL;
-uint8_t* BlockinessTestBase::reference_data_ = NULL;
-
-#if CONFIG_VP9_ENCODER
-TEST_P(BlockinessVP9Test, SourceBlockierThanReference) {
-  // Source is blockier than reference.
-  FillRandomBlocky(source_data_, source_stride_);
-  FillConstant(reference_data_, reference_stride_, 128);
-  int super_blocky = CheckBlockiness();
-
-  EXPECT_EQ(0, super_blocky) << "Blocky source should produce 0 blockiness.";
-}
-
-TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) {
-  // Source is blockier than reference.
-  FillConstant(source_data_, source_stride_, 128);
-  FillRandomBlocky(reference_data_, reference_stride_);
-  int super_blocky = CheckBlockiness();
-
-  EXPECT_GT(super_blocky, 0.0)
-      << "Blocky reference should score high for blockiness.";
-}
-
-TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) {
-  // Source is blockier than reference.
-  FillConstant(source_data_, source_stride_, 128);
-  FillRandomBlocky(reference_data_, reference_stride_);
-  int super_blocky = CheckBlockiness();
-
-  Blur(reference_data_, reference_stride_, 4);
-  int less_blocky = CheckBlockiness();
-
-  EXPECT_GT(super_blocky, less_blocky)
-      << "A straight blur should decrease blockiness.";
-}
-
-TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
-  // Source is blockier than reference.
-  FillConstant(source_data_, source_stride_, 128);
-  FillCheckerboard(reference_data_, reference_stride_);
-
-  int super_blocky = CheckBlockiness();
-
-  Blur(reference_data_, reference_stride_, 4);
-  int less_blocky = CheckBlockiness();
-
-  EXPECT_GT(super_blocky, less_blocky)
-      << "A straight blur should decrease blockiness.";
-}
-#endif  // CONFIG_VP9_ENCODER
-
-
-using std::tr1::make_tuple;
-
-//------------------------------------------------------------------------------
-// C functions
-
-#if CONFIG_VP9_ENCODER
-const BlockinessParam c_vp9_tests[] = {
-  make_tuple(320, 240),
-  make_tuple(318, 242),
-  make_tuple(318, 238),
-};
-INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
-#endif
-
-}  // namespace
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -52,7 +52,7 @@ TEST_P(BordersTest, TestEncodeHighBitrate) {
  // extend into the border and test the border condition.
  cfg_.g_lag_in_frames = 25;
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 2000;
  cfg_.rc_max_quantizer = 10;

--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -1,189 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-
-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/md5_helper.h"
-#include "test/util.h"
-#if CONFIG_WEBM_IO
-#include "test/webm_video_source.h"
-#endif
-
-namespace {
-
-#if CONFIG_WEBM_IO
-
-const int kLegacyByteAlignment = 0;
-const int kLegacyYPlaneByteAlignment = 32;
-const int kNumPlanesToCheck = 3;
-const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
-const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
-
-struct ByteAlignmentTestParam {
-  int byte_alignment;
-  vpx_codec_err_t expected_value;
-  bool decode_remaining;
-};
-
-const ByteAlignmentTestParam kBaTestParams[] = {
-  {kLegacyByteAlignment, VPX_CODEC_OK, true},
-  {32, VPX_CODEC_OK, true},
-  {64, VPX_CODEC_OK, true},
-  {128, VPX_CODEC_OK, true},
-  {256, VPX_CODEC_OK, true},
-  {512, VPX_CODEC_OK, true},
-  {1024, VPX_CODEC_OK, true},
-  {1, VPX_CODEC_INVALID_PARAM, false},
-  {-2, VPX_CODEC_INVALID_PARAM, false},
-  {4, VPX_CODEC_INVALID_PARAM, false},
-  {16, VPX_CODEC_INVALID_PARAM, false},
-  {255, VPX_CODEC_INVALID_PARAM, false},
-  {2048, VPX_CODEC_INVALID_PARAM, false},
-};
-
-// Class for testing byte alignment of reference buffers.
-class ByteAlignmentTest
-    : public ::testing::TestWithParam<ByteAlignmentTestParam> {
- protected:
-  ByteAlignmentTest()
-      : video_(NULL),
-        decoder_(NULL),
-        md5_file_(NULL) {}
-
-  virtual void SetUp() {
-    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
-    ASSERT_TRUE(video_ != NULL);
-    video_->Init();
-    video_->Begin();
-
-    const vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
-    ASSERT_TRUE(decoder_ != NULL);
-
-    OpenMd5File(kVP9Md5File);
-  }
-
-  virtual void TearDown() {
-    if (md5_file_ != NULL)
-      fclose(md5_file_);
-
-    delete decoder_;
-    delete video_;
-  }
-
-  void SetByteAlignment(int byte_alignment, vpx_codec_err_t expected_value) {
-    decoder_->Control(VP9_SET_BYTE_ALIGNMENT, byte_alignment, expected_value);
-  }
-
-  vpx_codec_err_t DecodeOneFrame(int byte_alignment_to_check) {
-    const vpx_codec_err_t res =
-        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-    CheckDecodedFrames(byte_alignment_to_check);
-    if (res == VPX_CODEC_OK)
-      video_->Next();
-    return res;
-  }
-
-  vpx_codec_err_t DecodeRemainingFrames(int byte_alignment_to_check) {
-    for (; video_->cxdata() != NULL; video_->Next()) {
-      const vpx_codec_err_t res =
-          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
-      if (res != VPX_CODEC_OK)
-        return res;
-      CheckDecodedFrames(byte_alignment_to_check);
-    }
-    return VPX_CODEC_OK;
-  }
-
- private:
-  // Check if |data| is aligned to |byte_alignment_to_check|.
-  // |byte_alignment_to_check| must be a power of 2.
-  void CheckByteAlignment(const uint8_t *data, int byte_alignment_to_check) {
-    ASSERT_EQ(0u, reinterpret_cast<size_t>(data) % byte_alignment_to_check);
-  }
-
-  // Iterate through the planes of the decoded frames and check for
-  // alignment based off |byte_alignment_to_check|.
-  void CheckDecodedFrames(int byte_alignment_to_check) {
-    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
-    const vpx_image_t *img;
-
-    // Get decompressed data
-    while ((img = dec_iter.Next()) != NULL) {
-      if (byte_alignment_to_check == kLegacyByteAlignment) {
-        CheckByteAlignment(img->planes[0], kLegacyYPlaneByteAlignment);
-      } else {
-        for (int i = 0; i < kNumPlanesToCheck; ++i) {
-          CheckByteAlignment(img->planes[i], byte_alignment_to_check);
-        }
-      }
-      CheckMd5(*img);
-    }
-  }
-
-  // TODO(fgalligan): Move the MD5 testing code into another class.
-  void OpenMd5File(const std::string &md5_file_name_) {
-    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
-        << md5_file_name_;
-  }
-
-  void CheckMd5(const vpx_image_t &img) {
-    ASSERT_TRUE(md5_file_ != NULL);
-    char expected_md5[33];
-    char junk[128];
-
-    // Read correct md5 checksums.
-    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
-    ASSERT_NE(EOF, res) << "Read md5 data failed";
-    expected_md5[32] = '\0';
-
-    ::libvpx_test::MD5 md5_res;
-    md5_res.Add(&img);
-    const char *const actual_md5 = md5_res.Get();
-
-    // Check md5 match.
-    ASSERT_STREQ(expected_md5, actual_md5) << "MD5 checksums don't match";
-  }
-
-  libvpx_test::WebMVideoSource *video_;
-  libvpx_test::VP9Decoder *decoder_;
-  FILE *md5_file_;
-};
-
-TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
-  const int num_elements = 14;
-  const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024,
-                                  0, 1024, 32, 512, 64, 256, 128 };
-
-  for (int i = 0; i < num_elements; ++i) {
-    SetByteAlignment(byte_alignments[i], VPX_CODEC_OK);
-    ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame(byte_alignments[i]));
-  }
-  SetByteAlignment(byte_alignments[0], VPX_CODEC_OK);
-  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(byte_alignments[0]));
-}
-
-TEST_P(ByteAlignmentTest, TestAlignment) {
-  const ByteAlignmentTestParam t = GetParam();
-  SetByteAlignment(t.byte_alignment, t.expected_value);
-  if (t.decode_remaining)
-    ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment));
-}
-
-INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest,
-                        ::testing::ValuesIn(kBaTestParams));
-
-#endif  // CONFIG_WEBM_IO
-
-}  // namespace
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -35,11 +35,6 @@ class CodecFactory {
  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const = 0;

-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline)  // NOLINT(runtime/int)
-                                 const = 0;
-
  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
@@ -77,10 +72,6 @@ class VP8Decoder : public Decoder {
  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
@@ -113,14 +104,8 @@ class VP8CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, flags, deadline);
+    return new VP8Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
@@ -169,10 +154,6 @@ class VP9Decoder : public Decoder {
  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

-  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-             unsigned long deadline)  // NOLINT
-      : Decoder(cfg, flag, deadline) {}
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
@@ -205,14 +186,8 @@ class VP9CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
-    return CreateDecoder(cfg, 0, deadline);
-  }
-
-  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
-                                 const vpx_codec_flags_t flags,
-                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, flags, deadline);
+    return new VP9Decoder(cfg, deadline);
 #else
    return NULL;
 #endif
@@ -251,5 +226,7 @@ const libvpx_test::VP9CodecFactory kVP9;
 #define VP9_INSTANTIATE_TEST_CASE(test, ...)
 #endif  // CONFIG_VP9

+
 }  // namespace libvpx_test
+
 #endif  // TEST_CODEC_FACTORY_H_
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -1,224 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#if CONFIG_VP9_ENCODER
-#include "./vp9_rtcd.h"
-#endif
-
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vpx_dsp/ssim.h"
-#include "vpx_mem/vpx_mem.h"
-
-extern "C"
-double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
-                            uint8_t *img2, int img2_pitch,
-                            int width, int height,
-                            Ssimv *sv2, Metrics *m,
-                            int do_inconsistency);
-
-using libvpx_test::ACMRandom;
-
-namespace {
-class ConsistencyTestBase : public ::testing::Test {
- public:
-  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
-
-  static void SetUpTestCase() {
-    source_data_[0] = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_[0] = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    source_data_[1] = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    reference_data_[1] = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-    ssim_array_ = new Ssimv[kDataBufferSize / 16];
-  }
-
-  static void ClearSsim() {
-    memset(ssim_array_, 0, kDataBufferSize / 16);
-  }
-  static void TearDownTestCase() {
-    vpx_free(source_data_[0]);
-    source_data_[0] = NULL;
-    vpx_free(reference_data_[0]);
-    reference_data_[0] = NULL;
-    vpx_free(source_data_[1]);
-    source_data_[1] = NULL;
-    vpx_free(reference_data_[1]);
-    reference_data_[1] = NULL;
-
-    delete[] ssim_array_;
-  }
-
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  // Handle frames up to 640x480
-  static const int kDataAlignment = 16;
-  static const int kDataBufferSize = 640*480;
-
-  virtual void SetUp() {
-    source_stride_ = (width_ + 31) & ~31;
-    reference_stride_ = width_ * 2;
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
-  void FillRandom(uint8_t *data, int stride, int width, int height) {
-    for (int h = 0; h < height; ++h) {
-      for (int w = 0; w < width; ++w) {
-        data[h * stride + w] = rnd_.Rand8();
-      }
-    }
-  }
-
-  void FillRandom(uint8_t *data, int stride) {
-    FillRandom(data, stride, width_, height_);
-  }
-
-  void Copy(uint8_t *reference, uint8_t *source) {
-    memcpy(reference, source, kDataBufferSize);
-  }
-
-  void Blur(uint8_t *data, int stride, int taps) {
-    int sum = 0;
-    int half_taps = taps / 2;
-    for (int h = 0; h < height_; ++h) {
-      for (int w = 0; w < taps; ++w) {
-        sum += data[w + h * stride];
-      }
-      for (int w = taps; w < width_; ++w) {
-        sum += data[w + h * stride] - data[w - taps + h * stride];
-        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
-      }
-    }
-    for (int w = 0; w < width_; ++w) {
-      for (int h = 0; h < taps; ++h) {
-        sum += data[h + w * stride];
-      }
-      for (int h = taps; h < height_; ++h) {
-        sum += data[w + h * stride] - data[(h - taps) * stride + w];
-        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
-      }
-    }
-  }
-  int width_, height_;
-  static uint8_t* source_data_[2];
-  int source_stride_;
-  static uint8_t* reference_data_[2];
-  int reference_stride_;
-  static Ssimv *ssim_array_;
-  Metrics metrics_;
-
-  ACMRandom rnd_;
-};
-
-#if CONFIG_VP9_ENCODER
-typedef std::tr1::tuple<int, int> ConsistencyParam;
-class ConsistencyVP9Test
-    : public ConsistencyTestBase,
-      public ::testing::WithParamInterface<ConsistencyParam> {
- public:
-  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}
-
- protected:
-  double CheckConsistency(int frame) {
-    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
-    return
-        vpx_get_ssim_metrics(source_data_[frame], source_stride_,
-                             reference_data_[frame], reference_stride_,
-                             width_, height_, ssim_array_, &metrics_, 1);
-  }
-};
-#endif  // CONFIG_VP9_ENCODER
-
-uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
-uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
-Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
-
-#if CONFIG_VP9_ENCODER
-TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
-  FillRandom(source_data_[0], source_stride_);
-  Copy(source_data_[1], source_data_[0]);
-  Copy(reference_data_[0], source_data_[0]);
-  Blur(reference_data_[0], reference_stride_, 3);
-  Copy(reference_data_[1], source_data_[0]);
-  Blur(reference_data_[1], reference_stride_, 3);
-
-  double inconsistency = CheckConsistency(1);
-  inconsistency = CheckConsistency(0);
-  EXPECT_EQ(inconsistency, 0.0)
-      << "Should have 0 inconsistency if they are exactly the same.";
-
-  // If sources are not consistent reference frames inconsistency should
-  // be less than if the source is consistent.
-  FillRandom(source_data_[0], source_stride_);
-  FillRandom(source_data_[1], source_stride_);
-  FillRandom(reference_data_[0], reference_stride_);
-  FillRandom(reference_data_[1], reference_stride_);
-  CheckConsistency(0);
-  inconsistency = CheckConsistency(1);
-
-  Copy(source_data_[1], source_data_[0]);
-  CheckConsistency(0);
-  double inconsistency2 = CheckConsistency(1);
-  EXPECT_LT(inconsistency, inconsistency2)
-      << "Should have less inconsistency if source itself is inconsistent.";
-
-  // Less of a blur should be less inconsistent than more blur coming off a
-  // a frame with no blur.
-  ClearSsim();
-  FillRandom(source_data_[0], source_stride_);
-  Copy(source_data_[1], source_data_[0]);
-  Copy(reference_data_[0], source_data_[0]);
-  Copy(reference_data_[1], source_data_[0]);
-  Blur(reference_data_[1], reference_stride_, 4);
-  CheckConsistency(0);
-  inconsistency = CheckConsistency(1);
-  ClearSsim();
-  Copy(reference_data_[1], source_data_[0]);
-  Blur(reference_data_[1], reference_stride_, 8);
-  CheckConsistency(0);
-  inconsistency2 = CheckConsistency(1);
-
-  EXPECT_LT(inconsistency, inconsistency2)
-      << "Stronger Blur should produce more inconsistency.";
-}
-#endif  // CONFIG_VP9_ENCODER
-
-
-using std::tr1::make_tuple;
-
-//------------------------------------------------------------------------------
-// C functions
-
-#if CONFIG_VP9_ENCODER
-const ConsistencyParam c_vp9_tests[] = {
-  make_tuple(320, 240),
-  make_tuple(318, 242),
-  make_tuple(318, 238),
-};
-INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
-                        ::testing::ValuesIn(c_vp9_tests));
-#endif
-
-}  // namespace
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -26,8 +26,7 @@ class CpuSpeedTest
      : EncoderTest(GET_PARAM(0)),
        encoding_mode_(GET_PARAM(1)),
        set_cpu_used_(GET_PARAM(2)),
-        min_psnr_(kMaxPSNR),
-        tune_content_(VP9E_CONTENT_DEFAULT) {}
+        min_psnr_(kMaxPSNR) {}
  virtual ~CpuSpeedTest() {}

  virtual void SetUp() {
@@ -50,7 +49,6 @@ class CpuSpeedTest
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
      if (encoding_mode_ != ::libvpx_test::kRealTime) {
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
@@ -68,7 +66,6 @@ class CpuSpeedTest
  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
  double min_psnr_;
-  int tune_content_;
 };

 TEST_P(CpuSpeedTest, TestQ0) {
@@ -77,7 +74,7 @@ TEST_P(CpuSpeedTest, TestQ0) {
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;
@@ -95,7 +92,7 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
  cfg_.g_timebase = video.timebase();
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;
@@ -106,28 +103,13 @@ TEST_P(CpuSpeedTest, TestScreencastQ0) {
  EXPECT_GE(min_psnr_, kMaxPSNR);
 }

-TEST_P(CpuSpeedTest, TestTuneScreen) {
-  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
-  cfg_.g_timebase = video.timebase();
-  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_minsection_pct = 2000;
-  cfg_.rc_target_bitrate = 2000;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_min_quantizer = 0;
-  tune_content_ = VP9E_CONTENT_SCREEN;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
 TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
  // Validate that this non multiple of 64 wide clip encodes and decodes
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 12000;
  cfg_.rc_max_quantizer = 10;
  cfg_.rc_min_quantizer = 0;
@@ -143,7 +125,7 @@ TEST_P(CpuSpeedTest, TestLowBitrate) {
  // when passing in a very high min q.  This pushes the encoder to producing
  // lots of small partitions which might will test the other condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
-  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
+  cfg_.rc_2pass_vbr_minsection_pct = 2000;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_min_quantizer = 40;

--- a/test/vp8cx_set_ref.sh
+++ b/test/vp8cx_set_ref.sh
@@ -8,30 +8,27 @@
 ##  in the file PATENTS.  All contributing project authors may
 ##  be found in the AUTHORS file in the root of the source tree.
 ##
-##  This file tests the libvpx vp8cx_set_ref example. To add new tests to this
+##  This file tests the libvpx cx_set_ref example. To add new tests to this
 ##  file, do the following:
 ##    1. Write a shell function (this is your test).
-##    2. Add the function to vp8cx_set_ref_tests (on a new line).
+##    2. Add the function to cx_set_ref_tests (on a new line).
 ##
 . $(dirname $0)/tools_common.sh

 # Environment check: $YUV_RAW_INPUT is required.
-vp8cx_set_ref_verify_environment() {
+cx_set_ref_verify_environment() {
  if [ ! -e "${YUV_RAW_INPUT}" ]; then
    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
 }

-# Runs vp8cx_set_ref and updates the reference frame before encoding frame 90.
-# $1 is the codec name, which vp8cx_set_ref does not support at present: It's
-# currently used only to name the output file.
-# TODO(tomfinegan): Pass the codec param once the example is updated to support
-# VP9.
+# Runs cx_set_ref and updates the reference frame before encoding frame 90.
+# $1 is the codec name.
 vpx_set_ref() {
-  local encoder="${LIBVPX_BIN_PATH}/vp8cx_set_ref${VPX_TEST_EXE_SUFFIX}"
  local codec="$1"
-  local output_file="${VPX_TEST_OUTPUT_DIR}/vp8cx_set_ref_${codec}.ivf"
+  local encoder="${LIBVPX_BIN_PATH}/${codec}cx_set_ref${VPX_TEST_EXE_SUFFIX}"
+  local output_file="${VPX_TEST_OUTPUT_DIR}/${codec}cx_set_ref_${codec}.ivf"
  local ref_frame_num=90

  if [ ! -x "${encoder}" ]; then
@@ -46,12 +43,18 @@ vpx_set_ref() {
  [ -e "${output_file}" ] || return 1
 }

-vp8cx_set_ref_vp8() {
+cx_set_ref_vp8() {
  if [ "$(vp8_encode_available)" = "yes" ]; then
    vpx_set_ref vp8 || return 1
  fi
 }

-vp8cx_set_ref_tests="vp8cx_set_ref_vp8"
+cx_set_ref_vp9() {
+  if [ "$(vp9_encode_available)" = "yes" ]; then
+    vpx_set_ref vp9 || return 1
+  fi
+}

-run_tests vp8cx_set_ref_verify_environment "${vp8cx_set_ref_tests}"
+cx_set_ref_tests="cx_set_ref_vp8 cx_set_ref_vp9"
+
+run_tests cx_set_ref_verify_environment "${cx_set_ref_tests}"
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -14,7 +14,6 @@
 #include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
-#include "vpx/vpx_codec.h"

 namespace {

@@ -39,25 +38,13 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
-    denoiser_offon_test_ = 0;
-    denoiser_offon_period_ = -1;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0)
-      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
-
-    if (denoiser_offon_test_) {
-      ASSERT_GT(denoiser_offon_period_, 0)
-          << "denoiser_offon_period_ is not positive.";
-      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
-        // Flip denoiser_on_ periodically
-        denoiser_on_ ^= 1;
-      }
+    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
-
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
@@ -90,7 +77,7 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
          << pkt->data.frame.pts;
    }

-    const int64_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;

    // Subtract from the buffer the bits associated with a played back frame.
    bits_in_buffer_model_ -= frame_size_in_bits;
@@ -135,10 +122,8 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
  double duration_;
  double file_datarate_;
  double effective_datarate_;
-  int64_t bits_in_last_frame_;
+  size_t bits_in_last_frame_;
  int denoiser_on_;
-  int denoiser_offon_test_;
-  int denoiser_offon_period_;
 };

 #if CONFIG_TEMPORAL_DENOISING
@@ -170,29 +155,6 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
        << " The datarate for the file missed the target!";
  }
 }
-
-// Check basic datarate targeting, for a single bitrate, when denoiser is off
-// and on.
-TEST_P(DatarateTestLarge, DenoiserOffOn) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
-  cfg_.rc_target_bitrate = 300;
-  ResetModel();
-  // The denoiser is off by default.
-  denoiser_on_ = 0;
-  // Set the offon test flag.
-  denoiser_offon_test_ = 1;
-  denoiser_offon_period_ = 100;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
-      << " The datarate for the file exceeds the target!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
-      << " The datarate for the file missed the target!";
-}
 #endif  // CONFIG_TEMPORAL_DENOISING

 TEST_P(DatarateTestLarge, BasicBufferModel) {
@@ -256,901 +218,5 @@ TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
  }
 }

-class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- public:
-  DatarateTestVP9Large() : EncoderTest(GET_PARAM(0)) {}
-
- protected:
-  virtual ~DatarateTestVP9Large() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    set_cpu_used_ = GET_PARAM(2);
-    ResetModel();
-  }
-
-  virtual void ResetModel() {
-    last_pts_ = 0;
-    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
-    frame_number_ = 0;
-    tot_frame_number_ = 0;
-    first_drop_ = 0;
-    num_drops_ = 0;
-    // Denoiser is off by default.
-    denoiser_on_ = 0;
-    // For testing up to 3 layers.
-    for (int i = 0; i < 3; ++i) {
-      bits_total_[i] = 0;
-    }
-    denoiser_offon_test_ = 0;
-    denoiser_offon_period_ = -1;
-  }
-
-  //
-  // Frame flags and layer id for temporal layers.
-  //
-
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // For three layers, test pattern is:
-  //   1      3    5      7
-  //      2           6
-  // 0          4            ....
-  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1.
-  // For this 3 layer example, the 2nd enhancement layer (layer 2) does not
-  // update any reference frames.
-  int SetFrameFlags(int frame_num, int num_temp_layers) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        // Layer 0: predict from L and ARF, update L.
-        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        // Layer 1: predict from L, G and ARF, and update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_UPD_ENTROPY;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        // Layer 0: predict from L and ARF; update L.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_REF_GF;
-      } else if ((frame_num - 2) % 4 == 0) {
-        // Layer 1: predict from L, G, ARF; update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
-      }  else if ((frame_num - 1) % 2 == 0) {
-        // Layer 2: predict from L, G, ARF; update none.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_UPD_LAST;
-      }
-    }
-    return frame_flags;
-  }
-
-  int SetLayerId(int frame_num, int num_temp_layers) {
-    int layer_id = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        layer_id = 0;
-      } else {
-        layer_id = 1;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        layer_id = 0;
-      } else if ((frame_num - 2) % 4 == 0) {
-        layer_id = 1;
-      } else if ((frame_num - 1) % 2 == 0) {
-        layer_id = 2;
-      }
-    }
-    return layer_id;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0)
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-
-    if (denoiser_offon_test_) {
-      ASSERT_GT(denoiser_offon_period_, 0)
-          << "denoiser_offon_period_ is not positive.";
-      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
-        // Flip denoiser_on_ periodically
-        denoiser_on_ ^= 1;
-      }
-    }
-
-    encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
-
-    if (cfg_.ts_number_layers > 1) {
-      if (video->frame() == 0) {
-        encoder->Control(VP9E_SET_SVC, 1);
-      }
-      vpx_svc_layer_id_t layer_id;
-      layer_id.spatial_layer_id = 0;
-      frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
-      layer_id.temporal_layer_id = SetLayerId(video->frame(),
-                                              cfg_.ts_number_layers);
-      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
-    }
-    const vpx_rational_t tb = video->timebase();
-    timebase_ = static_cast<double>(tb.num) / tb.den;
-    duration_ = 0;
-  }
-
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    // Time since last timestamp = duration.
-    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
-
-    if (duration > 1) {
-      // If first drop not set and we have a drop set it to this time.
-      if (!first_drop_)
-        first_drop_ = last_pts_ + 1;
-      // Update the number of frame drops.
-      num_drops_ += static_cast<int>(duration - 1);
-      // Update counter for total number of frames (#frames input to encoder).
-      // Needed for setting the proper layer_id below.
-      tot_frame_number_ += static_cast<int>(duration - 1);
-    }
-
-    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
-
-    // Add to the buffer the bits we'd expect from a constant bitrate server.
-    bits_in_buffer_model_ += static_cast<int64_t>(
-        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
-
-    // Buffer should not go negative.
-    ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
-        << pkt->data.frame.pts;
-
-    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-
-    // Update the total encoded bits. For temporal layers, update the cumulative
-    // encoded bits per layer.
-    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
-      bits_total_[i] += frame_size_in_bits;
-    }
-
-    // Update the most recent pts.
-    last_pts_ = pkt->data.frame.pts;
-    ++frame_number_;
-    ++tot_frame_number_;
-  }
-
-  virtual void EndPassHook(void) {
-    for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
-        ++layer) {
-      duration_ = (last_pts_ + 1) * timebase_;
-      if (bits_total_[layer]) {
-        // Effective file datarate:
-        effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
-      }
-    }
-  }
-
-  vpx_codec_pts_t last_pts_;
-  double timebase_;
-  int frame_number_;      // Counter for number of non-dropped/encoded frames.
-  int tot_frame_number_;  // Counter for total number of input frames.
-  int64_t bits_total_[3];
-  double duration_;
-  double effective_datarate_[3];
-  int set_cpu_used_;
-  int64_t bits_in_buffer_model_;
-  vpx_codec_pts_t first_drop_;
-  int num_drops_;
-  int denoiser_on_;
-  int denoiser_offon_test_;
-  int denoiser_offon_period_;
-};
-
-// Check basic rate targeting for VBR mode.
-TEST_P(DatarateTestVP9Large, BasicRateTargetingVBR) {
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.g_error_resilient = 0;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.g_lag_in_frames = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 300);
-  for (int i = 400; i <= 800; i += 400) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75)
-        << " The datarate for the file is lower than target by too much!";
-    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25)
-        << " The datarate for the file is greater than target by too much!";
-  }
-}
-
-// Check basic rate targeting for CBR,
-TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
-  for (int i = 150; i < 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-        << " The datarate for the file is lower than target by too much!";
-    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-        << " The datarate for the file is greater than target by too much!";
-  }
-}
-
-// Check basic rate targeting for CBR.
-TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
-  ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
-
-  cfg_.g_profile = 1;
-  cfg_.g_timebase = video.timebase();
-
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-
-  for (int i = 250; i < 900; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(static_cast<double>(cfg_.rc_target_bitrate),
-              effective_datarate_[0] * 0.85)
-        << " The datarate for the file exceeds the target by too much!";
-    ASSERT_LE(static_cast<double>(cfg_.rc_target_bitrate),
-              effective_datarate_[0] * 1.15)
-        << " The datarate for the file missed the target!"
-        << cfg_.rc_target_bitrate << " "<< effective_datarate_;
-  }
-}
-
-// Check that (1) the first dropped frame gets earlier and earlier
-// as the drop frame threshold is increased, and (2) that the total number of
-// frame drops does not decrease as we increase frame drop threshold.
-// Use a lower qp-max to force some frame drops.
-TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_undershoot_pct = 20;
-  cfg_.rc_undershoot_pct = 20;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 50;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.rc_target_bitrate = 200;
-  cfg_.g_lag_in_frames = 0;
-  // TODO(marpan): Investigate datarate target failures with a smaller keyframe
-  // interval (128).
-  cfg_.kf_max_dist = 9999;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
-
-  const int kDropFrameThreshTestStep = 30;
-  vpx_codec_pts_t last_drop = 140;
-  int last_num_drops = 0;
-  for (int i = 10; i < 100; i += kDropFrameThreshTestStep) {
-    cfg_.rc_dropframe_thresh = i;
-    ResetModel();
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-        << " The datarate for the file is lower than target by too much!";
-    ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-        << " The datarate for the file is greater than target by too much!";
-    ASSERT_LE(first_drop_, last_drop)
-        << " The first dropped frame for drop_thresh " << i
-        << " > first dropped frame for drop_thresh "
-        << i - kDropFrameThreshTestStep;
-    ASSERT_GE(num_drops_, last_num_drops * 0.85)
-        << " The number of dropped frames for drop_thresh " << i
-        << " < number of dropped frames for drop_thresh "
-        << i - kDropFrameThreshTestStep;
-    last_drop = first_drop_;
-    last_num_drops = num_drops_;
-  }
-}
-
-// Check basic rate targeting for 2 temporal layers.
-TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1).
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-
-  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
-
-  if (deadline_ == VPX_DL_REALTIME)
-    cfg_.g_error_resilient = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    // 60-40 bitrate allocation for 2 temporal layers.
-    cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
-
-// Check basic rate targeting for 3 temporal layers.
-TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-
-  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      // TODO(yaowu): Work out more stable rc control strategy and
-      //              Adjust the thresholds to be tighter than .75.
-      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      // TODO(yaowu): Work out more stable rc control strategy and
-      //              Adjust the thresholds to be tighter than 1.25.
-      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
-
-// Check basic rate targeting for 3 temporal layers, with frame dropping.
-// Only for one (low) bitrate with lower max_quantizer, and somewhat higher
-// frame drop threshold, to force frame dropping.
-TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  // Set frame drop threshold and rc_max_quantizer to force some frame drops.
-  cfg_.rc_dropframe_thresh = 20;
-  cfg_.rc_max_quantizer = 45;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1).
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-
-  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  cfg_.rc_target_bitrate = 200;
-  ResetModel();
-  // 40-20-40 bitrate allocation for 3 temporal layers.
-  cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-  cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
-        << " The datarate for the file is lower than target by too much, "
-            "for layer: " << j;
-    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
-        << " The datarate for the file is greater than target by too much, "
-            "for layer: " << j;
-    // Expect some frame drops in this test: for this 200 frames test,
-    // expect at least 10% and not more than 60% drops.
-    ASSERT_GE(num_drops_, 20);
-    ASSERT_LE(num_drops_, 130);
-  }
-}
-
-#if CONFIG_VP9_TEMPORAL_DENOISING
-// Check basic datarate targeting, for a single bitrate, when denoiser is on.
-TEST_P(DatarateTestVP9Large, DenoiserLevels) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 2;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 140);
-
-  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
-  // there is only one denoiser mode: denoiserYonly(which is 1),
-  // but may add more modes in the future.
-  cfg_.rc_target_bitrate = 300;
-  ResetModel();
-  // Turn on the denoiser.
-  denoiser_on_ = 1;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-      << " The datarate for the file is lower than target by too much!";
-  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-      << " The datarate for the file is greater than target by too much!";
-}
-
-// Check basic datarate targeting, for a single bitrate, when denoiser is off
-// and on.
-TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 2;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
-
-  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
-  // there is only one denoiser mode: denoiserYonly(which is 1),
-  // but may add more modes in the future.
-  cfg_.rc_target_bitrate = 300;
-  ResetModel();
-  // The denoiser is off by default.
-  denoiser_on_ = 0;
-  // Set the offon test flag.
-  denoiser_offon_test_ = 1;
-  denoiser_offon_period_ = 100;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
-      << " The datarate for the file is lower than target by too much!";
-  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
-      << " The datarate for the file is greater than target by too much!";
-}
-#endif  // CONFIG_VP9_TEMPORAL_DENOISING
-
-class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- public:
-  DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~DatarateOnePassCbrSvc() {}
- protected:
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    speed_setting_ = GET_PARAM(2);
-    ResetModel();
-  }
-  virtual void ResetModel() {
-    last_pts_ = 0;
-    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
-    frame_number_ = 0;
-    first_drop_ = 0;
-    bits_total_ = 0;
-    duration_ = 0.0;
-    mismatch_psnr_ = 0.0;
-    mismatch_nframes_ = 0;
-  }
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-  }
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      int i;
-      for (i = 0; i < VPX_MAX_LAYERS; ++i) {
-        svc_params_.max_quantizers[i] = 63;
-        svc_params_.min_quantizers[i] = 0;
-      }
-      encoder->Control(VP9E_SET_SVC, 1);
-      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
-      encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
-      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1));
-    }
-    const vpx_rational_t tb = video->timebase();
-    timebase_ = static_cast<double>(tb.num) / tb.den;
-    duration_ = 0;
-  }
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
-    if (last_pts_ == 0)
-      duration = 1;
-    bits_in_buffer_model_ += static_cast<int64_t>(
-        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
-    const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
-                         ? true: false;
-    if (!key_frame) {
-      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
-          << pkt->data.frame.pts;
-    }
-    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-    bits_in_buffer_model_ -= frame_size_in_bits;
-    bits_total_ += frame_size_in_bits;
-    if (!first_drop_ && duration > 1)
-      first_drop_ = last_pts_ + 1;
-    last_pts_ = pkt->data.frame.pts;
-    bits_in_last_frame_ = frame_size_in_bits;
-    ++frame_number_;
-  }
-  virtual void EndPassHook(void) {
-    if (bits_total_) {
-      const double file_size_in_kb = bits_total_ / 1000.;  // bits per kilobit
-      duration_ = (last_pts_ + 1) * timebase_;
-      file_datarate_ = file_size_in_kb / duration_;
-    }
-  }
-
-  virtual void MismatchHook(const vpx_image_t *img1,
-                            const vpx_image_t *img2) {
-    double mismatch_psnr = compute_psnr(img1, img2);
-    mismatch_psnr_ += mismatch_psnr;
-    ++mismatch_nframes_;
-  }
-
-  unsigned int GetMismatchFrames() {
-    return mismatch_nframes_;
-  }
-
-  vpx_codec_pts_t last_pts_;
-  int64_t bits_in_buffer_model_;
-  double timebase_;
-  int frame_number_;
-  vpx_codec_pts_t first_drop_;
-  int64_t bits_total_;
-  double duration_;
-  double file_datarate_;
-  size_t bits_in_last_frame_;
-  vpx_svc_extra_cfg_t svc_params_;
-  int speed_setting_;
-  double mismatch_psnr_;
-  int mismatch_nframes_;
-};
-static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
-    const vpx_svc_extra_cfg_t *svc_params,
-    int spatial_layers,
-    int temporal_layers,
-    int temporal_layering_mode) {
-  int sl, spatial_layer_target;
-  float total = 0;
-  float alloc_ratio[VPX_MAX_LAYERS] = {0};
-  for (sl = 0; sl < spatial_layers; ++sl) {
-    if (svc_params->scaling_factor_den[sl] > 0) {
-      alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] *
-          1.0 / svc_params->scaling_factor_den[sl]);
-      total += alloc_ratio[sl];
-    }
-  }
-  for (sl = 0; sl < spatial_layers; ++sl) {
-    enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
-        (unsigned int)(enc_cfg->rc_target_bitrate *
-            alloc_ratio[sl] / total);
-    const int index = sl * temporal_layers;
-    if (temporal_layering_mode == 3) {
-      enc_cfg->layer_target_bitrate[index] =
-          spatial_layer_target >> 1;
-      enc_cfg->layer_target_bitrate[index + 1] =
-          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
-      enc_cfg->layer_target_bitrate[index + 2] =
-          spatial_layer_target;
-    } else if (temporal_layering_mode == 2) {
-      enc_cfg->layer_target_bitrate[index] =
-          spatial_layer_target * 2 / 3;
-      enc_cfg->layer_target_bitrate[index + 1] =
-          spatial_layer_target;
-    }
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
-// 3 temporal layers. Run CIF clip with 1 thread.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
-  // layer target_bitrate.
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-        cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-            << " The datarate for the file exceeds the target by too much!";
-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
-        << " The datarate for the file is lower than the target by too much!";
-    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
-// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayersSmallKf) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  cfg_.rc_target_bitrate = 400;
-  // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
-  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
-  for (int j = 64; j <= 67; j++) {
-    cfg_.kf_max_dist = j;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-        cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-            << " The datarate for the file exceeds the target by too much!";
-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
-        << " The datarate for the file is lower than the target by too much!";
-    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
-// 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers4threads) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 2;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 4;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 144;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 288;
-  svc_params_.scaling_factor_den[1] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
-                                       30, 1, 0, 300);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-      cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-          << " The datarate for the file exceeds the target by too much!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
-      << " The datarate for the file is lower than the target by too much!";
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
-// 3 temporal layers. Run CIF clip with 1 thread.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
-                                       30, 1, 0, 300);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-     cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-          << " The datarate for the file exceeds the target by too much!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
-      << " The datarate for the file is lower than the target by too much!";
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
-// temporal layers. Run CIF clip with 1 thread, and few short key frame periods.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayersSmallKf) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 1;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
-                                       30, 1, 0, 300);
-  cfg_.rc_target_bitrate = 800;
-  // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
-  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
-  for (int j = 32; j <= 35; j++) {
-    cfg_.kf_max_dist = j;
-    ResetModel();
-    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-       cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-            << " The datarate for the file exceeds the target by too much!";
-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.30)
-        << " The datarate for the file is lower than the target by too much!";
-    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-  }
-}
-
-// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
-// 3 temporal layers. Run HD clip with 4 threads.
-TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers4threads) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_min_quantizer = 0;
-  cfg_.rc_max_quantizer = 63;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.ss_number_layers = 3;
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.g_error_resilient = 1;
-  cfg_.g_threads = 4;
-  cfg_.temporal_layering_mode = 3;
-  svc_params_.scaling_factor_num[0] = 72;
-  svc_params_.scaling_factor_den[0] = 288;
-  svc_params_.scaling_factor_num[1] = 144;
-  svc_params_.scaling_factor_den[1] = 288;
-  svc_params_.scaling_factor_num[2] = 288;
-  svc_params_.scaling_factor_den[2] = 288;
-  cfg_.rc_dropframe_thresh = 10;
-  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
-                                       30, 1, 0, 300);
-  cfg_.rc_target_bitrate = 800;
-  ResetModel();
-  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
-      cfg_.ts_number_layers, cfg_.temporal_layering_mode);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(cfg_.rc_target_bitrate, file_datarate_ * 0.85)
-          << " The datarate for the file exceeds the target by too much!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.22)
-      << " The datarate for the file is lower than the target by too much!";
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-}
-
 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
-                          ::testing::Values(::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kRealTime),
-                          ::testing::Range(2, 9));
-VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(5, 9));
 }  // namespace
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -13,18 +13,15 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -40,6 +37,30 @@ static int round(double x) {
 #endif

 const int kNumCoeffs = 256;
+const double PI = 3.1415926535898;
+void reference2_16x16_idct_2d(double *input, double *output) {
+  double x;
+  for (int l = 0; l < 16; ++l) {
+    for (int k = 0; k < 16; ++k) {
+      double s = 0;
+      for (int i = 0; i < 16; ++i) {
+        for (int j = 0; j < 16; ++j) {
+          x = cos(PI * j * (l + 0.5) / 16.0) *
+              cos(PI * i * (k + 0.5) / 16.0) *
+              input[i * 16 + j] / 256;
+          if (i != 0)
+            x *= sqrt(2.0);
+          if (j != 0)
+            x *= sqrt(2.0);
+          s += x;
+        }
+      }
+      output[k*16+l] = s;
+    }
+  }
+}
+
+
 const double C1 = 0.995184726672197;
 const double C2 = 0.98078528040323;
 const double C3 = 0.956940335732209;
@@ -248,12 +269,12 @@ typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>

 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
-  vpx_fdct16x16_c(in, out, stride);
+  vp9_fdct16x16_c(in, out, stride);
 }

 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
-  vpx_idct16x16_256_add_c(in, dest, stride);
+  vp9_idct16x16_256_add_c(in, dest, stride);
 }

 void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
@@ -268,20 +289,20 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
+  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
+  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
-                      int /*tx_type*/) {
+                      int tx_type) {
  idct16x16_10(in, out, stride);
 }

 void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
-                      int /*tx_type*/) {
+                      int tx_type) {
  idct16x16_12(in, out, stride);
 }

@@ -293,32 +314,32 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 }

-#if HAVE_SSE2
 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
+  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
+  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }

+#if HAVE_SSE2
 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class Trans16x16TestBase {
 public:
@@ -335,13 +356,13 @@ class Trans16x16TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 #endif

      // Initialize a test block with input range [-mask_, mask_].
@@ -373,10 +394,10 @@ class Trans16x16TestBase {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const int32_t diff =
+        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ?  dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const int32_t diff = dst[j] - src[j];
+        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        if (max_error < error)
@@ -395,9 +416,9 @@ class Trans16x16TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -416,13 +437,15 @@ class Trans16x16TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -449,19 +472,24 @@ class Trans16x16TestBase {
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);

-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == VPX_BITS_8)
+          input_block[j] = rnd.Rand8() - rnd.Rand8();
+        else
+          input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0)
@@ -474,11 +502,11 @@ class Trans16x16TestBase {
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
-      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
-      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
+      vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
+      vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
 #if CONFIG_VP9_HIGHBITDEPTH
-      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
-      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
+      vpx_memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
+      vpx_memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
 #endif

      // quantization with maximum allowed step sizes
@@ -511,14 +539,14 @@ class Trans16x16TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+#endif

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -534,7 +562,7 @@ class Trans16x16TestBase {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        }
      }

@@ -548,7 +576,7 @@ class Trans16x16TestBase {
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -557,7 +585,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        const uint32_t diff = dst[j] - src[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
            << "Error: 16x16 IDCT has error " << error
@@ -565,25 +593,24 @@ class Trans16x16TestBase {
      }
    }
  }
-
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
+#endif

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
+          coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
        } else {
          coeff[scan[j]] = 0;
        }
@@ -594,7 +621,7 @@ class Trans16x16TestBase {
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
@@ -605,7 +632,7 @@ class Trans16x16TestBase {
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -614,7 +641,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 #else
        const uint32_t diff = dst[j] - ref[j];
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
@@ -622,7 +649,6 @@ class Trans16x16TestBase {
      }
    }
  }
-
  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
@@ -770,15 +796,15 @@ class InvTrans16x16DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
+    thresh_  = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
-    pitch_ = 16;
+    pitch_    = 16;
    mask_ = (1 << bit_depth_) - 1;
 }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }
@@ -792,82 +818,21 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
 }

-class PartialTrans16x16Test
-    : public ::testing::TestWithParam<
-          std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
- public:
-  virtual ~PartialTrans16x16Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    bit_depth_ = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  FdctFunc fwd_txfm_;
-};
-
-TEST_P(PartialTrans16x16Test, Extremes) {
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  const int minval = -maxval;
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
-}
-
-TEST_P(PartialTrans16x16Test, Random) {
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  int sum = 0;
-  for (int i = 0; i < kNumCoeffs; ++i) {
-    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
-    input[i] = val;
-    sum += val;
-  }
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
-  EXPECT_EQ(sum >> 1, output[0]);
-}
-
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -885,11 +850,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(
-    C, PartialTrans16x16Test,
-    ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
-                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
-                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
@@ -898,25 +858,22 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
-                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
-                                                     VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct16x16_sse2,
-                   &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fdct16x16_sse2,
+                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
@@ -928,28 +885,33 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
-                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct16x16_sse2,
+        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct16x16_c,
+        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct16x16_sse2,
+        make_tuple(&vp9_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vpx_highbd_fdct16x16_c,
+        make_tuple(&vp9_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct16x16_sse2,
-                   &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fdct16x16_sse2,
+                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
@@ -968,27 +930,13 @@ INSTANTIATE_TEST_CASE_P(
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
-INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
-                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    MSA, Trans16x16DCT,
+    SSSE3, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct16x16_msa,
-                   &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans16x16HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
+        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
                   VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
-                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -13,18 +13,16 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -81,14 +79,18 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;

 #if CONFIG_VP9_HIGHBITDEPTH
+void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
+}
+
 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
+  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
+  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
 public:
@@ -117,13 +119,13 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
  uint32_t max_error = 0;
  int64_t total_error = 0;
  const int count_test_block = 10000;
-  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
@@ -154,10 +156,10 @@ TEST_P(Trans32x32Test, AccuracyCheck) {

    for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-      const int32_t diff =
+      const uint32_t diff =
          bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-      const int32_t diff = dst[j] - src[j];
+      const uint32_t diff = dst[j] - src[j];
 #endif
      const uint32_t error = diff * diff;
      if (max_error < error)
@@ -182,16 +184,16 @@ TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;

-  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

  for (int i = 0; i < count_test_block; ++i) {
    for (int j = 0; j < kNumCoeffs; ++j)
      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);

    const int stride = 32;
-    vpx_fdct32x32_c(input_block, output_ref_block, stride);
+    vp9_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    if (version_ == 0) {
@@ -210,13 +212,15 @@ TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 2000;

-  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-mask_, mask_].
    for (int j = 0; j < kNumCoeffs; ++j) {
+      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    if (i == 0) {
@@ -228,7 +232,7 @@ TEST_P(Trans32x32Test, MemCheck) {
    }

    const int stride = 32;
-    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+    vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

@@ -253,13 +257,13 @@ TEST_P(Trans32x32Test, MemCheck) {
 TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
-  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
@@ -305,165 +309,77 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
  }
 }

-class PartialTrans32x32Test
-    : public ::testing::TestWithParam<
-          std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
- public:
-  virtual ~PartialTrans32x32Test() {}
-  virtual void SetUp() {
-    fwd_txfm_ = GET_PARAM(0);
-    bit_depth_ = GET_PARAM(1);
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  FwdTxfmFunc fwd_txfm_;
-};
-
-TEST_P(PartialTrans32x32Test, Extremes) {
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  const int minval = -maxval;
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
-
-  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
-}
-
-TEST_P(PartialTrans32x32Test, Random) {
-#if CONFIG_VP9_HIGHBITDEPTH
-  const int16_t maxval =
-      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
-#else
-  const int16_t maxval = 255;
-#endif
-  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-  int sum = 0;
-  for (int i = 0; i < kNumCoeffs; ++i) {
-    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
-    input[i] = val;
-    sum += val;
-  }
-  output[0] = 0;
-  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
-  EXPECT_EQ(sum >> 3, output[0]);
-}
-
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_c,
+        make_tuple(&vp9_highbd_fdct32x32_c,
                   &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_rd_c,
+        make_tuple(&vp9_highbd_fdct32x32_rd_c,
                   &idct32x32_10, 1, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_c,
+        make_tuple(&vp9_highbd_fdct32x32_c,
                   &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vpx_highbd_fdct32x32_rd_c,
+        make_tuple(&vp9_highbd_fdct32x32_rd_c,
                   &idct32x32_12, 1, VPX_BITS_12),
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_c,
-                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(
-    C, PartialTrans32x32Test,
-    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
-                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
-                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_c,
+                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_c,
-                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
-                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
-                                                     VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_c,
+                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+#endif

-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_c,
-                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_c,
+                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_sse2,
-                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_sse2,
-                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
-                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+        make_tuple(&vp9_fdct32x32_sse2,
+                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_sse2,
+                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
+        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
+        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
-        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
+        make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
+        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
-                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_avx2,
-                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_avx2,
-                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans32x32Test,
-    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_msa,
-                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct32x32_rd_msa,
-                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
-                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+        make_tuple(&vp9_fdct32x32_avx2,
+                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_avx2,
+                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+#endif
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -7,11 +7,10 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vpx_config.h"
 #include "test/ivf_video_source.h"
+#include "./vpx_config.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

@@ -58,21 +57,6 @@ TEST(DecodeAPI, InvalidParams) {
  }
 }

-#if CONFIG_VP8_DECODER
-TEST(DecodeAPI, OptionalParams) {
-  vpx_codec_ctx_t dec;
-
-#if CONFIG_ERROR_CONCEALMENT
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
-                                             VPX_CODEC_USE_ERROR_CONCEALMENT));
-#else
-  EXPECT_EQ(VPX_CODEC_INCAPABLE,
-            vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
-                               VPX_CODEC_USE_ERROR_CONCEALMENT));
-#endif  // CONFIG_ERROR_CONCEALMENT
-}
-#endif  // CONFIG_VP8_DECODER
-
 #if CONFIG_VP9_DECODER
 // Test VP9 codec controls after a decode error to ensure the code doesn't
 // misbehave.
@@ -81,7 +65,6 @@ void TestVp9Controls(vpx_codec_ctx_t *dec) {
    VP8D_GET_LAST_REF_UPDATES,
    VP8D_GET_FRAME_CORRUPTED,
    VP9D_GET_DISPLAY_SIZE,
-    VP9D_GET_FRAME_SIZE
  };
  int val[2];

@@ -130,53 +113,14 @@ TEST(DecodeAPI, Vp9InvalidDecode) {
  vpx_codec_ctx_t dec;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
  const uint32_t frame_size = static_cast<uint32_t>(video.frame_size());
-#if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(VPX_CODEC_MEM_ERROR,
            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
-#else
-  EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM,
-            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
-#endif
  vpx_codec_iter_t iter = NULL;
  EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));

  TestVp9Controls(&dec);
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
 }
-
-TEST(DecodeAPI, Vp9PeekSI) {
-  const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo;
-  // The first 9 bytes are valid and the rest of the bytes are made up. Until
-  // size 10, this should return VPX_CODEC_UNSUP_BITSTREAM and after that it
-  // should return VPX_CODEC_CORRUPT_FRAME.
-  const uint8_t data[32] = {
-    0x85, 0xa4, 0xc1, 0xa1, 0x38, 0x81, 0xa3, 0x49,
-    0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
-  };
-
-  for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) {
-    // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get
-    // to decoder_peek_si_internal on frames of size < 8.
-    if (data_sz >= 8) {
-      vpx_codec_ctx_t dec;
-      EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
-      EXPECT_EQ((data_sz < 10) ?
-                    VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME,
-                vpx_codec_decode(&dec, data, data_sz, NULL, 0));
-      vpx_codec_iter_t iter = NULL;
-      EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));
-      EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec));
-    }
-
-    // Verify behavior of vpx_codec_peek_stream_info.
-    vpx_codec_stream_info_t si;
-    si.sz = sizeof(si);
-    EXPECT_EQ((data_sz < 10) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK,
-              vpx_codec_peek_stream_info(codec, data, data_sz, &si));
-  }
-}
 #endif  // CONFIG_VP9_DECODER

 }  // namespace
--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -8,17 +8,13 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <string>
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
 #include "test/ivf_video_source.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/webm_video_source.h"
 #include "vpx_ports/vpx_timer.h"
-#include "./ivfenc.h"
 #include "./vpx_version.h"

 using std::tr1::make_tuple;
@@ -28,9 +24,7 @@ namespace {
 #define VIDEO_NAME 0
 #define THREADS 1

-const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
-const char kNewEncodeOutputFile[] = "new_encode.ivf";

 /*
 DecodePerfTest takes a tuple of filename + number of threads to decode with
@@ -111,163 +105,4 @@ TEST_P(DecodePerfTest, PerfTest) {
 INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
                        ::testing::ValuesIn(kVP9DecodePerfVectors));

-class VP9NewEncodeDecodePerfTest :
-    public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  VP9NewEncodeDecodePerfTest()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        outfile_(0),
-        out_frames_(0) {
-  }
-
-  virtual ~VP9NewEncodeDecodePerfTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-
-    cfg_.g_lag_in_frames = 25;
-    cfg_.rc_min_quantizer = 2;
-    cfg_.rc_max_quantizer = 56;
-    cfg_.rc_dropframe_thresh = 0;
-    cfg_.rc_undershoot_pct = 50;
-    cfg_.rc_overshoot_pct = 50;
-    cfg_.rc_buf_sz = 1000;
-    cfg_.rc_buf_initial_sz = 500;
-    cfg_.rc_buf_optimal_sz = 600;
-    cfg_.rc_resize_allowed = 0;
-    cfg_.rc_end_usage = VPX_VBR;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, speed_);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, 2);
-    }
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    const std::string data_path = getenv("LIBVPX_TEST_DATA_PATH");
-    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
-    outfile_ = fopen(path_to_source.c_str(), "wb");
-    ASSERT_TRUE(outfile_ != NULL);
-  }
-
-  virtual void EndPassHook() {
-    if (outfile_ != NULL) {
-      if (!fseek(outfile_, 0, SEEK_SET))
-        ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
-      fclose(outfile_);
-      outfile_ = NULL;
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    ++out_frames_;
-
-    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
-      ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
-
-    // Write frame header and data.
-    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
-    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
-              pkt->data.frame.sz);
-  }
-
-  virtual bool DoDecode() { return false; }
-
-  void set_speed(unsigned int speed) {
-    speed_ = speed;
-  }
-
- private:
-  libvpx_test::TestMode encoding_mode_;
-  uint32_t speed_;
-  FILE *outfile_;
-  uint32_t out_frames_;
-};
-
-struct EncodePerfTestVideo {
-  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
-                      uint32_t bitrate_, int frames_)
-      : name(name_),
-        width(width_),
-        height(height_),
-        bitrate(bitrate_),
-        frames(frames_) {}
-  const char *name;
-  uint32_t width;
-  uint32_t height;
-  uint32_t bitrate;
-  int frames;
-};
-
-const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
-  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
-};
-
-TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {
-  SetUp();
-
-  // TODO(JBB): Make this work by going through the set of given files.
-  const int i = 0;
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  const char *video_name = kVP9EncodePerfTestVectors[i].name;
-  libvpx_test::I420VideoSource video(
-      video_name,
-      kVP9EncodePerfTestVectors[i].width,
-      kVP9EncodePerfTestVectors[i].height,
-      timebase.den, timebase.num, 0,
-      kVP9EncodePerfTestVectors[i].frames);
-  set_speed(2);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  const uint32_t threads = 4;
-
-  libvpx_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
-  decode_video.Init();
-
-  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-  cfg.threads = threads;
-  libvpx_test::VP9Decoder decoder(cfg, 0);
-
-  vpx_usec_timer t;
-  vpx_usec_timer_start(&t);
-
-  for (decode_video.Begin(); decode_video.cxdata() != NULL;
-       decode_video.Next()) {
-    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
-  }
-
-  vpx_usec_timer_mark(&t);
-  const double elapsed_secs =
-      static_cast<double>(vpx_usec_timer_elapsed(&t)) / kUsecsInSec;
-  const unsigned decode_frames = decode_video.frame_number();
-  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
-
-  printf("{\n");
-  printf("\t\"type\" : \"decode_perf_test\",\n");
-  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
-  printf("\t\"threadCount\" : %u,\n", threads);
-  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
-  printf("\t\"totalFrames\" : %u,\n", decode_frames);
-  printf("\t\"framesPerSecond\" : %f\n", fps);
-  printf("}\n");
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-  VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -7,11 +7,9 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"

@@ -67,7 +65,7 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,

 void DecoderTest::RunLoop(CompressedVideoSource *video,
                          const vpx_codec_dec_cfg_t &dec_cfg) {
-  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, flags_, 0);
+  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
  ASSERT_TRUE(decoder != NULL);
  bool end_of_file = false;

@@ -112,12 +110,4 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {
  RunLoop(video, dec_cfg);
 }

-void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
-  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
-}
-
-void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
-  flags_ = flags;
-}
-
 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -41,13 +41,7 @@ class DxDataIterator {
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
-    memset(&decoder_, 0, sizeof(decoder_));
-  }
-
-  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
-          unsigned long deadline)  // NOLINT
-      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
+      : cfg_(cfg), deadline_(deadline), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

@@ -72,7 +66,9 @@ class Decoder {
  }

  void Control(int ctrl_id, int arg) {
-    Control(ctrl_id, arg, VPX_CODEC_OK);
+    InitOnce();
+    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

  void Control(int ctrl_id, const void *arg) {
@@ -81,12 +77,6 @@ class Decoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

-  void Control(int ctrl_id, int arg, vpx_codec_err_t expected_value) {
-    InitOnce();
-    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
-    ASSERT_EQ(expected_value, res) << DecodeError();
-  }
-
  const char* DecodeError() {
    const char *detail = vpx_codec_error_detail(&decoder_);
    return detail ? detail : vpx_codec_error(&decoder_);
@@ -107,10 +97,6 @@ class Decoder {

  bool IsVP8() const;

-  vpx_codec_ctx_t * GetDecoder() {
-    return &decoder_;
-  }
-
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const = 0;

@@ -118,7 +104,7 @@ class Decoder {
    if (!init_done_) {
      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
                                                     CodecInterface(),
-                                                     &cfg_, flags_);
+                                                     &cfg_, 0);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
@@ -126,7 +112,6 @@ class Decoder {

  vpx_codec_ctx_t     decoder_;
  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
  unsigned int        deadline_;
  bool                init_done_;
 };
@@ -139,9 +124,6 @@ class DecoderTest {
  virtual void RunLoop(CompressedVideoSource *video,
                       const vpx_codec_dec_cfg_t &dec_cfg);

-  virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg);
-  virtual void set_flags(const vpx_codec_flags_t flags);
-
  // Hook to be called before decompressing every frame.
  virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
                                  Decoder* /*decoder*/) {}
@@ -164,16 +146,11 @@ class DecoderTest {
                                const vpx_codec_err_t res_peek);

 protected:
-  explicit DecoderTest(const CodecFactory *codec)
-      : codec_(codec),
-        cfg_(),
-        flags_(0) {}
+  explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
-  vpx_codec_dec_cfg_t cfg_;
-  vpx_codec_flags_t   flags_;
 };

 }  // namespace libvpx_test
--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -1,65 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "vpx/vp8cx.h"
-#include "vpx/vpx_encoder.h"
-
-namespace {
-
-#define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0]))
-
-TEST(EncodeAPI, InvalidParams) {
-  static const vpx_codec_iface_t *kCodecs[] = {
-#if CONFIG_VP8_ENCODER
-    &vpx_codec_vp8_cx_algo,
-#endif
-#if CONFIG_VP9_ENCODER
-    &vpx_codec_vp9_cx_algo,
-#endif
-  };
-  uint8_t buf[1] = {0};
-  vpx_image_t img;
-  vpx_codec_ctx_t enc;
-  vpx_codec_enc_cfg_t cfg;
-
-  EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf));
-
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-            vpx_codec_enc_config_default(NULL, NULL, 0));
-  EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-            vpx_codec_enc_config_default(NULL, &cfg, 0));
-  EXPECT_TRUE(vpx_codec_error(NULL) != NULL);
-
-  for (int i = 0; i < NELEMENTS(kCodecs); ++i) {
-    SCOPED_TRACE(vpx_codec_iface_name(kCodecs[i]));
-    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-              vpx_codec_enc_init(NULL, kCodecs[i], NULL, 0));
-    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-              vpx_codec_enc_init(&enc, kCodecs[i], NULL, 0));
-    EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-              vpx_codec_enc_config_default(kCodecs[i], &cfg, 1));
-
-    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(kCodecs[i], &cfg, 0));
-    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, kCodecs[i], &cfg, 0));
-    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0));
-
-    EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
-  }
-}
-
-}  // namespace
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -7,7 +7,6 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-#include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "./vpx_version.h"
@@ -51,8 +50,7 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };

-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
-const int kEncodePerfTestThreads[] = { 1, 2, 4 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 12 };

 #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))

@@ -65,8 +63,7 @@ class VP9EncodePerfTest
        min_psnr_(kMaxPsnr),
        nframes_(0),
        encoding_mode_(GET_PARAM(1)),
-        speed_(0),
-        threads_(1) {}
+        speed_(0) {}

  virtual ~VP9EncodePerfTest() {}

@@ -85,18 +82,12 @@ class VP9EncodePerfTest
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_resize_allowed = 0;
    cfg_.rc_end_usage = VPX_CBR;
-    cfg_.g_error_resilient = 1;
-    cfg_.g_threads = threads_;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      const int log2_tile_columns = 3;
+    if (video->frame() == 1) {
      encoder->Control(VP8E_SET_CPUUSED, speed_);
-      encoder->Control(VP9E_SET_TILE_COLUMNS, log2_tile_columns);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
    }
  }

@@ -122,77 +113,54 @@ class VP9EncodePerfTest
    speed_ = speed;
  }

-  void set_threads(unsigned int threads) {
-    threads_ = threads;
-  }
-
 private:
  double min_psnr_;
  unsigned int nframes_;
  libvpx_test::TestMode encoding_mode_;
  unsigned speed_;
-  unsigned int threads_;
 };

 TEST_P(VP9EncodePerfTest, PerfTest) {
  for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) {
    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
-      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
-        if (kVP9EncodePerfTestVectors[i].width < 512 &&
-            kEncodePerfTestThreads[k] > 1)
-          continue;
-        else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
-                 kEncodePerfTestThreads[k] > 2)
-          continue;
+      SetUp();

-        set_threads(kEncodePerfTestThreads[k]);
-        SetUp();
+      const vpx_rational timebase = { 33333333, 1000000000 };
+      cfg_.g_timebase = timebase;
+      cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;

-        const vpx_rational timebase = { 33333333, 1000000000 };
-        cfg_.g_timebase = timebase;
-        cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+      init_flags_ = VPX_CODEC_USE_PSNR;

-        init_flags_ = VPX_CODEC_USE_PSNR;
+      const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
+      const char *video_name = kVP9EncodePerfTestVectors[i].name;
+      libvpx_test::I420VideoSource video(
+          video_name,
+          kVP9EncodePerfTestVectors[i].width,
+          kVP9EncodePerfTestVectors[i].height,
+          timebase.den, timebase.num, 0,
+          kVP9EncodePerfTestVectors[i].frames);
+      set_speed(kEncodePerfTestSpeeds[j]);

-        const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
-        const char *video_name = kVP9EncodePerfTestVectors[i].name;
-        libvpx_test::I420VideoSource video(
-            video_name,
-            kVP9EncodePerfTestVectors[i].width,
-            kVP9EncodePerfTestVectors[i].height,
-            timebase.den, timebase.num, 0,
-            kVP9EncodePerfTestVectors[i].frames);
-        set_speed(kEncodePerfTestSpeeds[j]);
+      vpx_usec_timer t;
+      vpx_usec_timer_start(&t);

-        vpx_usec_timer t;
-        vpx_usec_timer_start(&t);
+      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+      vpx_usec_timer_mark(&t);
+      const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
+      const double fps = frames / elapsed_secs;
+      const double minimum_psnr = min_psnr();

-        vpx_usec_timer_mark(&t);
-        const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
-        const double fps = frames / elapsed_secs;
-        const double minimum_psnr = min_psnr();
-        std::string display_name(video_name);
-        if (kEncodePerfTestThreads[k] > 1) {
-          char thread_count[32];
-          snprintf(thread_count, sizeof(thread_count), "_t-%d",
-                   kEncodePerfTestThreads[k]);
-          display_name += thread_count;
-        }
-
-        printf("{\n");
-        printf("\t\"type\" : \"encode_perf_test\",\n");
-        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
-        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
-        printf("\t\"totalFrames\" : %u,\n", frames);
-        printf("\t\"framesPerSecond\" : %f,\n", fps);
-        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
-        printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
-        printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
-        printf("}\n");
-      }
+      printf("{\n");
+      printf("\t\"type\" : \"encode_perf_test\",\n");
+      printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+      printf("\t\"videoName\" : \"%s\",\n", video_name);
+      printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+      printf("\t\"totalFrames\" : %u,\n", frames);
+      printf("\t\"framesPerSecond\" : %f,\n", fps);
+      printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+      printf("\t\"speed\" : %d\n", kEncodePerfTestSpeeds[j]);
+      printf("}\n");
    }
  }
 }
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -8,50 +8,15 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <string>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
 #include "test/encode_test_driver.h"
+#include "test/decode_test_driver.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"

 namespace libvpx_test {
-void Encoder::InitEncoder(VideoSource *video) {
-  vpx_codec_err_t res;
-  const vpx_image_t *img = video->img();
-
-  if (video->img() && !encoder_.priv) {
-    cfg_.g_w = img->d_w;
-    cfg_.g_h = img->d_h;
-    cfg_.g_timebase = video->timebase();
-    cfg_.rc_twopass_stats_in = stats_->buf();
-
-    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
-                             init_flags_);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-
-#if CONFIG_VP9_ENCODER
-    if (CodecInterface() == &vpx_codec_vp9_cx_algo) {
-      // Default to 1 tile column for VP9.
-      const int log2_tile_columns = 0;
-      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
-                               log2_tile_columns);
-      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-    } else
-#endif
-    {
-#if CONFIG_VP8_ENCODER
-      ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
-          << "Unknown Codec Interface";
-#endif
-    }
-  }
-}
-
 void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
  if (video->img())
    EncodeFrameInternal(*video, frame_flags);
@@ -74,6 +39,17 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
  vpx_codec_err_t res;
  const vpx_image_t *img = video.img();

+  // Handle first frame initialization
+  if (!encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video.timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
+                             init_flags_);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
  // Handle frame resizing
  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
    cfg_.g_w = img->d_w;
@@ -84,7 +60,8 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,

  // Encode the frame
  API_REGISTER_STATE_CHECK(
-      res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(),
+      res = vpx_codec_encode(&encoder_,
+                             video.img(), video.pts(), video.duration(),
                             frame_flags, deadline_));
  ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
 }
@@ -100,7 +77,6 @@ void Encoder::Flush() {

 void EncoderTest::InitializeConfig() {
  const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
-  dec_cfg_ = vpx_codec_dec_cfg_t();
  ASSERT_EQ(VPX_CODEC_OK, res);
 }

@@ -166,6 +142,12 @@ void EncoderTest::MismatchHook(const vpx_image_t* /*img1*/,
 void EncoderTest::RunLoop(VideoSource *video) {
  vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();

+#if CONFIG_ROW_TILE
+  // Decode all tiles.
+  dec_cfg.tile_col = -1;
+  dec_cfg.tile_row = -1;
+#endif  // CONFIG_ROW_TILE
+
  stats_.Reset();

  ASSERT_TRUE(passes_ == 1 || passes_ == 2);
@@ -183,19 +165,9 @@ void EncoderTest::RunLoop(VideoSource *video) {
    Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
                                                   &stats_);
    ASSERT_TRUE(encoder != NULL);
-
-    video->Begin();
-    encoder->InitEncoder(video);
-    ASSERT_FALSE(::testing::Test::HasFatalFailure());
-
-    unsigned long dec_init_flags = 0;  // NOLINT
-    // Use fragment decoder if encoder outputs partitions.
-    // NOTE: fragment decoder and partition encoder are only supported by VP8.
-    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
-      dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
-    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
+    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
    bool again;
-    for (again = true; again; video->Next()) {
+    for (again = true, video->Begin(); again; video->Next()) {
      again = (video->img() != NULL);

      PreEncodeFrameHook(video);
@@ -235,13 +207,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
        }
      }

-      // Flush the decoder when there are no more fragments.
-      if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
-        const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
-        if (!HandleDecodeResult(res_dec, *video, decoder))
-          break;
-      }
-
      if (has_dxdata && has_cxdata) {
        const vpx_image_t *img_enc = encoder->GetPreviewFrame();
        DxDataIterator dec_iter = decoder->GetDxData();
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -13,13 +13,12 @@
 #include <string>
 #include <vector>

-#include "third_party/googletest/src/include/gtest/gtest.h"
-
 #include "./vpx_config.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vpx/vpx_encoder.h"
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
 #include "vpx/vp8cx.h"
 #endif
-#include "vpx/vpx_encoder.h"

 namespace libvpx_test {

@@ -105,8 +104,6 @@ class Encoder {
    return CxDataIterator(&encoder_);
  }

-  void InitEncoder(VideoSource *video);
-
  const vpx_image_t *GetPreviewFrame() {
    return vpx_codec_get_preview_frame(&encoder_);
  }
@@ -124,25 +121,11 @@ class Encoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

-  void Control(int ctrl_id, int *arg) {
-    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-
  void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

-  void Control(int ctrl_id, struct vpx_svc_layer_id *arg) {
-    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-
-  void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
-    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
  void Control(int ctrl_id, vpx_active_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
@@ -150,12 +133,6 @@ class Encoder {
  }
 #endif

-  void Config(const vpx_codec_enc_cfg_t *cfg) {
-    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-    cfg_ = *cfg;
-  }
-
  void set_deadline(unsigned long deadline) {
    deadline_ = deadline;
  }
@@ -193,10 +170,7 @@ class EncoderTest {
 protected:
  explicit EncoderTest(const CodecFactory *codec)
      : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
-        last_pts_(0) {
-    // Default to 1 thread.
-    cfg_.g_threads = 1;
-  }
+        last_pts_(0) {}

  virtual ~EncoderTest() {}

@@ -206,11 +180,6 @@ class EncoderTest {
  // Map the TestMode enum to the deadline_ and passes_ variables.
  void SetMode(TestMode mode);

-  // Set encoder flag.
-  void set_init_flags(unsigned long flag) {  // NOLINT(runtime/int)
-    init_flags_ = flag;
-  }
-
  // Main loop
  virtual void RunLoop(VideoSource *video);

@@ -264,7 +233,6 @@ class EncoderTest {

  bool                 abort_;
  vpx_codec_enc_cfg_t  cfg_;
-  vpx_codec_dec_cfg_t  dec_cfg_;
  unsigned int         passes_;
  unsigned long        deadline_;
  TwopassStatsStore    stats_;
--- a/test/error_block_test.cc
+++ b/test/error_block_test.cc
@@ -0,0 +1,146 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 1000;
+
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
+                               const tran_low_t *dqcoeff, intptr_t block_size,
+                               int64_t *ssz, int bps);
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
+                        ErrorBlockParam;
+class ErrorBlockTest
+  : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+  virtual ~ErrorBlockTest() {}
+  virtual void SetUp() {
+    error_block_op_     = GET_PARAM(0);
+    ref_error_block_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
+};
+
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff,   4096);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int err_count = 0;
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      coeff[j]   = rnd(2<<20)-(1<<20);
+      dqcoeff[j] = rnd(2<<20)-(1<<20);
+    }
+    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
+                                  bit_depth_);
+    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
+                                                   &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Error Block Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff,   4096);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  int max_val = ((1<<20)-1);
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int err_count = 0;
+    int k = (i / 9) % 5;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if ( k == 4 && (i % 9) == 0 ) {
+      max_val >>= 1;
+    }
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      if (k < 4) {  // Test at maximum values
+        coeff[j]   = k % 2 ? max_val : -max_val;
+        dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
+      } else {
+        coeff[j]   = rnd(2 << 14) - (1 << 14);
+        dqcoeff[j] = rnd(2 << 14) - (1 << 14);
+      }
+    }
+    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
+                                  bit_depth_);
+    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
+                                                   &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Error Block Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+
+using std::tr1::make_tuple;
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+  SSE2_C_COMPARE, ErrorBlockTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_block_error_sse2,
+               &vp9_highbd_block_error_c, VPX_BITS_8)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}  // namespace
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -20,11 +20,10 @@ const int kMaxErrorFrames = 12;
 const int kMaxDroppableFrames = 12;

 class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  ErrorResilienceTestLarge()
      : EncoderTest(GET_PARAM(0)),
-        svc_support_(GET_PARAM(2)),
        psnr_(0.0),
        nframes_(0),
        mismatch_psnr_(0.0),
@@ -38,7 +37,6 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  void Reset() {
    error_nframes_ = 0;
    droppable_nframes_ = 0;
-    pattern_switch_ = 0;
  }

  virtual void SetUp() {
@@ -58,77 +56,22 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    nframes_++;
  }

-  //
-  // Frame flags and layer id for temporal layers.
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // LAST is updated on base/layer 0, GOLDEN  updated on layer 1.
-  // Non-zero pattern_switch parameter means pattern will switch to
-  // not using LAST for frame_num >= pattern_switch.
-  int SetFrameFlags(int frame_num,
-                    int num_temp_layers,
-                    int pattern_switch) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-        if (frame_num % 2 == 0) {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 0: predict from LAST and ARF, update LAST.
-            frame_flags = VP8_EFLAG_NO_REF_GF |
-                          VP8_EFLAG_NO_UPD_GF |
-                          VP8_EFLAG_NO_UPD_ARF;
-          } else {
-            // Layer 0: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
-        } else {
-          if (frame_num < pattern_switch || pattern_switch == 0) {
-            // Layer 1: predict from L, GF, and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_UPD_ARF |
-                          VP8_EFLAG_NO_UPD_LAST;
-          } else {
-            // Layer 1: predict from GF and ARF, update GF.
-            frame_flags = VP8_EFLAG_NO_REF_LAST |
-                          VP8_EFLAG_NO_UPD_LAST |
-                          VP8_EFLAG_NO_UPD_ARF;
-          }
-        }
-    }
-    return frame_flags;
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder * /*encoder*/) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
    frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
                      VP8_EFLAG_NO_UPD_GF |
                      VP8_EFLAG_NO_UPD_ARF);
-    // For temporal layer case.
-    if (cfg_.ts_number_layers > 1) {
-      frame_flags_ = SetFrameFlags(video->frame(),
-                                   cfg_.ts_number_layers,
-                                   pattern_switch_);
+    if (droppable_nframes_ > 0 &&
+        (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
        if (droppable_frames_[i] == video->frame()) {
-          std::cout << "Encoding droppable frame: "
+          std::cout << "             Encoding droppable frame: "
                    << droppable_frames_[i] << "\n";
+          frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
+                           VP8_EFLAG_NO_UPD_GF |
+                           VP8_EFLAG_NO_UPD_ARF);
+          return;
        }
      }
-    } else {
-       if (droppable_nframes_ > 0 &&
-         (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
-         for (unsigned int i = 0; i < droppable_nframes_; ++i) {
-           if (droppable_frames_[i] == video->frame()) {
-             std::cout << "Encoding droppable frame: "
-                       << droppable_frames_[i] << "\n";
-             frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
-                              VP8_EFLAG_NO_UPD_GF |
-                              VP8_EFLAG_NO_UPD_ARF);
-             return;
-           }
-         }
-       }
    }
  }

@@ -190,18 +133,11 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    return mismatch_nframes_;
  }

-  void SetPatternSwitch(int frame_switch) {
-     pattern_switch_ = frame_switch;
-   }
-
-  bool svc_support_;
-
 private:
  double psnr_;
  unsigned int nframes_;
  unsigned int error_nframes_;
  unsigned int droppable_nframes_;
-  unsigned int pattern_switch_;
  double mismatch_psnr_;
  unsigned int mismatch_nframes_;
  unsigned int error_frames_[kMaxErrorFrames];
@@ -300,300 +236,7 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 #endif
 }

-// Check for successful decoding and no encoder/decoder mismatch
-// if we lose (i.e., drop before decoding) the enhancement layer frames for a
-// two layer temporal pattern. The base layer does not predict from the top
-// layer, so successful decoding is expected.
-TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
-  // This test doesn't run if SVC is not supported.
-  if (!svc_support_)
-    return;
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);

-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 500;
-  cfg_.g_lag_in_frames = 0;
-
-  cfg_.rc_end_usage = VPX_CBR;
-  // 2 Temporal layers, no spatial layers, CBR mode.
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-  cfg_.ts_periodicity = 2;
-  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 40);
-
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  SetPatternSwitch(0);
-
-  // The odd frames are the enhancement layer for 2 layer pattern, so set
-  // those frames as droppable. Drop the last 7 frames.
-  unsigned int num_droppable_frames = 7;
-  unsigned int droppable_frame_list[] = {27, 29, 31, 33, 35, 37, 39};
-  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
-  SetErrorFrames(num_droppable_frames, droppable_frame_list);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
-
-  // Reset previously set of error/droppable frames.
-  Reset();
-}
-
-// Check for successful decoding and no encoder/decoder mismatch
-// for a two layer temporal pattern, where at some point in the
-// sequence, the LAST ref is not used anymore.
-TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
-  // This test doesn't run if SVC is not supported.
-  if (!svc_support_)
-    return;
-
-  const vpx_rational timebase = { 33333333, 1000000000 };
-  cfg_.g_timebase = timebase;
-  cfg_.rc_target_bitrate = 500;
-  cfg_.g_lag_in_frames = 0;
-
-  cfg_.rc_end_usage = VPX_CBR;
-  // 2 Temporal layers, no spatial layers, CBR mode.
-  cfg_.ss_number_layers = 1;
-  cfg_.ts_number_layers = 2;
-  cfg_.ts_rate_decimator[0] = 2;
-  cfg_.ts_rate_decimator[1] = 1;
-  cfg_.ts_periodicity = 2;
-  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
-
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                     timebase.den, timebase.num, 0, 100);
-
-  // Error resilient mode ON.
-  cfg_.g_error_resilient = 1;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  SetPatternSwitch(60);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  // Test that no mismatches have been found
-  std::cout << "             Mismatch frames: "
-            << GetMismatchFrames() << "\n";
-  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
-
-  // Reset previously set of error/droppable frames.
-  Reset();
-}
-
-class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  ErrorResilienceTestLargeCodecControls()
-      : EncoderTest(GET_PARAM(0)),
-        encoding_mode_(GET_PARAM(1)) {
-    Reset();
-  }
-
-  virtual ~ErrorResilienceTestLargeCodecControls() {}
-
-  void Reset() {
-    last_pts_ = 0;
-    tot_frame_number_ = 0;
-    // For testing up to 3 layers.
-    for (int i = 0; i < 3; ++i) {
-      bits_total_[i] = 0;
-    }
-    duration_ = 0.0;
-  }
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-  }
-
-  //
-  // Frame flags and layer id for temporal layers.
-  //
-
-  // For two layers, test pattern is:
-  //   1     3
-  // 0    2     .....
-  // For three layers, test pattern is:
-  //   1      3    5      7
-  //      2           6
-  // 0          4            ....
-  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1,
-  // and ALTREF is updated on top layer for 3 layer pattern.
-  int SetFrameFlags(int frame_num, int num_temp_layers) {
-    int frame_flags = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        // Layer 0: predict from L and ARF, update L.
-        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
-                      VP8_EFLAG_NO_UPD_ARF;
-      } else {
-        // Layer 1: predict from L, G and ARF, and update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_UPD_ENTROPY;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        // Layer 0: predict from L, update L.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
-                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
-      } else if ((frame_num - 2) % 4 == 0) {
-        // Layer 1: predict from L, G,  update G.
-        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
-                      VP8_EFLAG_NO_REF_ARF;
-      }  else if ((frame_num - 1) % 2 == 0) {
-        // Layer 2: predict from L, G, ARF; update ARG.
-        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
-      }
-    }
-    return frame_flags;
-  }
-
-  int SetLayerId(int frame_num, int num_temp_layers) {
-    int layer_id = 0;
-    if (num_temp_layers == 2) {
-      if (frame_num % 2 == 0) {
-        layer_id = 0;
-      } else {
-         layer_id = 1;
-      }
-    } else if (num_temp_layers == 3) {
-      if (frame_num % 4 == 0) {
-        layer_id = 0;
-      } else if ((frame_num - 2) % 4 == 0) {
-        layer_id = 1;
-      } else if ((frame_num - 1) % 2 == 0) {
-        layer_id = 2;
-      }
-    }
-    return layer_id;
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  libvpx_test::Encoder *encoder) {
-    if (cfg_.ts_number_layers > 1) {
-        int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
-        int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
-        if (video->frame() > 0) {
-          encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
-          encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
-        }
-       const vpx_rational_t tb = video->timebase();
-       timebase_ = static_cast<double>(tb.num) / tb.den;
-       duration_ = 0;
-       return;
-    }
-  }
-
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    // Time since last timestamp = duration.
-    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
-    if (duration > 1) {
-      // Update counter for total number of frames (#frames input to encoder).
-      // Needed for setting the proper layer_id below.
-      tot_frame_number_ += static_cast<int>(duration - 1);
-    }
-    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
-    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-    // Update the total encoded bits. For temporal layers, update the cumulative
-    // encoded bits per layer.
-    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
-      bits_total_[i] += frame_size_in_bits;
-    }
-    // Update the most recent pts.
-    last_pts_ = pkt->data.frame.pts;
-    ++tot_frame_number_;
-  }
-
-  virtual void EndPassHook(void) {
-    duration_ = (last_pts_ + 1) * timebase_;
-    if (cfg_.ts_number_layers  > 1) {
-      for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
-          ++layer) {
-        if (bits_total_[layer]) {
-          // Effective file datarate:
-          effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
-        }
-      }
-    }
-  }
-
-  double effective_datarate_[3];
-   private:
-    libvpx_test::TestMode encoding_mode_;
-    vpx_codec_pts_t last_pts_;
-    double timebase_;
-    int64_t bits_total_[3];
-    double duration_;
-    int tot_frame_number_;
-  };
-
-// Check two codec controls used for:
-// (1) for setting temporal layer id, and (2) for settings encoder flags.
-// This test invokes those controls for each frame, and verifies encoder/decoder
-// mismatch and basic rate control response.
-// TODO(marpan): Maybe move this test to datarate_test.cc.
-TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.rc_min_quantizer = 2;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_CBR;
-  cfg_.rc_dropframe_thresh = 1;
-  cfg_.g_lag_in_frames = 0;
-  cfg_.kf_mode = VPX_KF_DISABLED;
-  cfg_.g_error_resilient = 1;
-
-  // 3 Temporal layers. Framerate decimation (4, 2, 1).
-  cfg_.ts_number_layers = 3;
-  cfg_.ts_rate_decimator[0] = 4;
-  cfg_.ts_rate_decimator[1] = 2;
-  cfg_.ts_rate_decimator[2] = 1;
-  cfg_.ts_periodicity = 4;
-  cfg_.ts_layer_id[0] = 0;
-  cfg_.ts_layer_id[1] = 2;
-  cfg_.ts_layer_id[2] = 1;
-  cfg_.ts_layer_id[3] = 2;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 200);
-  for (int i = 200; i <= 800; i += 200) {
-    cfg_.rc_target_bitrate = i;
-    Reset();
-    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
-    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
-          << " The datarate for the file is lower than target by too much, "
-              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
-          << " The datarate for the file is greater than target by too much, "
-              "for layer: " << j;
-    }
-  }
-}
-
-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
-                          ::testing::Values(true));
-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
-                          ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
-                          ::testing::Values(true));
 }  // namespace
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -24,6 +24,7 @@
 namespace {

 const int kVideoNameParam = 1;
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";

 struct ExternalFrameBuffer {
  uint8_t *data;
@@ -96,19 +97,13 @@ class ExternalFrameBufferList {
    return 0;
  }

-  // Marks the external frame buffer that |fb| is pointing to as free.
+  // Marks the external frame buffer that |fb| is pointing too as free.
  // Returns < 0 on an error.
  int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) {
-    if (fb == NULL) {
-      EXPECT_TRUE(fb != NULL);
-      return -1;
-    }
+    EXPECT_TRUE(fb != NULL);
    ExternalFrameBuffer *const ext_fb =
        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
-    if (ext_fb == NULL) {
-      EXPECT_TRUE(ext_fb != NULL);
-      return -1;
-    }
+    EXPECT_TRUE(ext_fb != NULL);
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
    return 0;
@@ -154,8 +149,6 @@ class ExternalFrameBufferList {
  ExternalFrameBuffer *ext_fb_list_;
 };

-#if CONFIG_WEBM_IO
-
 // Callback used by libvpx to request the application to return a frame
 // buffer of at least |min_size| in bytes.
 int get_vp9_frame_buffer(void *user_priv, size_t min_size,
@@ -198,8 +191,6 @@ int do_not_release_vp9_frame_buffer(void *user_priv,
  return 0;
 }

-#endif  // CONFIG_WEBM_IO
-
 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferMD5Test
    : public ::libvpx_test::DecoderTest,
@@ -281,8 +272,6 @@ class ExternalFrameBufferMD5Test
 };

 #if CONFIG_WEBM_IO
-const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
-
 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferTest : public ::testing::Test {
 protected:
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -13,17 +13,15 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -40,8 +38,8 @@ typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;

 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 int /*tx_type*/) {
-  vpx_fdct4x4_c(in, out, stride);
+                 int tx_type) {
+  vp9_fdct4x4_c(in, out, stride);
 }

 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -49,17 +47,17 @@ void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
 }

 void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
-                 int /*tx_type*/) {
+                 int tx_type) {
  vp9_fwht4x4_c(in, out, stride);
 }

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
+  vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);
 }

 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
+  vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);
 }

 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -71,23 +69,23 @@ void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);
 }

 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
 }

 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class Trans4x4TestBase {
 public:
@@ -104,13 +102,13 @@ class Trans4x4TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 #endif

      // Initialize a test block with input range [-255, 255].
@@ -141,11 +139,10 @@ class Trans4x4TestBase {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const int diff =
+        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        ASSERT_EQ(VPX_BITS_8, bit_depth_);
-        const int diff = dst[j] - src[j];
+        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        if (max_error < error)
@@ -166,9 +163,9 @@ class Trans4x4TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -187,13 +184,15 @@ class Trans4x4TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -220,13 +219,13 @@ class Trans4x4TestBase {
  void RunInvAccuracyCheck(int limit) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -258,10 +257,10 @@ class Trans4x4TestBase {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const int diff =
+        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const int diff = dst[j] - src[j];
+        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_GE(static_cast<uint32_t>(limit), error)
@@ -419,15 +418,15 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -453,7 +452,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -461,46 +460,43 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct4x4_c,
-                   &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+        make_tuple(&vp9_fdct4x4_c,
+                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
-    NEON, Trans4x4HT,
+    DISABLED_NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

-#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+#if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
+    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Trans4x4WHT,
+    MMX, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct4x4_sse2,
-                   &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fdct4x4_sse2,
+                   &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
@@ -508,39 +504,33 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_sse2,      &vpx_idct4x4_16_add_c, 0,
+        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,
                   VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans4x4DCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(
-    MSA, Trans4x4HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,41 +13,19 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {

 const int kNumCoeffs = 64;
 const double kPi = 3.141592653589793238462643383279502884;
-
-const int kSignBiasMaxDiff255 = 1500;
-const int kSignBiasMaxDiff15 = 10000;
-
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        int tx_type);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        int tx_type);
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
-
-void reference_8x8_dct_1d(const double in[8], double out[8]) {
+void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 8; k++) {
    out[k] = 0.0;
@@ -65,7 +43,7 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
    double temp_in[8], temp_out[8];
    for (int j = 0; j < 8; ++j)
      temp_in[j] = input[j*8 + i];
-    reference_8x8_dct_1d(temp_in, temp_out);
+    reference_8x8_dct_1d(temp_in, temp_out, 1);
    for (int j = 0; j < 8; ++j)
      output[j * 8 + i] = temp_out[j];
  }
@@ -74,17 +52,29 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
    double temp_in[8], temp_out[8];
    for (int j = 0; j < 8; ++j)
      temp_in[j] = output[j + i*8];
-    reference_8x8_dct_1d(temp_in, temp_out);
+    reference_8x8_dct_1d(temp_in, temp_out, 1);
    // Scale by some magic number
    for (int j = 0; j < 8; ++j)
      output[j + i * 8] = temp_out[j] * 2;
  }
 }

+using libvpx_test::ACMRandom;

-void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride,
-                 int /*tx_type*/) {
-  vpx_fdct8x8_c(in, out, stride);
+namespace {
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
+
+void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  vp9_fdct8x8_c(in, out, stride);
 }

 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -93,11 +83,11 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
+  vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
+  vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -108,33 +98,32 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
 }

-#if HAVE_SSE2
-
 void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
+  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
 }

 void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
+  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
 }

+#if HAVE_SSE2
 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
 }

 void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif
+#endif

 class FwdTrans8x8TestBase {
 public:
@@ -146,8 +135,8 @@ class FwdTrans8x8TestBase {

  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
    int count_sign_block[64][2];
    const int count_test_block = 100000;

@@ -171,7 +160,7 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff255;
+      const int max_diff = 1125;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
@@ -184,7 +173,7 @@ class FwdTrans8x8TestBase {
    memset(count_sign_block, 0, sizeof(count_sign_block));

    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
+      // Initialize a test block with input range [-mask_/16, mask_/16].
      for (int j = 0; j < 64; ++j)
        test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
                              ((rnd.Rand16() & mask_) >> 4);
@@ -201,9 +190,9 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = kSignBiasMaxDiff15;
+      const int max_diff = 10000;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 8x8 FDCT/FHT has a sign bias > "
+          << "Error: 4x4 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
@@ -217,13 +206,13 @@ class FwdTrans8x8TestBase {
    int max_error = 0;
    int total_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
-    DECLARE_ALIGNED(16, uint8_t, dst[64]);
-    DECLARE_ALIGNED(16, uint8_t, src[64]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
-    DECLARE_ALIGNED(16, uint16_t, src16[64]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -294,14 +283,14 @@ class FwdTrans8x8TestBase {
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
-    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
-    DECLARE_ALIGNED(16, uint8_t, dst[64]);
-    DECLARE_ALIGNED(16, uint8_t, src[64]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
-    DECLARE_ALIGNED(16, uint16_t, src16[64]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -383,13 +372,13 @@ class FwdTrans8x8TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -425,10 +414,10 @@ class FwdTrans8x8TestBase {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const int diff =
+        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
-        const int diff = dst[j] - src[j];
+        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_GE(1u << 2 * (bit_depth_ - 8), error)
@@ -441,9 +430,9 @@ class FwdTrans8x8TestBase {
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -458,7 +447,7 @@ class FwdTrans8x8TestBase {
        coeff_r[j] = static_cast<tran_low_t>(round(out_r[j]));

      for (int j = 0; j < kNumCoeffs; ++j) {
-        const int32_t diff = coeff[j] - coeff_r[j];
+        const uint32_t diff = coeff[j] - coeff_r[j];
        const uint32_t error = diff * diff;
        EXPECT_GE(9u << 2 * (bit_depth_ - 8), error)
            << "Error: 8x8 DCT has error " << error
@@ -471,12 +460,12 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 12;
-    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
-    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
 #endif
    const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;

@@ -511,10 +500,10 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {

      for (int j = 0; j < kNumCoeffs; ++j) {
 #if CONFIG_VP9_HIGHBITDEPTH
-        const int diff =
+        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 #else
-        const int diff = dst[j] - ref[j];
+        const uint32_t diff = dst[j] - ref[j];
 #endif
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
@@ -631,8 +620,8 @@ class InvTrans8x8DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_ = GET_PARAM(2);
-    pitch_ = 8;
+    thresh_   = GET_PARAM(2);
+    pitch_    = 8;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }
@@ -643,7 +632,7 @@ class InvTrans8x8DCT
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }
-  void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {}
+  void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}

  IdctFunc ref_txfm_;
  IdctFunc inv_txfm_;
@@ -660,21 +649,20 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
+        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+#endif

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
@@ -683,6 +671,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
@@ -694,31 +683,28 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,
+        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    NEON, FwdTrans8x8HT,
+    DISABLED_NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,
+        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
@@ -727,21 +713,22 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
-#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_c,
+        make_tuple(&vp9_highbd_fdct8x8_c,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_sse2,
+        make_tuple(&vp9_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_c,
+        make_tuple(&vp9_highbd_fdct8x8_c,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
-        make_tuple(&vpx_highbd_fdct8x8_sse2,
-                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
+        make_tuple(&vp9_highbd_fdct8x8_sse2,
+                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
+        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+

 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
@@ -764,28 +751,15 @@ INSTANTIATE_TEST_CASE_P(
                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
        make_tuple(&idct8x8_12,
                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
-#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
-    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
+    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,
+        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
                   VPX_BITS_8)));
 #endif
-
-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, FwdTrans8x8DCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));
-INSTANTIATE_TEST_CASE_P(
-    MSA, FwdTrans8x8HT,
-    ::testing::Values(
-        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/frame_size_tests.cc
+++ b/test/frame_size_tests.cc
@@ -74,7 +74,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
  // size or almost 1 gig of memory.
  // In total the allocations will exceed 2GiB which may cause a failure with
  // mingw + wine, use a smaller size in that case.
-#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
+#if defined(_WIN32) && !defined(_WIN64)
  video.SetSize(4096, 3072);
 #else
  video.SetSize(4096, 4096);
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -1,220 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <algorithm>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-
-typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);
-
-void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
-  int16_t b[8];
-  for (int i = 0; i < 8; i += 2) {
-    b[i + 0] = a[i * a_stride] + a[(i + 1) * a_stride];
-    b[i + 1] = a[i * a_stride] - a[(i + 1) * a_stride];
-  }
-  int16_t c[8];
-  for (int i = 0; i < 8; i += 4) {
-    c[i + 0] = b[i + 0] + b[i + 2];
-    c[i + 1] = b[i + 1] + b[i + 3];
-    c[i + 2] = b[i + 0] - b[i + 2];
-    c[i + 3] = b[i + 1] - b[i + 3];
-  }
-  out[0] = c[0] + c[4];
-  out[7] = c[1] + c[5];
-  out[3] = c[2] + c[6];
-  out[4] = c[3] + c[7];
-  out[2] = c[0] - c[4];
-  out[6] = c[1] - c[5];
-  out[1] = c[2] - c[6];
-  out[5] = c[3] - c[7];
-}
-
-void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
-  int16_t buf[64];
-  for (int i = 0; i < 8; ++i) {
-    hadamard_loop(a + i, a_stride, buf + i * 8);
-  }
-
-  for (int i = 0; i < 8; ++i) {
-    hadamard_loop(buf + i, 8, b + i * 8);
-  }
-}
-
-void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
-  /* The source is a 16x16 block. The destination is rearranged to 8x32.
-   * Input is 9 bit. */
-  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
-  reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64);
-  reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128);
-  reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192);
-
-  /* Overlay the 8x8 blocks and combine. */
-  for (int i = 0; i < 64; ++i) {
-    /* 8x8 steps the range up to 15 bits. */
-    const int16_t a0 = b[0];
-    const int16_t a1 = b[64];
-    const int16_t a2 = b[128];
-    const int16_t a3 = b[192];
-
-    /* Prevent the result from escaping int16_t. */
-    const int16_t b0 = (a0 + a1) >> 1;
-    const int16_t b1 = (a0 - a1) >> 1;
-    const int16_t b2 = (a2 + a3) >> 1;
-    const int16_t b3 = (a2 - a3) >> 1;
-
-    /* Store a 16 bit value. */
-    b[  0] = b0 + b2;
-    b[ 64] = b1 + b3;
-    b[128] = b0 - b2;
-    b[192] = b1 - b3;
-
-    ++b;
-  }
-}
-
-class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
- public:
-  virtual void SetUp() {
-    h_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  HadamardFunc h_func_;
-  ACMRandom rnd_;
-};
-
-class Hadamard8x8Test : public HadamardTestBase {};
-
-TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
-  DECLARE_ALIGNED(16, int16_t, a[64]);
-  DECLARE_ALIGNED(16, int16_t, b[64]);
-  int16_t b_ref[64];
-  for (int i = 0; i < 64; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-  memset(b, 0, sizeof(b));
-  memset(b_ref, 0, sizeof(b_ref));
-
-  reference_hadamard8x8(a, 8, b_ref);
-  ASM_REGISTER_STATE_CHECK(h_func_(a, 8, b));
-
-  // The order of the output is not important. Sort before checking.
-  std::sort(b, b + 64);
-  std::sort(b_ref, b_ref + 64);
-  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-}
-
-TEST_P(Hadamard8x8Test, VaryStride) {
-  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
-  DECLARE_ALIGNED(16, int16_t, b[64]);
-  int16_t b_ref[64];
-  for (int i = 0; i < 64 * 8; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-
-  for (int i = 8; i < 64; i += 8) {
-    memset(b, 0, sizeof(b));
-    memset(b_ref, 0, sizeof(b_ref));
-
-    reference_hadamard8x8(a, i, b_ref);
-    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
-
-    // The order of the output is not important. Sort before checking.
-    std::sort(b, b + 64);
-    std::sort(b_ref, b_ref + 64);
-    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(SSSE3, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_ssse3));
-#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_neon));
-#endif  // HAVE_NEON
-
-class Hadamard16x16Test : public HadamardTestBase {};
-
-TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
-  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
-  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  int16_t b_ref[16 * 16];
-  for (int i = 0; i < 16 * 16; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-  memset(b, 0, sizeof(b));
-  memset(b_ref, 0, sizeof(b_ref));
-
-  reference_hadamard16x16(a, 16, b_ref);
-  ASM_REGISTER_STATE_CHECK(h_func_(a, 16, b));
-
-  // The order of the output is not important. Sort before checking.
-  std::sort(b, b + 16 * 16);
-  std::sort(b_ref, b_ref + 16 * 16);
-  EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-}
-
-TEST_P(Hadamard16x16Test, VaryStride) {
-  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
-  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
-  int16_t b_ref[16 * 16];
-  for (int i = 0; i < 16 * 16 * 8; ++i) {
-    a[i] = rnd_.Rand9Signed();
-  }
-
-  for (int i = 8; i < 64; i += 8) {
-    memset(b, 0, sizeof(b));
-    memset(b_ref, 0, sizeof(b_ref));
-
-    reference_hadamard16x16(a, i, b_ref);
-    ASM_REGISTER_STATE_CHECK(h_func_(a, i, b));
-
-    // The order of the output is not important. Sort before checking.
-    std::sort(b, b + 16 * 16);
-    std::sort(b_ref, b_ref + 16 * 16);
-    EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b)));
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_neon));
-#endif  // HAVE_NEON
-}  // namespace
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -14,7 +14,8 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vpx_dsp_rtcd.h"
+#include "./vp9_rtcd.h"
+
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"

@@ -67,6 +68,43 @@ void reference_dct_2d(int16_t input[64], double output[64]) {
    output[i] *= 2;
 }

+void reference_idct_1d(double input[8], double output[8]) {
+  const double kPi = 3.141592653589793238462643383279502884;
+  const double kSqrt2 = 1.414213562373095048801688724209698;
+  for (int k = 0; k < 8; k++) {
+    output[k] = 0.0;
+    for (int n = 0; n < 8; n++) {
+      output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
+      if (n == 0)
+        output[k] = output[k]/kSqrt2;
+    }
+  }
+}
+
+void reference_idct_2d(double input[64], int16_t output[64]) {
+  double out[64], out2[64];
+  // First transform rows
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j)
+      temp_in[j] = input[j + i*8];
+    reference_idct_1d(temp_in, temp_out);
+    for (int j = 0; j < 8; ++j)
+      out[j + i*8] = temp_out[j];
+  }
+  // Then transform columns
+  for (int i = 0; i < 8; ++i) {
+    double temp_in[8], temp_out[8];
+    for (int j = 0; j < 8; ++j)
+      temp_in[j] = out[j*8 + i];
+    reference_idct_1d(temp_in, temp_out);
+    for (int j = 0; j < 8; ++j)
+      out2[j*8 + i] = temp_out[j];
+  }
+  for (int i = 0; i < 64; ++i)
+    output[i] = round(out2[i]/32);
+}
+
 TEST(VP9Idct8x8Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
@@ -87,7 +125,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
    reference_dct_2d(input, output_r);
    for (int j = 0; j < 64; ++j)
      coeff[j] = round(output_r[j]);
-    vpx_idct8x8_64_add_c(coeff, dst, 8);
+    vp9_idct8x8_64_add_c(coeff, dst, 8);
    for (int j = 0; j < 64; ++j) {
      const int diff = dst[j] - src[j];
      const int error = diff * diff;
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -10,11 +10,10 @@

 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "vpx/vpx_integer.h"

 typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
@@ -114,8 +113,4 @@ INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
 #endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
-                        ::testing::Values(vp8_short_idct4x4llm_msa));
-#endif
 }
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -0,0 +1,396 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+using libvpx_test::ACMRandom;
+
+class IntraPredBase {
+ public:
+  virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void SetupMacroblock(MACROBLOCKD *mbptr,
+                       MODE_INFO *miptr,
+                       uint8_t *data,
+                       int block_size,
+                       int stride,
+                       int num_planes) {
+    mbptr_ = mbptr;
+    miptr_ = miptr;
+    mbptr_->up_available = 1;
+    mbptr_->left_available = 1;
+    mbptr_->mode_info_context = miptr_;
+    stride_ = stride;
+    block_size_ = block_size;
+    num_planes_ = num_planes;
+    for (int p = 0; p < num_planes; p++)
+      data_ptr_[p] = data + stride * (block_size + 1) * p +
+                     stride + block_size;
+  }
+
+  void FillRandom() {
+    // Fill edges with random data
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    for (int p = 0; p < num_planes_; p++) {
+      for (int x = -1 ; x <= block_size_; x++)
+        data_ptr_[p][x - stride_] = rnd.Rand8();
+      for (int y = 0; y < block_size_; y++)
+        data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
+    }
+  }
+
+  virtual void Predict(MB_PREDICTION_MODE mode) = 0;
+
+  void SetLeftUnavailable() {
+    mbptr_->left_available = 0;
+    for (int p = 0; p < num_planes_; p++)
+      for (int i = -1; i < block_size_; ++i)
+        data_ptr_[p][stride_ * i - 1] = 129;
+  }
+
+  void SetTopUnavailable() {
+    mbptr_->up_available = 0;
+    for (int p = 0; p < num_planes_; p++)
+      memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
+  }
+
+  void SetTopLeftUnavailable() {
+    SetLeftUnavailable();
+    SetTopUnavailable();
+  }
+
+  int BlockSizeLog2Min1() const {
+    switch (block_size_) {
+      case 16:
+        return 3;
+      case 8:
+        return 2;
+      default:
+        return 0;
+    }
+  }
+
+  // check DC prediction output against a reference
+  void CheckDCPrediction() const {
+    for (int p = 0; p < num_planes_; p++) {
+      // calculate expected DC
+      int expected;
+      if (mbptr_->up_available || mbptr_->left_available) {
+        int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
+                             mbptr_->left_available;
+        if (mbptr_->up_available)
+          for (int x = 0; x < block_size_; x++)
+            sum += data_ptr_[p][x - stride_];
+        if (mbptr_->left_available)
+          for (int y = 0; y < block_size_; y++)
+            sum += data_ptr_[p][y * stride_ - 1];
+        expected = (sum + (1 << (shift - 1))) >> shift;
+      } else {
+        expected = 0x80;
+      }
+      // check that all subsequent lines are equal to the first
+      for (int y = 1; y < block_size_; ++y)
+        ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
+                            block_size_));
+      // within the first line, ensure that each pixel has the same value
+      for (int x = 1; x < block_size_; ++x)
+        ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
+      // now ensure that that pixel has the expected (DC) value
+      ASSERT_EQ(expected, data_ptr_[p][0]);
+    }
+  }
+
+  // check V prediction output against a reference
+  void CheckVPrediction() const {
+    // check that all lines equal the top border
+    for (int p = 0; p < num_planes_; p++)
+      for (int y = 0; y < block_size_; y++)
+        ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
+                            &data_ptr_[p][y * stride_], block_size_));
+  }
+
+  // check H prediction output against a reference
+  void CheckHPrediction() const {
+    // for each line, ensure that each pixel is equal to the left border
+    for (int p = 0; p < num_planes_; p++)
+      for (int y = 0; y < block_size_; y++)
+        for (int x = 0; x < block_size_; x++)
+          ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
+                    data_ptr_[p][x + y * stride_]);
+  }
+
+  static int ClipByte(int value) {
+    if (value > 255)
+      return 255;
+    else if (value < 0)
+      return 0;
+    return value;
+  }
+
+  // check TM prediction output against a reference
+  void CheckTMPrediction() const {
+    for (int p = 0; p < num_planes_; p++)
+      for (int y = 0; y < block_size_; y++)
+        for (int x = 0; x < block_size_; x++) {
+          const int expected = ClipByte(data_ptr_[p][x - stride_]
+                                      + data_ptr_[p][stride_ * y - 1]
+                                      - data_ptr_[p][-1 - stride_]);
+          ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
+       }
+  }
+
+  // Actual test
+  void RunTest() {
+    {
+      SCOPED_TRACE("DC_PRED");
+      FillRandom();
+      Predict(DC_PRED);
+      CheckDCPrediction();
+    }
+    {
+      SCOPED_TRACE("DC_PRED LEFT");
+      FillRandom();
+      SetLeftUnavailable();
+      Predict(DC_PRED);
+      CheckDCPrediction();
+    }
+    {
+      SCOPED_TRACE("DC_PRED TOP");
+      FillRandom();
+      SetTopUnavailable();
+      Predict(DC_PRED);
+      CheckDCPrediction();
+    }
+    {
+      SCOPED_TRACE("DC_PRED TOP_LEFT");
+      FillRandom();
+      SetTopLeftUnavailable();
+      Predict(DC_PRED);
+      CheckDCPrediction();
+    }
+    {
+      SCOPED_TRACE("H_PRED");
+      FillRandom();
+      Predict(H_PRED);
+      CheckHPrediction();
+    }
+    {
+      SCOPED_TRACE("V_PRED");
+      FillRandom();
+      Predict(V_PRED);
+      CheckVPrediction();
+    }
+    {
+      SCOPED_TRACE("TM_PRED");
+      FillRandom();
+      Predict(TM_PRED);
+      CheckTMPrediction();
+    }
+  }
+
+  MACROBLOCKD *mbptr_;
+  MODE_INFO *miptr_;
+  uint8_t *data_ptr_[2];  // in the case of Y, only [0] is used
+  int stride_;
+  int block_size_;
+  int num_planes_;
+};
+
+typedef void (*IntraPredYFunc)(MACROBLOCKD *x,
+                               uint8_t *yabove_row,
+                               uint8_t *yleft,
+                               int left_stride,
+                               uint8_t *ypred_ptr,
+                               int y_stride);
+
+class IntraPredYTest
+    : public IntraPredBase,
+      public ::testing::TestWithParam<IntraPredYFunc> {
+ public:
+  static void SetUpTestCase() {
+    mb_ = reinterpret_cast<MACROBLOCKD*>(
+        vpx_memalign(32, sizeof(MACROBLOCKD)));
+    mi_ = reinterpret_cast<MODE_INFO*>(
+        vpx_memalign(32, sizeof(MODE_INFO)));
+    data_array_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(data_array_);
+    vpx_free(mi_);
+    vpx_free(mb_);
+    data_array_ = NULL;
+  }
+
+ protected:
+  static const int kBlockSize = 16;
+  static const int kDataAlignment = 16;
+  static const int kStride = kBlockSize * 3;
+  // We use 48 so that the data pointer of the first pixel in each row of
+  // each macroblock is 16-byte aligned, and this gives us access to the
+  // top-left and top-right corner pixels belonging to the top-left/right
+  // macroblocks.
+  // We use 17 lines so we have one line above us for top-prediction.
+  static const int kDataBufferSize = kStride * (kBlockSize + 1);
+
+  virtual void SetUp() {
+    pred_fn_ = GetParam();
+    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
+  }
+
+  virtual void Predict(MB_PREDICTION_MODE mode) {
+    mbptr_->mode_info_context->mbmi.mode = mode;
+    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
+                                      data_ptr_[0] - kStride,
+                                      data_ptr_[0] - 1, kStride,
+                                      data_ptr_[0], kStride));
+  }
+
+  IntraPredYFunc pred_fn_;
+  static uint8_t* data_array_;
+  static MACROBLOCKD * mb_;
+  static MODE_INFO *mi_;
+};
+
+MACROBLOCKD* IntraPredYTest::mb_ = NULL;
+MODE_INFO* IntraPredYTest::mi_ = NULL;
+uint8_t* IntraPredYTest::data_array_ = NULL;
+
+TEST_P(IntraPredYTest, IntraPredTests) {
+  RunTest();
+}
+
+INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mby_s_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mby_s_sse2));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mby_s_ssse3));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mby_s_neon));
+#endif
+
+typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
+                                uint8_t *uabove_row,
+                                uint8_t *vabove_row,
+                                uint8_t *uleft,
+                                uint8_t *vleft,
+                                int left_stride,
+                                uint8_t *upred_ptr,
+                                uint8_t *vpred_ptr,
+                                int pred_stride);
+
+class IntraPredUVTest
+    : public IntraPredBase,
+      public ::testing::TestWithParam<IntraPredUvFunc> {
+ public:
+  static void SetUpTestCase() {
+    mb_ = reinterpret_cast<MACROBLOCKD*>(
+        vpx_memalign(32, sizeof(MACROBLOCKD)));
+    mi_ = reinterpret_cast<MODE_INFO*>(
+        vpx_memalign(32, sizeof(MODE_INFO)));
+    data_array_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(data_array_);
+    vpx_free(mi_);
+    vpx_free(mb_);
+    data_array_ = NULL;
+  }
+
+ protected:
+  static const int kBlockSize = 8;
+  static const int kDataAlignment = 8;
+  static const int kStride = kBlockSize * 3;
+  // We use 24 so that the data pointer of the first pixel in each row of
+  // each macroblock is 8-byte aligned, and this gives us access to the
+  // top-left and top-right corner pixels belonging to the top-left/right
+  // macroblocks.
+  // We use 9 lines so we have one line above us for top-prediction.
+  // [0] = U, [1] = V
+  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
+
+  virtual void SetUp() {
+    pred_fn_ = GetParam();
+    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
+  }
+
+  virtual void Predict(MB_PREDICTION_MODE mode) {
+    mbptr_->mode_info_context->mbmi.uv_mode = mode;
+    pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
+             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
+             data_ptr_[0], data_ptr_[1], kStride);
+  }
+
+  IntraPredUvFunc pred_fn_;
+  // We use 24 so that the data pointer of the first pixel in each row of
+  // each macroblock is 8-byte aligned, and this gives us access to the
+  // top-left and top-right corner pixels belonging to the top-left/right
+  // macroblocks.
+  // We use 9 lines so we have one line above us for top-prediction.
+  // [0] = U, [1] = V
+  static uint8_t* data_array_;
+  static MACROBLOCKD* mb_;
+  static MODE_INFO* mi_;
+};
+
+MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
+MODE_INFO* IntraPredUVTest::mi_ = NULL;
+uint8_t* IntraPredUVTest::data_array_ = NULL;
+
+TEST_P(IntraPredUVTest, IntraPredTests) {
+  RunTest();
+}
+
+INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mbuv_s_c));
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mbuv_s_sse2));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mbuv_s_ssse3));
+#endif
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
+                        ::testing::Values(
+                            vp8_build_intra_predictors_mbuv_s_neon));
+#endif
+
+}  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -63,22 +63,9 @@ class InvalidFileTest
    EXPECT_NE(res, EOF) << "Read result data failed";

    // Check results match.
-    const DecodeParam input = GET_PARAM(1);
-    if (input.threads > 1) {
-      // The serial decode check is too strict for tile-threaded decoding as
-      // there is no guarantee on the decode order nor which specific error
-      // will take precedence. Currently a tile-level error is not forwarded so
-      // the frame will simply be marked corrupt.
-      EXPECT_TRUE(res_dec == expected_res_dec ||
-                  res_dec == VPX_CODEC_CORRUPT_FRAME)
-          << "Results don't match: frame number = " << video.frame_number()
-          << ". (" << decoder->DecodeError() << "). Expected: "
-          << expected_res_dec << " or " << VPX_CODEC_CORRUPT_FRAME;
-    } else {
-      EXPECT_EQ(expected_res_dec, res_dec)
-          << "Results don't match: frame number = " << video.frame_number()
-          << ". (" << decoder->DecodeError() << ")";
-    }
+    EXPECT_EQ(expected_res_dec, res_dec)
+        << "Results don't match: frame number = " << video.frame_number()
+        << ". (" << decoder->DecodeError() << ")";

    return !HasFailure();
  }
@@ -125,9 +112,7 @@ TEST_P(InvalidFileTest, ReturnCode) {

 const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-02-v2.webm"},
-#if CONFIG_VP9_HIGHBITDEPTH
  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
-#endif
  {1, "invalid-vp90-03-v3.webm"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
@@ -136,8 +121,6 @@ const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
-  {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
@@ -158,7 +141,7 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
 }

 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
-  {1, "invalid-vp90-01-v3.webm"},
+  {1, "invalid-vp90-01-v2.webm"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
@@ -168,7 +151,6 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
  {4, "invalid-"
      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
-  {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
 };
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -1,119 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-class LevelTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  LevelTest()
-     : EncoderTest(GET_PARAM(0)),
-       encoding_mode_(GET_PARAM(1)),
-       cpu_used_(GET_PARAM(2)),
-       min_gf_internal_(24),
-       target_level_(0),
-       level_(0) {}
-  virtual ~LevelTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-    if (encoding_mode_ != ::libvpx_test::kRealTime) {
-      cfg_.g_lag_in_frames = 25;
-      cfg_.rc_end_usage = VPX_VBR;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = VPX_CBR;
-    }
-    cfg_.rc_2pass_vbr_minsection_pct = 5;
-    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
-    cfg_.rc_target_bitrate = 400;
-    cfg_.rc_max_quantizer = 63;
-    cfg_.rc_min_quantizer = 0;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
-      encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_);
-      encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_);
-      if (encoding_mode_ != ::libvpx_test::kRealTime) {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      }
-    }
-    encoder->Control(VP9E_GET_LEVEL, &level_);
-    ASSERT_LE(level_, 51);
-    ASSERT_GE(level_, 0);
-  }
-
-  ::libvpx_test::TestMode encoding_mode_;
-  int cpu_used_;
-  int min_gf_internal_;
-  int target_level_;
-  int level_;
-};
-
-// Test for keeping level stats only
-TEST_P(LevelTest, TestTargetLevel0) {
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       40);
-  target_level_ = 0;
-  min_gf_internal_ = 4;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_EQ(11, level_);
-
-  cfg_.rc_target_bitrate = 1600;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_EQ(20, level_);
-}
-
-// Test for level control being turned off
-TEST_P(LevelTest, TestTargetLevel255) {
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       30);
-  target_level_ = 255;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-TEST_P(LevelTest, TestTargetLevelApi) {
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1);
-  static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo;
-  vpx_codec_ctx_t enc;
-  vpx_codec_enc_cfg_t cfg;
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
-  for (int level = 0; level <= 256; ++level) {
-    if (level == 10 || level == 11 || level == 20 || level == 21 ||
-        level == 30 || level == 31 || level == 40 || level == 41 ||
-        level == 50 || level == 51 || level == 52 || level == 60 ||
-        level == 61 || level == 62 || level == 0 || level == 255)
-      EXPECT_EQ(VPX_CODEC_OK,
-                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
-    else
-      EXPECT_EQ(VPX_CODEC_INVALID_PARAM,
-                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
-  }
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc));
-}
-
-VP9_INSTANTIATE_TEST_CASE(LevelTest,
-                          ::testing::Values(::libvpx_test::kTwoPassGood,
-                                            ::libvpx_test::kOnePassGood),
-                          ::testing::Range(0, 9));
-}  // namespace
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -13,17 +13,18 @@
 #include <string>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
-#include "vp9/common/vp9_loopfilter.h"
 #include "vpx/vpx_integer.h"

+#define MAX_LOOP_FILTER 63
+
 using libvpx_test::ACMRandom;

 namespace {
@@ -37,14 +38,15 @@ const int number_of_iterations = 10000;
 #if CONFIG_VP9_HIGHBITDEPTH
 typedef void (*loop_op_t)(uint16_t *s, int p, const uint8_t *blimit,
                          const uint8_t *limit, const uint8_t *thresh,
-                          int bd);
+                          int count, int bd);
 typedef void (*dual_loop_op_t)(uint16_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
                               const uint8_t *thresh1, int bd);
 #else
 typedef void (*loop_op_t)(uint8_t *s, int p, const uint8_t *blimit,
-                          const uint8_t *limit, const uint8_t *thresh);
+                          const uint8_t *limit, const uint8_t *thresh,
+                          int count);
 typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *limit0, const uint8_t *thresh0,
                               const uint8_t *blimit1, const uint8_t *limit1,
@@ -54,6 +56,58 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
 typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
 typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;

+#if HAVE_SSE2
+#if CONFIG_VP9_HIGHBITDEPTH
+void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
+                              const uint8_t *limit, const uint8_t *thresh,
+                              int count, int bd) {
+  vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+}
+
+void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count, int bd) {
+  vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+}
+
+void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
+                                   const uint8_t *limit, const uint8_t *thresh,
+                                   int count, int bd) {
+  vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+}
+
+void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh,
+                                int count, int bd) {
+  vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+}
+#else
+void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
+                              const uint8_t *limit, const uint8_t *thresh,
+                              int count) {
+  vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count) {
+  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
+                                   const uint8_t *limit, const uint8_t *thresh,
+                                   int count) {
+  vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh,
+                                int count) {
+  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@@ -97,22 +151,29 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -123,6 +184,7 @@ TEST_P(Loop8Test6Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs/32;
+    int count = 1;

    uint16_t tmp_s[kNumCoeffs];
    int j = 0;
@@ -154,13 +216,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int j = 0; j < kNumCoeffs; ++j) {
@@ -182,35 +244,29 @@ TEST_P(Loop8Test6Param, ValueCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
-  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
-
-  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
-  // function of sharpness_lvl and the loopfilter lvl as:
-  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
-  // ...
-  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
-  //        SIMD_WIDTH);
-  // This means that the largest value for mblim will occur when sharpness_lvl
-  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
-  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
-  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
-  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
-
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -221,18 +277,19 @@ TEST_P(Loop8Test6Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs / 32;
+    int count = 1;
    for (int j = 0; j < kNumCoeffs; ++j) {
      s[j] = rnd.Rand16() & mask_;
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int j = 0; j < kNumCoeffs; ++j) {
      err_count += ref_s[j] != s[j];
@@ -253,22 +310,30 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
-  DECLARE_ALIGNED(8,  uint8_t,  s[kNumCoeffs]);
-  DECLARE_ALIGNED(8,  uint8_t,  ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  ref_s, kNumCoeffs);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    // lim  <= MAX_LOOP_FILTER
+    while (tmp > MAX_LOOP_FILTER) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -278,12 +343,19 @@ TEST_P(Loop8Test9Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -355,22 +427,29 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
 #else
-  DECLARE_ALIGNED(8,  uint8_t, s[kNumCoeffs]);
-  DECLARE_ALIGNED(8,  uint8_t, ref_s[kNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(8,  uint8_t, s, kNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(8,  uint8_t, ref_s, kNumCoeffs);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    uint8_t tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -380,12 +459,19 @@ TEST_P(Loop8Test9Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
+    tmp = rnd.Rand8();
+    // mblim  <= 3 * MAX_LOOP_FILTER + 4
+    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
+    tmp = rnd.Rand8();
+    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
+      tmp = rnd.Rand8();
+    }
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -433,240 +519,114 @@ using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
+    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
-                   &vpx_highbd_lpf_horizontal_4_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
-                   &vpx_highbd_lpf_vertical_4_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
-                   &vpx_highbd_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
-                   &vpx_highbd_lpf_vertical_8_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
-                   &vpx_highbd_lpf_vertical_16_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
-                   &vpx_highbd_lpf_horizontal_4_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
-                   &vpx_highbd_lpf_vertical_4_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
-                   &vpx_highbd_lpf_horizontal_8_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_8_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_16_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
-                   &vpx_highbd_lpf_vertical_8_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
-                   &vpx_highbd_lpf_vertical_16_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
-                   &vpx_highbd_lpf_horizontal_4_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
-                   &vpx_highbd_lpf_vertical_4_c, 12),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
-                   &vpx_highbd_lpf_horizontal_8_c, 12),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_8_c, 12),
-        make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                   &vpx_highbd_lpf_horizontal_edge_16_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
-                   &vpx_highbd_lpf_vertical_8_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
-                   &vpx_highbd_lpf_vertical_16_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
-                   &vpx_highbd_lpf_vertical_16_dual_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
-                   &vpx_highbd_lpf_vertical_16_dual_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2,
-                   &vpx_highbd_lpf_vertical_16_dual_c, 12)));
+        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
+                   &vp9_highbd_lpf_horizontal_4_c, 8),
+        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
+                   &vp9_highbd_lpf_vertical_4_c, 8),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
+                   &vp9_highbd_lpf_horizontal_8_c, 8),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 8),
+        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
+                   &vp9_highbd_lpf_vertical_8_c, 8),
+        make_tuple(&wrapper_vertical_16_sse2,
+                   &wrapper_vertical_16_c, 8),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
+                   &vp9_highbd_lpf_horizontal_4_c, 10),
+        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
+                   &vp9_highbd_lpf_vertical_4_c, 10),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
+                   &vp9_highbd_lpf_horizontal_8_c, 10),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 10),
+        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
+                   &vp9_highbd_lpf_vertical_8_c, 10),
+        make_tuple(&wrapper_vertical_16_sse2,
+                   &wrapper_vertical_16_c, 10),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
+                   &vp9_highbd_lpf_horizontal_4_c, 12),
+        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
+                   &vp9_highbd_lpf_vertical_4_c, 12),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
+                   &vp9_highbd_lpf_horizontal_8_c, 12),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 12),
+        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
+                   &vp9_highbd_lpf_vertical_8_c, 12),
+        make_tuple(&wrapper_vertical_16_sse2,
+                   &wrapper_vertical_16_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test6Param,
+    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_sse2,
-                   &vpx_lpf_horizontal_4_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_sse2,
-                   &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_sse2,
-                   &vpx_lpf_vertical_4_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_sse2,
-                   &vpx_lpf_vertical_8_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_sse2,
-                   &vpx_lpf_vertical_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_dual_sse2,
-                   &vpx_lpf_vertical_16_dual_c, 8)));
+        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
+        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
+        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

-#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
-    AVX2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
-                   &vpx_lpf_horizontal_edge_16_c, 8)));
-#endif
-
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
+    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
-                   &vpx_highbd_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
-                   &vpx_highbd_lpf_vertical_8_dual_c, 8),
-        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_4_dual_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_8_dual_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
-                   &vpx_highbd_lpf_vertical_4_dual_c, 10),
-        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
-                   &vpx_highbd_lpf_vertical_8_dual_c, 10),
-        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_4_dual_c, 12),
-        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
-                   &vpx_highbd_lpf_horizontal_8_dual_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
-                   &vpx_highbd_lpf_vertical_4_dual_c, 12),
-        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
-                   &vpx_highbd_lpf_vertical_8_dual_c, 12)));
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 8),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 10),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2, Loop8Test9Param,
+    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
-                   &vpx_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
-                   &vpx_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_dual_sse2,
-                   &vpx_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_dual_sse2,
-                   &vpx_lpf_vertical_8_dual_c, 8)));
+        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSE2
+
+#if HAVE_SSE2
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    SSE_C_COMPARE_DUAL, Loop8Test9Param,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, 8),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, 10),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, 10),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, 10),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, 10),
+        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_4_dual_c, 12),
+        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
+                   &vp9_highbd_lpf_horizontal_8_dual_c, 12),
+        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
+                   &vp9_highbd_lpf_vertical_4_dual_c, 12),
+        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
+                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    SSE_C_COMPARE_DUAL, Loop8Test9Param,
+    ::testing::Values(
+        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
+                   &vp9_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
+                   &vp9_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
+                   &vp9_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
+                   &vp9_lpf_vertical_8_dual_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif
-
-#if HAVE_NEON
-#if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
-#else
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test6Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-// Using #if inside the macro is unsupported on MSVS but the tests are not
-// currently built for MSVS with ARM and NEON.
-        make_tuple(&vpx_lpf_horizontal_edge_8_neon,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_neon,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_neon,
-                   &vpx_lpf_vertical_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_dual_neon,
-                   &vpx_lpf_vertical_16_dual_c, 8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vpx_lpf_horizontal_8_neon,
-                   &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_neon,
-                   &vpx_lpf_vertical_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_4_neon,
-                   &vpx_lpf_horizontal_4_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_neon,
-                   &vpx_lpf_vertical_4_c, 8)));
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test9Param,
-    ::testing::Values(
-#if HAVE_NEON_ASM
-        make_tuple(&vpx_lpf_horizontal_8_dual_neon,
-                   &vpx_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_dual_neon,
-                   &vpx_lpf_vertical_8_dual_c, 8),
-#endif  // HAVE_NEON_ASM
-        make_tuple(&vpx_lpf_horizontal_4_dual_neon,
-                   &vpx_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_dual_neon,
-                   &vpx_lpf_vertical_4_dual_c, 8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON
-
-#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    DSPR2, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_dspr2,
-                   &vpx_lpf_horizontal_4_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_dspr2,
-                   &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8,
-                   &vpx_lpf_horizontal_edge_8, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16,
-                   &vpx_lpf_horizontal_edge_16, 8),
-        make_tuple(&vpx_lpf_vertical_4_dspr2,
-                   &vpx_lpf_vertical_4_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_dspr2,
-                   &vpx_lpf_vertical_8_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_dspr2,
-                   &vpx_lpf_vertical_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_dual_dspr2,
-                   &vpx_lpf_vertical_16_dual_c, 8)));
-
-INSTANTIATE_TEST_CASE_P(
-    DSPR2, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_dual_dspr2,
-                   &vpx_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_dual_dspr2,
-                   &vpx_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_dual_dspr2,
-                   &vpx_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_dual_dspr2,
-                   &vpx_lpf_vertical_8_dual_c, 8)));
-#endif  // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_msa,
-                   &vpx_lpf_horizontal_4_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_msa,
-                   &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_msa,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_msa,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_msa,
-                   &vpx_lpf_vertical_4_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_msa,
-                   &vpx_lpf_vertical_8_c, 8),
-        make_tuple(&vpx_lpf_vertical_16_msa,
-                   &vpx_lpf_vertical_16_c, 8)));
-
-INSTANTIATE_TEST_CASE_P(
-    MSA, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_4_dual_msa,
-                   &vpx_lpf_horizontal_4_dual_c, 8),
-        make_tuple(&vpx_lpf_horizontal_8_dual_msa,
-                   &vpx_lpf_horizontal_8_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_4_dual_msa,
-                   &vpx_lpf_vertical_4_dual_c, 8),
-        make_tuple(&vpx_lpf_vertical_8_dual_msa,
-                   &vpx_lpf_vertical_8_dual_c, 8)));
-#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
-
 }  // namespace
--- a/test/masked_sad_test.cc
+++ b/test/masked_sad_test.cc
@@ -0,0 +1,209 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 500;  // Randomized rounds run by each OperationCheck test in this file.
+
+typedef unsigned int (*MaskedSADFunc)(const uint8_t *a, int a_stride,  // Signature of a masked SAD function:
+                                      const uint8_t *b, int b_stride,  // buffers a and b plus mask m, each
+                                      const uint8_t *m, int m_stride);  // passed together with its own stride.
+typedef std::tr1::tuple<MaskedSADFunc, MaskedSADFunc> MaskedSADParam;  // (function under test, reference function).
+
+class MaskedSADTest : public ::testing::TestWithParam<MaskedSADParam> {  // Fixture holding one (tested, reference) masked SAD pair.
+ public:
+  virtual ~MaskedSADTest() {}
+  virtual void SetUp() {  // Unpack the tuple parameter into the two function pointers.
+    maskedSAD_op_   = GET_PARAM(0);
+    ref_maskedSAD_op_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  MaskedSADFunc maskedSAD_op_;  // Implementation under test (tuple element 0).
+  MaskedSADFunc ref_maskedSAD_op_;  // Reference implementation (tuple element 1).
+};
+
+TEST_P(MaskedSADTest, OperationCheck) {  // Both functions must return the same value on random inputs.
+  unsigned int ref_ret, ret;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  src_ptr, 4096);  // 4096 entries, addressed below with a stride of 64.
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  ref_ptr, 4096);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, 4096);
+  int err_count = 0;  // Number of iterations whose results differed.
+  int first_failure = -1;  // Iteration index of the first mismatch; stays -1 when there is none.
+  int src_stride = 64;
+  int ref_stride = 64;
+  int msk_stride = 64;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < 4096; j++) {  // Refill all three buffers on every iteration.
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;  // Mask entry is either a random value in 0..63 or exactly 64.
+    }
+
+    ref_ret = ref_maskedSAD_op_(src_ptr, src_stride, ref_ptr, ref_stride,
+                                msk_ptr, msk_stride);
+    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src_ptr, src_stride,
+                                                 ref_ptr, ref_stride,
+                                                 msk_ptr, msk_stride));
+    if (ret != ref_ret) {  // Mismatches are counted here and reported once after the loop.
+      err_count++;
+      if (first_failure == -1)
+        first_failure = i;
+    }
+  }
+  EXPECT_EQ(0, err_count)
+    << "Error: Masked SAD Test, C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef unsigned int (*HighbdMaskedSADFunc)(const uint8_t *a, int a_stride,  // Same parameter list as MaskedSADFunc; the test
+                                            const uint8_t *b, int b_stride,  // passes CONVERT_TO_BYTEPTR() results for a and b
+                                            const uint8_t *m, int m_stride);  // and a plain uint8_t mask buffer for m.
+typedef std::tr1::tuple<HighbdMaskedSADFunc, HighbdMaskedSADFunc>  // (function under test, reference function).
+    HighbdMaskedSADParam;
+
+class HighbdMaskedSADTest : public ::testing::  // Fixture holding one (tested, reference) high-bit-depth masked SAD pair.
+        TestWithParam<HighbdMaskedSADParam> {
+ public:
+  virtual ~HighbdMaskedSADTest() {}
+  virtual void SetUp() {  // Unpack the tuple parameter into the two function pointers.
+    maskedSAD_op_   = GET_PARAM(0);
+    ref_maskedSAD_op_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  HighbdMaskedSADFunc maskedSAD_op_;  // Implementation under test (tuple element 0).
+  HighbdMaskedSADFunc ref_maskedSAD_op_;  // Reference implementation (tuple element 1).
+};
+
+TEST_P(HighbdMaskedSADTest, OperationCheck) {  // Both functions must return the same value on random 12-bit inputs.
+  unsigned int ref_ret, ret;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint16_t,  src_ptr, 4096);  // Pixel buffers are 16-bit; the mask below stays 8-bit.
+  DECLARE_ALIGNED_ARRAY(16, uint16_t,  ref_ptr, 4096);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, 4096);
+  uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // The functions take uint8_t*, so the 16-bit buffers go through CONVERT_TO_BYTEPTR.
+  uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;  // Number of iterations whose results differed.
+  int first_failure = -1;  // Iteration index of the first mismatch; stays -1 when there is none.
+  int src_stride = 64;
+  int ref_stride = 64;
+  int msk_stride = 64;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < 4096; j++) {  // Refill all three buffers on every iteration.
+      src_ptr[j] = rnd.Rand16()&0xfff;  // Keep only the low 12 bits.
+      ref_ptr[j] = rnd.Rand16()&0xfff;
+      msk_ptr[j] = ((rnd.Rand8()&0x7f) > 64) ? rnd.Rand8()&0x3f : 64;  // Mask entry is either a random value in 0..63 or exactly 64.
+    }
+
+    ref_ret = ref_maskedSAD_op_(src8_ptr, src_stride, ref8_ptr, ref_stride,
+                                msk_ptr, msk_stride);
+    ASM_REGISTER_STATE_CHECK(ret = maskedSAD_op_(src8_ptr, src_stride,
+                                                 ref8_ptr, ref_stride,
+                                                 msk_ptr, msk_stride));
+    if (ret != ref_ret) {  // Mismatches are counted here and reported once after the loop.
+      err_count++;
+      if (first_failure == -1)
+        first_failure = i;
+    }
+  }
+  EXPECT_EQ(0, err_count)
+    << "Error: High BD Masked SAD Test, C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(  // One (SSSE3, C) masked SAD pair per block size, 64x64 down to 4x4.
+  SSSE3_C_COMPARE, MaskedSADTest,
+  ::testing::Values(
+    make_tuple(&vp9_masked_sad64x64_ssse3,
+               &vp9_masked_sad64x64_c),
+    make_tuple(&vp9_masked_sad64x32_ssse3,
+               &vp9_masked_sad64x32_c),
+    make_tuple(&vp9_masked_sad32x64_ssse3,
+               &vp9_masked_sad32x64_c),
+    make_tuple(&vp9_masked_sad32x32_ssse3,
+               &vp9_masked_sad32x32_c),
+    make_tuple(&vp9_masked_sad32x16_ssse3,
+               &vp9_masked_sad32x16_c),
+    make_tuple(&vp9_masked_sad16x32_ssse3,
+               &vp9_masked_sad16x32_c),
+    make_tuple(&vp9_masked_sad16x16_ssse3,
+               &vp9_masked_sad16x16_c),
+    make_tuple(&vp9_masked_sad16x8_ssse3,
+               &vp9_masked_sad16x8_c),
+    make_tuple(&vp9_masked_sad8x16_ssse3,
+               &vp9_masked_sad8x16_c),
+    make_tuple(&vp9_masked_sad8x8_ssse3,
+               &vp9_masked_sad8x8_c),
+    make_tuple(&vp9_masked_sad8x4_ssse3,
+               &vp9_masked_sad8x4_c),
+    make_tuple(&vp9_masked_sad4x8_ssse3,
+               &vp9_masked_sad4x8_c),
+    make_tuple(&vp9_masked_sad4x4_ssse3,
+               &vp9_masked_sad4x4_c)));
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(  // One (SSSE3, C) high-bit-depth masked SAD pair per block size, 64x64 down to 4x4.
+  SSSE3_C_COMPARE, HighbdMaskedSADTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_masked_sad64x64_ssse3,
+               &vp9_highbd_masked_sad64x64_c),
+    make_tuple(&vp9_highbd_masked_sad64x32_ssse3,
+               &vp9_highbd_masked_sad64x32_c),
+    make_tuple(&vp9_highbd_masked_sad32x64_ssse3,
+               &vp9_highbd_masked_sad32x64_c),
+    make_tuple(&vp9_highbd_masked_sad32x32_ssse3,
+               &vp9_highbd_masked_sad32x32_c),
+    make_tuple(&vp9_highbd_masked_sad32x16_ssse3,
+               &vp9_highbd_masked_sad32x16_c),
+    make_tuple(&vp9_highbd_masked_sad16x32_ssse3,
+               &vp9_highbd_masked_sad16x32_c),
+    make_tuple(&vp9_highbd_masked_sad16x16_ssse3,
+               &vp9_highbd_masked_sad16x16_c),
+    make_tuple(&vp9_highbd_masked_sad16x8_ssse3,
+               &vp9_highbd_masked_sad16x8_c),
+    make_tuple(&vp9_highbd_masked_sad8x16_ssse3,
+               &vp9_highbd_masked_sad8x16_c),
+    make_tuple(&vp9_highbd_masked_sad8x8_ssse3,
+               &vp9_highbd_masked_sad8x8_c),
+    make_tuple(&vp9_highbd_masked_sad8x4_ssse3,
+               &vp9_highbd_masked_sad8x4_c),
+    make_tuple(&vp9_highbd_masked_sad4x8_ssse3,
+               &vp9_highbd_masked_sad4x8_c),
+    make_tuple(&vp9_highbd_masked_sad4x4_ssse3,
+               &vp9_highbd_masked_sad4x4_c)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_SSSE3
+}  // namespace
--- a/test/masked_variance_test.cc
+++ b/test/masked_variance_test.cc
@@ -0,0 +1,753 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_filter.h"
+
+#define MAX_SIZE 64  // Side length for the test buffers: MAX_SIZE*MAX_SIZE, or (MAX_SIZE+1) per side in the sub-pixel tests.
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const int number_of_iterations = 500;  // Randomized rounds run by each OperationCheck test in this file.
+
+typedef unsigned int (*MaskedVarianceFunc)(const uint8_t *a, int a_stride,  // Signature of a masked variance function:
+                                           const uint8_t *b, int b_stride,  // buffers a and b plus mask m, each with a
+                                           const uint8_t *m, int m_stride,  // stride; besides the return value it
+                                           unsigned int *sse);  // writes a second result through sse.
+
+typedef std::tr1::tuple<MaskedVarianceFunc,  // (function under test, reference function).
+                        MaskedVarianceFunc> MaskedVarianceParam;
+
+class MaskedVarianceTest :  // Fixture holding one (tested, reference) masked variance pair.
+  public ::testing::TestWithParam<MaskedVarianceParam> {
+ public:
+  virtual ~MaskedVarianceTest() {}
+  virtual void SetUp() {  // Unpack the tuple parameter into the two function pointers.
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  MaskedVarianceFunc opt_func_;  // Implementation under test (tuple element 0).
+  MaskedVarianceFunc ref_func_;  // Reference implementation (tuple element 1).
+};
+
+// Both functions must agree on return value and SSE for random inputs.
+TEST_P(MaskedVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  src_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  ref_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, MAX_SIZE*MAX_SIZE);
+  int err_count = 0;  // Number of iterations whose results differed.
+  int first_failure = -1;  // Iteration of the first mismatch, -1 if none.
+  int src_stride = MAX_SIZE;
+  int ref_stride = MAX_SIZE;
+  int msk_stride = MAX_SIZE;
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = rnd(65);  // Presumably a mask weight in 0..64 - confirm.
+    }
+
+    ref_ret = ref_func_(src_ptr, src_stride,
+                        ref_ptr, ref_stride,
+                        msk_ptr, msk_stride,
+                        &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride,
+                                                 ref_ptr, ref_stride,
+                                                 msk_ptr, msk_stride,
+                                                 &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;  // Counted here, reported once after the loop.
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: Masked Variance Test OperationCheck, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+// Compares both functions on constant fills: bits 0..2 of i select
+// src 255/0, ref 255/0 and mask 64/0, covering all 8 combinations.
+TEST_P(MaskedVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  src_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  ref_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, MAX_SIZE*MAX_SIZE);
+  int err_count = 0;  // Number of combinations whose results differed.
+  int first_failure = -1;  // Combination index of the first mismatch.
+  int src_stride = MAX_SIZE;
+  int ref_stride = MAX_SIZE;
+  int msk_stride = MAX_SIZE;
+
+  for (int i = 0; i < 8; ++i) {
+    memset(src_ptr, (i & 0x1) ? 255 : 0, MAX_SIZE*MAX_SIZE);
+    memset(ref_ptr, (i & 0x2) ? 255 : 0, MAX_SIZE*MAX_SIZE);
+    memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_SIZE*MAX_SIZE);
+
+    ref_ret = ref_func_(src_ptr, src_stride,
+                        ref_ptr, ref_stride,
+                        msk_ptr, msk_stride,
+                        &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride,
+                                                 ref_ptr, ref_stride,
+                                                 msk_ptr, msk_stride,
+                                                 &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;  // Counted here, reported once after the loop.
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: Masked Variance Test ExtremeValues, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+typedef unsigned int (*MaskedSubPixelVarianceFunc)(  // Masked variance signature extended with x/y offsets for buffer a.
+    const uint8_t *a, int a_stride,
+    int xoffset, int  yoffset,  // The tests pass values in 0..SUBPEL_SHIFTS-1 here.
+    const uint8_t *b, int b_stride,
+    const uint8_t *m, int m_stride,
+    unsigned int *sse);  // Second result, written through the pointer.
+
+typedef std::tr1::tuple<MaskedSubPixelVarianceFunc,  // (function under test, reference function).
+                        MaskedSubPixelVarianceFunc> MaskedSubPixelVarianceParam;
+
+class MaskedSubPixelVarianceTest :  // Fixture holding one (tested, reference) masked sub-pixel variance pair.
+  public ::testing::TestWithParam<MaskedSubPixelVarianceParam> {
+ public:
+  virtual ~MaskedSubPixelVarianceTest() {}
+  virtual void SetUp() {  // Unpack the tuple parameter into the two function pointers.
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;  // Implementation under test (tuple element 0).
+  MaskedSubPixelVarianceFunc ref_func_;  // Reference implementation (tuple element 1).
+};
+
+// Checks the optimized masked sub-pixel variance against the reference on
+// random data, using offsets 0, 8 and one random offset per direction.
+TEST_P(MaskedSubPixelVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  src_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  ref_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  int err_count = 0;  // Number of evaluations whose results differed.
+  int first_failure = -1;  // Iteration of the first mismatch, -1 if none.
+  int src_stride = (MAX_SIZE+1);
+  int ref_stride = (MAX_SIZE+1);
+  int msk_stride = (MAX_SIZE+1);
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int xoffsets[] = {0, 8, rnd(SUBPEL_SHIFTS)};
+    int yoffsets[] = {0, 8, rnd(SUBPEL_SHIFTS)};
+    for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+      src_ptr[j] = rnd.Rand8();
+      ref_ptr[j] = rnd.Rand8();
+      msk_ptr[j] = rnd(65);  // Presumably a mask weight in 0..64 - confirm.
+    }
+    // Evaluate every (x, y) pairing of the three offsets picked above.
+    for (int k = 0; k < 3; k++) {
+      for (int l = 0; l < 3; l++) {
+        const int xoffset = xoffsets[k];
+        const int yoffset = yoffsets[l];
+
+        ref_ret = ref_func_(src_ptr, src_stride,
+                            xoffset, yoffset,
+                            ref_ptr, ref_stride,
+                            msk_ptr, msk_stride,
+                            &ref_sse);
+        ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride,
+                                                     xoffset, yoffset,
+                                                     ref_ptr, ref_stride,
+                                                     msk_ptr, msk_stride,
+                                                     &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;  // Counted here, reported once after the loops.
+          if (first_failure == -1)
+            first_failure = i;
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: Masked Sub Pixel Variance Test OperationCheck, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+// Constant-fill inputs (src/ref 255 or 0, mask 64 or 0) at every x/y offset.
+TEST_P(MaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  src_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  ref_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  int first_failure_x = -1;  // Offsets in effect at the first mismatch.
+  int first_failure_y = -1;
+  int err_count = 0;  // Number of evaluations whose results differed.
+  int first_failure = -1;  // Fill combination (0..7) of the first mismatch.
+  int src_stride = (MAX_SIZE+1);
+  int ref_stride = (MAX_SIZE+1);
+  int msk_stride = (MAX_SIZE+1);
+
+  for (int xoffset = 0 ; xoffset < SUBPEL_SHIFTS ; xoffset++) {
+    for (int yoffset = 0 ; yoffset < SUBPEL_SHIFTS ; yoffset++) {
+      for (int i = 0; i < 8; ++i) {  // Bits 0..2 of i pick the three fills.
+        memset(src_ptr, (i & 0x1) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+        memset(ref_ptr, (i & 0x2) ? 255 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+        memset(msk_ptr, (i & 0x4) ?  64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+
+        ref_ret = ref_func_(src_ptr, src_stride,
+                            xoffset, yoffset,
+                            ref_ptr, ref_stride,
+                            msk_ptr, msk_stride,
+                            &ref_sse);
+        ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src_ptr, src_stride,
+                                                     xoffset, yoffset,
+                                                     ref_ptr, ref_stride,
+                                                     msk_ptr, msk_stride,
+                                                     &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;  // Counted here, reported once after the loops.
+          if (first_failure == -1) {
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: Masked Sub Pixel Variance Test ExtremeValues, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure
+    << " x_offset = " << first_failure_x
+    << " y_offset = " << first_failure_y;
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef std::tr1::tuple<MaskedVarianceFunc,  // Function under test.
+                        MaskedVarianceFunc,  // Reference function.
+                        vpx_bit_depth_t> HighbdMaskedVarianceParam;  // Bit depth the tests use to size sample values.
+
+class HighbdMaskedVarianceTest :  // Fixture holding a (tested, reference) masked variance pair plus a bit depth.
+  public ::testing::TestWithParam<HighbdMaskedVarianceParam> {
+ public:
+  virtual ~HighbdMaskedVarianceTest() {}
+  virtual void SetUp() {  // Unpack the three tuple elements into the members below.
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  MaskedVarianceFunc opt_func_;  // Implementation under test (tuple element 0).
+  MaskedVarianceFunc ref_func_;  // Reference implementation (tuple element 1).
+  vpx_bit_depth_t bit_depth_;  // Used by the tests as the shift count in (1 << bit_depth_) - 1.
+};
+
+// Both functions must agree on return value and SSE for random inputs.
+TEST_P(HighbdMaskedVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, MAX_SIZE*MAX_SIZE);
+  uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // Functions take uint8_t*.
+  uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;  // Number of iterations whose results differed.
+  int first_failure = -1;  // Iteration of the first mismatch, -1 if none.
+  int src_stride = MAX_SIZE;
+  int ref_stride = MAX_SIZE;
+  int msk_stride = MAX_SIZE;
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int j = 0; j < MAX_SIZE*MAX_SIZE; j++) {
+      src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);  // Low bits only.
+      ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+      msk_ptr[j] = rnd(65);  // Presumably a mask weight in 0..64 - confirm.
+    }
+
+    ref_ret = ref_func_(src8_ptr, src_stride,
+                        ref8_ptr, ref_stride,
+                        msk_ptr, msk_stride,
+                        &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride,
+                                                 ref8_ptr, ref_stride,
+                                                 msk_ptr, msk_stride,
+                                                 &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;  // Counted here, reported once after the loop.
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: High BD Masked Variance Test OperationCheck, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+// Compares both functions on constant fills: bits 0..2 of i select src
+// max/0, ref max/0 and mask 64/0, where max is (1 << bit_depth_) - 1.
+TEST_P(HighbdMaskedVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_ptr, MAX_SIZE*MAX_SIZE);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, MAX_SIZE*MAX_SIZE);
+  uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // Functions take uint8_t*.
+  uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;  // Number of combinations whose results differed.
+  int first_failure = -1;  // Combination index of the first mismatch.
+  int src_stride = MAX_SIZE;
+  int ref_stride = MAX_SIZE;
+  int msk_stride = MAX_SIZE;
+
+  for (int i = 0; i < 8; ++i) {
+    vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
+                 MAX_SIZE*MAX_SIZE);
+    vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
+                 MAX_SIZE*MAX_SIZE);
+    memset(msk_ptr, (i & 0x4) ?  64 : 0, MAX_SIZE*MAX_SIZE);
+
+    ref_ret = ref_func_(src8_ptr, src_stride,
+                        ref8_ptr, ref_stride,
+                        msk_ptr, msk_stride,
+                        &ref_sse);
+    ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride,
+                                                 ref8_ptr, ref_stride,
+                                                 msk_ptr, msk_stride,
+                                                 &opt_sse));
+
+    if (opt_ret != ref_ret || opt_sse != ref_sse) {
+      err_count++;  // Counted here, reported once after the loop.
+      if (first_failure == -1) first_failure = i;
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: High BD Masked Variance Test ExtremeValues, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure;
+}
+
+typedef std::tr1::tuple<MaskedSubPixelVarianceFunc,  // Function under test.
+                        MaskedSubPixelVarianceFunc,  // Reference function.
+                        vpx_bit_depth_t> HighbdMaskedSubPixelVarianceParam;  // Bit depth the tests use to size sample values.
+
+class HighbdMaskedSubPixelVarianceTest :  // Fixture holding a (tested, reference) sub-pixel variance pair plus a bit depth.
+  public ::testing::TestWithParam<HighbdMaskedSubPixelVarianceParam> {
+ public:
+  virtual ~HighbdMaskedSubPixelVarianceTest() {}
+  virtual void SetUp() {  // Unpack the three tuple elements into the members below.
+    opt_func_ = GET_PARAM(0);
+    ref_func_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }  // Invoked after every test case of this fixture.
+
+ protected:
+  MaskedSubPixelVarianceFunc opt_func_;  // Implementation under test (tuple element 0).
+  MaskedSubPixelVarianceFunc ref_func_;  // Reference implementation (tuple element 1).
+  vpx_bit_depth_t bit_depth_;  // Used by the tests as the shift count in (1 << bit_depth_) - 1.
+};
+
+// Random-data comparison at every x/y offset in 0..SUBPEL_SHIFTS-1.
+TEST_P(HighbdMaskedSubPixelVarianceTest, OperationCheck) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // Functions take uint8_t*.
+  uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int err_count = 0;  // Number of evaluations whose results differed.
+  int first_failure = -1;  // Iteration of the first mismatch, -1 if none.
+  int first_failure_x = -1;  // Offsets in effect at the first mismatch.
+  int first_failure_y = -1;
+  int src_stride = (MAX_SIZE+1);
+  int ref_stride = (MAX_SIZE+1);
+  int msk_stride = (MAX_SIZE+1);
+
+  for (int i = 0; i < number_of_iterations; ++i) {
+    for (int xoffset = 0; xoffset < SUBPEL_SHIFTS; xoffset++) {
+      for (int yoffset = 0; yoffset < SUBPEL_SHIFTS; yoffset++) {
+        for (int j = 0; j < (MAX_SIZE+1)*(MAX_SIZE+1); j++) {
+          src_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+          ref_ptr[j] = rnd.Rand16() & ((1 << bit_depth_) - 1);
+          msk_ptr[j] = rnd(65);  // Presumably a weight in 0..64 - confirm.
+        }
+
+        ref_ret = ref_func_(src8_ptr, src_stride,
+                            xoffset, yoffset,
+                            ref8_ptr, ref_stride,
+                            msk_ptr, msk_stride,
+                            &ref_sse);
+        ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride,
+                                                     xoffset, yoffset,
+                                                     ref8_ptr, ref_stride,
+                                                     msk_ptr, msk_stride,
+                                                     &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;  // Counted here, reported once after the loops.
+          if (first_failure == -1) {
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: High BD Masked Sub Pixel Variance Test OperationCheck, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure
+    << " x_offset = " << first_failure_x
+    << " y_offset = " << first_failure_y;
+}
+
+// Constant-fill inputs (src/ref max or 0, mask 64 or 0) at every x/y offset.
+TEST_P(HighbdMaskedSubPixelVarianceTest, ExtremeValues) {
+  unsigned int ref_ret, opt_ret;
+  unsigned int ref_sse, opt_sse;
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, src_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  DECLARE_ALIGNED_ARRAY(16, uint8_t,  msk_ptr, (MAX_SIZE+1)*(MAX_SIZE+1));
+  uint8_t* src8_ptr = CONVERT_TO_BYTEPTR(src_ptr);  // Functions take uint8_t*.
+  uint8_t* ref8_ptr = CONVERT_TO_BYTEPTR(ref_ptr);
+  int first_failure_x = -1;  // Offsets in effect at the first mismatch.
+  int first_failure_y = -1;
+  int err_count = 0;  // Number of evaluations whose results differed.
+  int first_failure = -1;  // Fill combination (0..7) of the first mismatch.
+  int src_stride = (MAX_SIZE+1);
+  int ref_stride = (MAX_SIZE+1);
+  int msk_stride = (MAX_SIZE+1);
+
+  for (int xoffset = 0 ; xoffset < SUBPEL_SHIFTS ; xoffset++) {
+    for (int yoffset = 0 ; yoffset < SUBPEL_SHIFTS ; yoffset++) {
+      for (int i = 0; i < 8; ++i) {  // Bits 0..2 of i pick the three fills.
+        vpx_memset16(src_ptr, (i & 0x1) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SIZE+1)*(MAX_SIZE+1));
+        vpx_memset16(ref_ptr, (i & 0x2) ? ((1 << bit_depth_) - 1) : 0,
+                     (MAX_SIZE+1)*(MAX_SIZE+1));
+        memset(msk_ptr, (i & 0x4) ?   64 : 0, (MAX_SIZE+1)*(MAX_SIZE+1));
+
+        ref_ret = ref_func_(src8_ptr, src_stride,
+                            xoffset, yoffset,
+                            ref8_ptr, ref_stride,
+                            msk_ptr, msk_stride,
+                            &ref_sse);
+        ASM_REGISTER_STATE_CHECK(opt_ret = opt_func_(src8_ptr, src_stride,
+                                                     xoffset, yoffset,
+                                                     ref8_ptr, ref_stride,
+                                                     msk_ptr, msk_stride,
+                                                     &opt_sse));
+
+        if (opt_ret != ref_ret || opt_sse != ref_sse) {
+          err_count++;  // Counted here, reported once after the loops.
+          if (first_failure == -1) {
+            first_failure = i;
+            first_failure_x = xoffset;
+            first_failure_y = yoffset;
+          }
+        }
+      }
+    }
+  }
+
+  EXPECT_EQ(0, err_count)
+    << "Error: High BD Masked Sub Pixel Variance Test ExtremeValues, "
+    << "C output doesn't match SSSE3 output. "
+    << "First failed at test case " << first_failure
+    << " x_offset = " << first_failure_x
+    << " y_offset = " << first_failure_y;
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(  // One (SSSE3, C) masked variance pair per block size, 64x64 down to 4x4.
+  SSSE3_C_COMPARE, MaskedVarianceTest,
+  ::testing::Values(
+    make_tuple(&vp9_masked_variance64x64_ssse3,
+               &vp9_masked_variance64x64_c),
+    make_tuple(&vp9_masked_variance64x32_ssse3,
+               &vp9_masked_variance64x32_c),
+    make_tuple(&vp9_masked_variance32x64_ssse3,
+               &vp9_masked_variance32x64_c),
+    make_tuple(&vp9_masked_variance32x32_ssse3,
+               &vp9_masked_variance32x32_c),
+    make_tuple(&vp9_masked_variance32x16_ssse3,
+               &vp9_masked_variance32x16_c),
+    make_tuple(&vp9_masked_variance16x32_ssse3,
+               &vp9_masked_variance16x32_c),
+    make_tuple(&vp9_masked_variance16x16_ssse3,
+               &vp9_masked_variance16x16_c),
+    make_tuple(&vp9_masked_variance16x8_ssse3,
+               &vp9_masked_variance16x8_c),
+    make_tuple(&vp9_masked_variance8x16_ssse3,
+               &vp9_masked_variance8x16_c),
+    make_tuple(&vp9_masked_variance8x8_ssse3,
+               &vp9_masked_variance8x8_c),
+    make_tuple(&vp9_masked_variance8x4_ssse3,
+               &vp9_masked_variance8x4_c),
+    make_tuple(&vp9_masked_variance4x8_ssse3,
+               &vp9_masked_variance4x8_c),
+    make_tuple(&vp9_masked_variance4x4_ssse3,
+               &vp9_masked_variance4x4_c)));
+
+INSTANTIATE_TEST_CASE_P(  // One (SSSE3, C) masked sub-pixel variance pair per block size, 64x64 down to 4x4.
+  SSSE3_C_COMPARE, MaskedSubPixelVarianceTest,
+  ::testing::Values(
+    make_tuple(&vp9_masked_sub_pixel_variance64x64_ssse3,
+              &vp9_masked_sub_pixel_variance64x64_c),
+    make_tuple(&vp9_masked_sub_pixel_variance64x32_ssse3,
+              &vp9_masked_sub_pixel_variance64x32_c),
+    make_tuple(&vp9_masked_sub_pixel_variance32x64_ssse3,
+              &vp9_masked_sub_pixel_variance32x64_c),
+    make_tuple(&vp9_masked_sub_pixel_variance32x32_ssse3,
+              &vp9_masked_sub_pixel_variance32x32_c),
+    make_tuple(&vp9_masked_sub_pixel_variance32x16_ssse3,
+              &vp9_masked_sub_pixel_variance32x16_c),
+    make_tuple(&vp9_masked_sub_pixel_variance16x32_ssse3,
+              &vp9_masked_sub_pixel_variance16x32_c),
+    make_tuple(&vp9_masked_sub_pixel_variance16x16_ssse3,
+              &vp9_masked_sub_pixel_variance16x16_c),
+    make_tuple(&vp9_masked_sub_pixel_variance16x8_ssse3,
+              &vp9_masked_sub_pixel_variance16x8_c),
+    make_tuple(&vp9_masked_sub_pixel_variance8x16_ssse3,
+              &vp9_masked_sub_pixel_variance8x16_c),
+    make_tuple(&vp9_masked_sub_pixel_variance8x8_ssse3,
+              &vp9_masked_sub_pixel_variance8x8_c),
+    make_tuple(&vp9_masked_sub_pixel_variance8x4_ssse3,
+              &vp9_masked_sub_pixel_variance8x4_c),
+    make_tuple(&vp9_masked_sub_pixel_variance4x8_ssse3,
+              &vp9_masked_sub_pixel_variance4x8_c),
+    make_tuple(&vp9_masked_sub_pixel_variance4x4_ssse3,
+              &vp9_masked_sub_pixel_variance4x4_c)));
+
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+  SSSE3_C_COMPARE, HighbdMaskedVarianceTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_masked_variance64x64_ssse3,
+               &vp9_highbd_masked_variance64x64_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance64x32_ssse3,
+               &vp9_highbd_masked_variance64x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance32x64_ssse3,
+               &vp9_highbd_masked_variance32x64_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance32x32_ssse3,
+               &vp9_highbd_masked_variance32x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance32x16_ssse3,
+               &vp9_highbd_masked_variance32x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance16x32_ssse3,
+               &vp9_highbd_masked_variance16x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance16x16_ssse3,
+               &vp9_highbd_masked_variance16x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance16x8_ssse3,
+               &vp9_highbd_masked_variance16x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance8x16_ssse3,
+               &vp9_highbd_masked_variance8x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance8x8_ssse3,
+               &vp9_highbd_masked_variance8x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance8x4_ssse3,
+               &vp9_highbd_masked_variance8x4_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance4x8_ssse3,
+               &vp9_highbd_masked_variance4x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_variance4x4_ssse3,
+               &vp9_highbd_masked_variance4x4_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_10_masked_variance64x64_ssse3,
+               &vp9_highbd_10_masked_variance64x64_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance64x32_ssse3,
+               &vp9_highbd_10_masked_variance64x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance32x64_ssse3,
+               &vp9_highbd_10_masked_variance32x64_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance32x32_ssse3,
+               &vp9_highbd_10_masked_variance32x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance32x16_ssse3,
+               &vp9_highbd_10_masked_variance32x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance16x32_ssse3,
+               &vp9_highbd_10_masked_variance16x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance16x16_ssse3,
+               &vp9_highbd_10_masked_variance16x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance16x8_ssse3,
+               &vp9_highbd_10_masked_variance16x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance8x16_ssse3,
+               &vp9_highbd_10_masked_variance8x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance8x8_ssse3,
+               &vp9_highbd_10_masked_variance8x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance8x4_ssse3,
+               &vp9_highbd_10_masked_variance8x4_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance4x8_ssse3,
+               &vp9_highbd_10_masked_variance4x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_variance4x4_ssse3,
+               &vp9_highbd_10_masked_variance4x4_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_12_masked_variance64x64_ssse3,
+               &vp9_highbd_12_masked_variance64x64_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance64x32_ssse3,
+               &vp9_highbd_12_masked_variance64x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance32x64_ssse3,
+               &vp9_highbd_12_masked_variance32x64_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance32x32_ssse3,
+               &vp9_highbd_12_masked_variance32x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance32x16_ssse3,
+               &vp9_highbd_12_masked_variance32x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance16x32_ssse3,
+               &vp9_highbd_12_masked_variance16x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance16x16_ssse3,
+               &vp9_highbd_12_masked_variance16x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance16x8_ssse3,
+               &vp9_highbd_12_masked_variance16x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance8x16_ssse3,
+               &vp9_highbd_12_masked_variance8x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance8x8_ssse3,
+               &vp9_highbd_12_masked_variance8x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance8x4_ssse3,
+               &vp9_highbd_12_masked_variance8x4_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance4x8_ssse3,
+               &vp9_highbd_12_masked_variance4x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_variance4x4_ssse3,
+               &vp9_highbd_12_masked_variance4x4_c, VPX_BITS_12)));
+
+INSTANTIATE_TEST_CASE_P(
+  SSSE3_C_COMPARE, HighbdMaskedSubPixelVarianceTest,
+  ::testing::Values(
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance64x64_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance64x64_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance64x32_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance64x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance32x64_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance32x64_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance32x32_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance32x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance32x16_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance32x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance16x32_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance16x32_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance16x16_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance16x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance16x8_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance16x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance8x16_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance8x16_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance8x8_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance8x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance8x4_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance8x4_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance4x8_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance4x8_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_masked_sub_pixel_variance4x4_ssse3,
+               &vp9_highbd_masked_sub_pixel_variance4x4_c, VPX_BITS_8),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x64_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance64x64_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance64x32_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance64x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x64_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance32x64_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x32_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance32x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance32x16_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance32x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x32_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance16x32_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x16_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance16x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance16x8_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance16x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x16_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance8x16_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x8_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance8x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance8x4_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance8x4_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x8_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance4x8_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_10_masked_sub_pixel_variance4x4_ssse3,
+               &vp9_highbd_10_masked_sub_pixel_variance4x4_c, VPX_BITS_10),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x64_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance64x64_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance64x32_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance64x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x64_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance32x64_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x32_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance32x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance32x16_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance32x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x32_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance16x32_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x16_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance16x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance16x8_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance16x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x16_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance8x16_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x8_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance8x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance8x4_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance8x4_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x8_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance4x8_c, VPX_BITS_12),
+    make_tuple(&vp9_highbd_12_masked_sub_pixel_variance4x4_ssse3,
+               &vp9_highbd_12_masked_sub_pixel_variance4x4_c, VPX_BITS_12)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#endif  // HAVE_SSSE3
+}  // namespace
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -42,10 +42,6 @@ class MD5 {
    }
  }

-  void Add(const uint8_t *data, size_t size) {
-    MD5Update(&md5_, data, static_cast<uint32_t>(size));
-  }
-
  const char *Get(void) {
    static const char hex[16] = {
      '0', '1', '2', '3', '4', '5', '6', '7',
--- a/test/minmax_test.cc
+++ b/test/minmax_test.cc
@@ -1,132 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_dsp_rtcd.h"
-#include "vpx/vpx_integer.h"
-
-#include "test/acm_random.h"
-#include "test/register_state_check.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-
-typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride,
-                           const uint8_t *b, int b_stride,
-                           int *min, int *max);
-
-class MinMaxTest : public ::testing::TestWithParam<MinMaxFunc> {
- public:
-  virtual void SetUp() {
-    mm_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  MinMaxFunc mm_func_;
-  ACMRandom rnd_;
-};
-
-void reference_minmax(const uint8_t *a, int a_stride,
-                      const uint8_t *b, int b_stride,
-                      int *min_ret, int *max_ret) {
-  int min = 255;
-  int max = 0;
-  for (int i = 0; i < 8; i++) {
-    for (int j = 0; j < 8; j++) {
-      const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]);
-      if (min > diff) min = diff;
-      if (max < diff) max = diff;
-    }
-  }
-
-  *min_ret = min;
-  *max_ret = max;
-}
-
-TEST_P(MinMaxTest, MinValue) {
-  for (int i = 0; i < 64; i++) {
-    uint8_t a[64], b[64];
-    memset(a, 0, sizeof(a));
-    memset(b, 255, sizeof(b));
-    b[i] = i;  // Set a minimum difference of i.
-
-    int min, max;
-    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-    EXPECT_EQ(255, max);
-    EXPECT_EQ(i, min);
-  }
-}
-
-TEST_P(MinMaxTest, MaxValue) {
-  for (int i = 0; i < 64; i++) {
-    uint8_t a[64], b[64];
-    memset(a, 0, sizeof(a));
-    memset(b, 0, sizeof(b));
-    b[i] = i;  // Set a maximum difference of i.
-
-    int min, max;
-    ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-    EXPECT_EQ(i, max);
-    EXPECT_EQ(0, min);
-  }
-}
-
-TEST_P(MinMaxTest, CompareReference) {
-  uint8_t a[64], b[64];
-  for (int j = 0; j < 64; j++) {
-    a[j] = rnd_.Rand8();
-    b[j] = rnd_.Rand8();
-  }
-
-  int min_ref, max_ref, min, max;
-  reference_minmax(a, 8, b, 8, &min_ref, &max_ref);
-  ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max));
-  EXPECT_EQ(max_ref, max);
-  EXPECT_EQ(min_ref, min);
-}
-
-TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
-  uint8_t a[8 * 64], b[8 * 64];
-  for (int i = 0; i < 8 * 64; i++) {
-    a[i] = rnd_.Rand8();
-    b[i] = rnd_.Rand8();
-  }
-  for (int a_stride = 8; a_stride <= 64; a_stride += 8) {
-    for (int b_stride = 8; b_stride <= 64; b_stride += 8) {
-      int min_ref, max_ref, min, max;
-      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
-      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
-      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
-                              << " and b_stride = " << b_stride;;
-      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
-                              << " and b_stride = " << b_stride;;
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest,
-                        ::testing::Values(&vpx_minmax_8x8_sse2));
-#endif
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
-                        ::testing::Values(&vpx_minmax_8x8_neon));
-#endif
-
-}  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -13,13 +13,12 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+
+#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_integer.h"
@@ -75,16 +74,16 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);

  const int count_test_block = 1000;
  const int block_size = size * size;

-  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);

  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
@@ -154,10 +153,10 @@ TEST_P(PartialIDctTest, ResultsMatch) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
+  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
  const int count_test_block = 1000;
  const int max_coeff = 32766 / 4;
  const int block_size = size * size;
@@ -202,142 +201,117 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_34_add_c,
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_34_add_c,
                   TX_32X32, 34),
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_1_add_c,
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_1_add_c,
                   TX_32X32, 1),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_10_add_c,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_10_add_c,
                   TX_16X16, 10),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_1_add_c,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_1_add_c,
                   TX_16X16, 1),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_12_add_c,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_c,
                   TX_8X8, 12),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_1_add_c,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_1_add_c,
                   TX_8X8, 1),
-        make_tuple(&vpx_fdct4x4_c,
-                   &vpx_idct4x4_16_add_c,
-                   &vpx_idct4x4_1_add_c,
+        make_tuple(&vp9_fdct4x4_c,
+                   &vp9_idct4x4_16_add_c,
+                   &vp9_idct4x4_1_add_c,
                   TX_4X4, 1)));

-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON_ASM
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_1_add_neon,
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_1_add_neon,
                   TX_32X32, 1),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_10_add_neon,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_10_add_neon,
                   TX_16X16, 10),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_1_add_neon,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_1_add_neon,
                   TX_16X16, 1),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_12_add_neon,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_neon,
                   TX_8X8, 12),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_1_add_neon,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_1_add_neon,
                   TX_8X8, 1),
-        make_tuple(&vpx_fdct4x4_c,
-                   &vpx_idct4x4_16_add_c,
-                   &vpx_idct4x4_1_add_neon,
+        make_tuple(&vp9_fdct4x4_c,
+                   &vp9_idct4x4_16_add_c,
+                   &vp9_idct4x4_1_add_neon,
                   TX_4X4, 1)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_34_add_sse2,
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_34_add_sse2,
                   TX_32X32, 34),
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_1_add_sse2,
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_1_add_sse2,
                   TX_32X32, 1),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_10_add_sse2,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_10_add_sse2,
                   TX_16X16, 10),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_1_add_sse2,
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_1_add_sse2,
                   TX_16X16, 1),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_12_add_sse2,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_sse2,
                   TX_8X8, 12),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_1_add_sse2,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_1_add_sse2,
                   TX_8X8, 1),
-        make_tuple(&vpx_fdct4x4_c,
-                   &vpx_idct4x4_16_add_c,
-                   &vpx_idct4x4_1_add_sse2,
+        make_tuple(&vp9_fdct4x4_c,
+                   &vp9_idct4x4_16_add_c,
+                   &vp9_idct4x4_1_add_sse2,
                   TX_4X4, 1)));
 #endif

-#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
-    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
+    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3_64, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_12_add_ssse3,
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_ssse3,
                   TX_8X8, 12)));
 #endif

-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    MSA, PartialIDctTest,
+    SSSE3, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_34_add_msa,
-                   TX_32X32, 34),
-        make_tuple(&vpx_fdct32x32_c,
-                   &vpx_idct32x32_1024_add_c,
-                   &vpx_idct32x32_1_add_msa,
-                   TX_32X32, 1),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_10_add_msa,
-                   TX_16X16, 10),
-        make_tuple(&vpx_fdct16x16_c,
-                   &vpx_idct16x16_256_add_c,
-                   &vpx_idct16x16_1_add_msa,
-                   TX_16X16, 1),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_12_add_msa,
-                   TX_8X8, 10),
-        make_tuple(&vpx_fdct8x8_c,
-                   &vpx_idct8x8_64_add_c,
-                   &vpx_idct8x8_1_add_msa,
-                   TX_8X8, 1),
-        make_tuple(&vpx_fdct4x4_c,
-                   &vpx_idct4x4_16_add_c,
-                   &vpx_idct4x4_1_add_msa,
-                   TX_4X4, 1)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_10_add_ssse3,
+                   TX_16X16, 10)));
+#endif
 }  // namespace
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -63,12 +63,12 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
-  (void)memset(flimits, 255, block_width);
+  (void)vpx_memset(flimits, 255, block_width);

  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
-  (void)memset(src_image, 10, input_size);
+  (void)vpx_memset(src_image, 10, input_size);
  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
@@ -78,7 +78,7 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  }

  // Initialize pixels in the output to 99.
-  (void)memset(dst_image, 99, output_size);
+  (void)vpx_memset(dst_image, 99, output_size);

  ASM_REGISTER_STATE_CHECK(
      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
@@ -110,9 +110,4 @@ INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest,
    ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
 #endif

-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, VP8PostProcessingFilterTest,
-    ::testing::Values(vp8_post_proc_down_and_across_mb_row_msa));
-#endif
-
 }  // namespace
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -8,196 +8,337 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <math.h>
+#include <stdlib.h>
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "vp8/common/blockd.h"
-#include "vp8/common/onyx.h"
-#include "vp8/encoder/block.h"
-#include "vp8/encoder/onyx_int.h"
-#include "vp8/encoder/quantize.h"
+
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-const int kNumBlocks = 25;
-const int kNumBlockEntries = 16;
-
-typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);
-
-typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;

 using libvpx_test::ACMRandom;
-using std::tr1::make_tuple;

-// Create and populate a VP8_COMP instance which has a complete set of
-// quantization inputs as well as a second MACROBLOCKD for output.
-class QuantizeTestBase {
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 100;
+
+typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
+                           int skip_block, const int16_t *zbin,
+                           const int16_t *round, const int16_t *quant,
+                           const int16_t *quant_shift,
+                           tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                           const int16_t *dequant, uint16_t *eob,
+                           const int16_t *scan, const int16_t *iscan);
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, int>
+    QuantizeParam;
+class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
 public:
-  virtual ~QuantizeTestBase() {
-    vp8_remove_compressor(&vp8_comp_);
-    vp8_comp_ = NULL;
-    vpx_free(macroblockd_dst_);
-    macroblockd_dst_ = NULL;
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  void SetupCompressor() {
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-
-    // The full configuration is necessary to generate the quantization tables.
-    VP8_CONFIG vp8_config;
-    memset(&vp8_config, 0, sizeof(vp8_config));
-
-    vp8_comp_ = vp8_create_compressor(&vp8_config);
-
-    // Set the tables based on a quantizer of 0.
-    vp8_set_quantizer(vp8_comp_, 0);
-
-    // Set up all the block/blockd pointers for the mb in vp8_comp_.
-    vp8cx_frame_init_quantizer(vp8_comp_);
-
-    // Copy macroblockd from the reference to get pre-set-up dequant values.
-    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
-        vpx_memalign(32, sizeof(*macroblockd_dst_)));
-    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
-    // Fix block pointers - currently they point to the blocks in the reference
-    // structure.
-    vp8_setup_block_dptrs(macroblockd_dst_);
-  }
-
-  void UpdateQuantizer(int q) {
-    vp8_set_quantizer(vp8_comp_, q);
-
-    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
-    vp8_setup_block_dptrs(macroblockd_dst_);
-  }
-
-  void FillCoeffConstant(int16_t c) {
-    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
-      vp8_comp_->mb.coeff[i] = c;
-    }
-  }
-
-  void FillCoeffRandom() {
-    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
-      vp8_comp_->mb.coeff[i] = rnd_.Rand8();
-    }
-  }
-
-  void CheckOutput() {
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
-                        sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks *
-                            kNumBlockEntries))
-        << "qcoeff mismatch";
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
-                        sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks *
-                            kNumBlockEntries))
-        << "dqcoeff mismatch";
-    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
-                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
-        << "eobs mismatch";
-  }
-
-  VP8_COMP *vp8_comp_;
-  MACROBLOCKD *macroblockd_dst_;
-
- private:
-  ACMRandom rnd_;
-};
-
-class QuantizeTest : public QuantizeTestBase,
-                     public ::testing::TestWithParam<VP8QuantizeParam> {
- protected:
+  virtual ~QuantizeTest() {}
  virtual void SetUp() {
-    SetupCompressor();
-    asm_quant_ = GET_PARAM(0);
-    c_quant_ = GET_PARAM(1);
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
  }

-  void RunComparison() {
-    for (int i = 0; i < kNumBlocks; ++i) {
-      ASM_REGISTER_STATE_CHECK(
-          c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i]));
-      ASM_REGISTER_STATE_CHECK(
-          asm_quant_(&vp8_comp_->mb.block[i], &macroblockd_dst_->block[i]));
-    }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

-    CheckOutput();
+ protected:
+  int bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+class Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~Quantize32Test() {}
+  virtual void SetUp() {
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
  }

- private:
-  VP8Quantize asm_quant_;
-  VP8Quantize c_quant_;
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
 };

-TEST_P(QuantizeTest, TestZeroInput) {
-  FillCoeffConstant(0);
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestLargeNegativeInput) {
-  FillCoeffConstant(0);
-  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
-  // like BUG=883 where the constant being compared was incorrectly initialized.
-  vp8_comp_->mb.coeff[0] = -8191;
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestRandomInput) {
-  FillCoeffRandom();
-  RunComparison();
-}
-
-TEST_P(QuantizeTest, TestMultipleQ) {
-  for (int q = 0; q < QINDEX_RANGE; ++q) {
-    UpdateQuantizer(q);
-    FillCoeffRandom();
-    RunComparison();
+// Feeds random coefficients to both quantizers and requires identical output.
+TEST_P(QuantizeTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8, TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_intra_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    // Check all |count| outputs; |sz| is a TX_SIZE enum value, not a length.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) first_failure = i;
+    err_count_total += err_count;
  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
 }
+// 32x32 variant: random coefficients, both quantizers must agree exactly.
+TEST_P(Quantize32Test, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+
+    const scan_order *scan_order = &vp9_intra_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    // Check all |count| outputs; |sz| is a TX_SIZE enum value, not a length.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) first_failure = i;
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+// Sparse-input check: every coefficient is zero except up to two entries.
+TEST_P(QuantizeTest, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8, TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_intra_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // At most two nonzero coefficients, placed at random positions.
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    // Check all |count| outputs; |sz| is a TX_SIZE enum value, not a length.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) first_failure = i;
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+// 32x32 sparse-input check: all coefficients zero except up to two entries.
+TEST_P(Quantize32Test, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
+  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
+  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
+  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_intra_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // At most two nonzero coefficients, placed at random positions.
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    // Check all |count| outputs; |sz| is a TX_SIZE enum value, not a length.
+    for (int j = 0; j < count; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) first_failure = i;
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+    << "Error: Quantization Test, C output doesn't match SSE2 output. "
+    << "First failed at test case " << first_failure;
+}
+using std::tr1::make_tuple;

 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
-        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
+  SSE2_C_COMPARE, QuantizeTest,
+  ::testing::Values(  // NOTE(review): presumably same tuple layout as below
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, 8),
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, 10),
+    make_tuple(&vp9_highbd_quantize_b_sse2,
+               &vp9_highbd_quantize_b_c, 12)));
+INSTANTIATE_TEST_CASE_P(
+  SSE2_C_COMPARE, Quantize32Test,
+  ::testing::Values(  // (op under test, reference op, bit depth)
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, 8),
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, 10),
+    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+               &vp9_highbd_quantize_b_32x32_c, 12)));
 #endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
-                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
-                                                     &vp8_fast_quantize_b_c)));
-#endif  // HAVE_SSSE3
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(
-    SSE4_1, QuantizeTest,
-    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
-                                 &vp8_regular_quantize_b_c)));
-#endif  // HAVE_SSE4_1
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
-                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
-                                                     &vp8_fast_quantize_b_c)));
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
-        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
-#endif  // HAVE_MSA
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/realtime_test.cc
+++ b/test/realtime_test.cc
@@ -1,64 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/video_source.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-namespace {
-
-const int kVideoSourceWidth = 320;
-const int kVideoSourceHeight = 240;
-const int kFramesToEncode = 2;
-
-class RealtimeTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
- protected:
-  RealtimeTest()
-      : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
-  virtual ~RealtimeTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    cfg_.g_lag_in_frames = 0;
-    SetMode(::libvpx_test::kRealTime);
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    // TODO(tomfinegan): We're changing the pass value here to make sure
-    // we get frames when real time mode is combined with |g_pass| set to
-    // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
-    // the pass value based on the mode passed into EncoderTest::SetMode(),
-    // which overrides the one specified in SetUp() above.
-    cfg_.g_pass = VPX_RC_FIRST_PASS;
-  }
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
-    frame_packets_++;
-  }
-
-  int frame_packets_;
-};
-
-TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
-  ::libvpx_test::RandomVideoSource video;
-  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
-  video.set_limit(kFramesToEncode);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  EXPECT_EQ(kFramesToEncode, frame_packets_);
-}
-
-VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
-VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
-
-}  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -30,16 +30,20 @@

 #if defined(_WIN64)

-#undef NOMINMAX
-#define NOMINMAX
-#define WIN32_LEAN_AND_MEAN
+#define WIN32_LEAN_AND_MEAN  // makes <windows.h> skip rarely used APIs
 #include <windows.h>
 #include <winnt.h>

+namespace testing {
+namespace internal {
+
 inline bool operator==(const M128A& lhs, const M128A& rhs) {
  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
 }

+}  // namespace internal
+}  // namespace testing
+
 namespace libvpx_test {

 // Compares the state of xmm[6-15] at construction with their state at
@@ -92,7 +96,7 @@ class RegisterStateCheck {

 extern "C" {
 // Save the d8-d15 registers into store.
-void vpx_push_neon(int64_t *store);
+void vp9_push_neon(int64_t *store);
 }

 namespace libvpx_test {
@@ -107,7 +111,7 @@ class RegisterStateCheck {

 private:
  static bool StoreRegisters(int64_t store[8]) {
-    vpx_push_neon(store);
+    vp9_push_neon(store);
    return true;
  }

@@ -115,7 +119,7 @@ class RegisterStateCheck {
  bool Check() const {
    if (!initialized_) return false;
    int64_t post_store[8];
-    vpx_push_neon(post_store);
+    vp9_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
      EXPECT_EQ(pre_store_[i], post_store[i]) << "d"
          << i + 8 << " has been modified";
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -7,8 +7,6 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-#include <stdio.h>
-
 #include <climits>
 #include <vector>
 #include "third_party/googletest/src/include/gtest/gtest.h"
@@ -83,187 +81,34 @@ static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
 const unsigned int kInitialWidth = 320;
 const unsigned int kInitialHeight = 240;

-struct FrameInfo {
-  FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
-      : pts(_pts), w(_w), h(_h) {}
-
-  vpx_codec_pts_t pts;
-  unsigned int w;
-  unsigned int h;
-};
-
-void ScaleForFrameNumber(unsigned int frame,
-                         unsigned int initial_w,
-                         unsigned int initial_h,
-                         unsigned int *w,
-                         unsigned int *h,
-                         int flag_codec) {
-  if (frame < 10) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 20) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 30) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 40) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 50) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 60) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 70) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 80) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 90) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 100) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 110) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 120) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 130) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 140) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 150) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 160) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 170) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 180) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 190) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 200) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 210) {
-    *w = initial_w / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 220) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 230) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  if (frame < 240) {
-    *w = initial_w * 3 / 4;
-    *h = initial_h * 3 / 4;
-    return;
-  }
-  if (frame < 250) {
-    *w = initial_w  / 2;
-    *h = initial_h / 2;
-    return;
-  }
-  if (frame < 260) {
-    *w = initial_w;
-    *h = initial_h;
-    return;
-  }
-  // Go down very low.
-  if (frame < 270) {
-    *w = initial_w / 4;
-    *h = initial_h / 4;
-    return;
-  }
-  if (flag_codec == 1) {
-    // Cases that only works for VP9.
-    // For VP9: Swap width and height of original.
-    if (frame < 320) {
-      *w = initial_h;
-      *h = initial_w;
-      return;
-    }
-  }
-  *w = initial_w;
-  *h = initial_h;
+// Scales |val| by the ratio assigned to |frame|'s ten-frame window: 1, 1/2,
+// 2/3, 1/4 and 7/8 for frames 0-49; from frame 50 on |val| is returned as is.
+unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
+  static const unsigned int kNum[] = { 1, 1, 2, 1, 7 };
+  static const unsigned int kDen[] = { 1, 2, 3, 4, 8 };
+  const unsigned int num_windows = sizeof(kNum) / sizeof(kNum[0]);
+  const unsigned int window = frame / 10;
+  // Past the last table entry the value is left unscaled.
+  if (window >= num_windows)
+    return val;
+  // Multiply first, then divide (truncating unsigned integer arithmetic).
+  return val * kNum[window] / kDen[window];
 }

 class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
 public:
  ResizingVideoSource() {
    SetSize(kInitialWidth, kInitialHeight);
-    limit_ = 350;
+    limit_ = 60;  // ScaleForFrameNumber() varies the size for frames 10-49
  }
-  int flag_codec_;
+
  virtual ~ResizingVideoSource() {}

 protected:
  virtual void Next() {
    ++frame_;
-    unsigned int width;
-    unsigned int height;
-    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
-                        flag_codec_);
-    SetSize(width, height);
+    SetSize(ScaleForFrameNumber(frame_, kInitialWidth),
+            ScaleForFrameNumber(frame_, kInitialHeight));
    FillFrame();
  }
 };
@@ -275,6 +120,15 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

  virtual ~ResizeTest() {}

+  struct FrameInfo {
+    FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+        : pts(_pts), w(_w), h(_h) {}
+
+    vpx_codec_pts_t pts;  // timestamp; the tests read it as the frame number
+    unsigned int w;  // frame width
+    unsigned int h;  // frame height
+  };
+
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
@@ -290,21 +144,18 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

 TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
-  video.flag_codec_ = 0;
-  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const unsigned int frame = static_cast<unsigned>(info->pts);
-    unsigned int expected_w;
-    unsigned int expected_h;
-    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
-                        &expected_w, &expected_h, 0);
+    const unsigned int expected_w = ScaleForFrameNumber(frame, kInitialWidth);
+    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);
+    // Every recorded frame must have the size ScaleForFrameNumber() assigns.
    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
+        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
+        << "Frame " << frame << " had unexpected height";
  }
 }

@@ -344,27 +195,13 @@ class ResizeInternalTest : public ResizeTest {

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
-    if (change_config_) {
-      int new_q = 60;
-      if (video->frame() == 0) {
-        struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO};
-        encoder->Control(VP8E_SET_SCALEMODE, &mode);
-      }
-      if (video->frame() == 1) {
-        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
-        encoder->Control(VP8E_SET_SCALEMODE, &mode);
-        cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
-        encoder->Config(&cfg_);
-      }
-    } else {
-      if (video->frame() == kStepDownFrame) {
-        struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
-        encoder->Control(VP8E_SET_SCALEMODE, &mode);
-      }
-      if (video->frame() == kStepUpFrame) {
-        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
-        encoder->Control(VP8E_SET_SCALEMODE, &mode);
-      }
+    if (video->frame() == kStepDownFrame) {  // set the step-down scale mode
+      struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
+      encoder->Control(VP8E_SET_SCALEMODE, &mode);
+    }
+    if (video->frame() == kStepUpFrame) {  // back to VP8E_NORMAL in both fields
+      struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+      encoder->Control(VP8E_SET_SCALEMODE, &mode);
    }
  }

@@ -389,7 +226,6 @@ class ResizeInternalTest : public ResizeTest {
 #endif

  double frame0_psnr_;
-  bool change_config_;
 #if WRITE_COMPRESSED_STREAM
  FILE *outfile_;
  unsigned int out_frames_;
@@ -400,7 +236,6 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 10);
  init_flags_ = VPX_CODEC_USE_PSNR;
-  change_config_ = false;

  // q picked such that initial keyframe on this clip is ~30dB PSNR
  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
@@ -425,311 +260,7 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
  }
 }

-TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 10);
-  cfg_.g_w = 352;
-  cfg_.g_h = 288;
-  change_config_ = true;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-class ResizeRealtimeTest : public ::libvpx_test::EncoderTest,
-  public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~ResizeRealtimeTest() {}
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  libvpx_test::Encoder *encoder) {
-    if (video->frame() == 0) {
-      encoder->Control(VP9E_SET_AQ_MODE, 3);
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-    }
-
-    if (change_bitrate_ && video->frame() == 120) {
-      change_bitrate_ = false;
-      cfg_.rc_target_bitrate = 500;
-      encoder->Config(&cfg_);
-    }
-  }
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    set_cpu_used_ = GET_PARAM(2);
-  }
-
-  virtual void DecompressedFrameHook(const vpx_image_t &img,
-                                     vpx_codec_pts_t pts) {
-    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
-  }
-
-  virtual void MismatchHook(const vpx_image_t *img1,
-                             const vpx_image_t *img2) {
-    double mismatch_psnr = compute_psnr(img1, img2);
-    mismatch_psnr_ += mismatch_psnr;
-    ++mismatch_nframes_;
-  }
-
-  unsigned int GetMismatchFrames() {
-      return mismatch_nframes_;
-  }
-
-  void DefaultConfig() {
-    cfg_.rc_buf_initial_sz = 500;
-    cfg_.rc_buf_optimal_sz = 600;
-    cfg_.rc_buf_sz = 1000;
-    cfg_.rc_min_quantizer = 2;
-    cfg_.rc_max_quantizer = 56;
-    cfg_.rc_undershoot_pct = 50;
-    cfg_.rc_overshoot_pct = 50;
-    cfg_.rc_end_usage = VPX_CBR;
-    cfg_.kf_mode = VPX_KF_AUTO;
-    cfg_.g_lag_in_frames = 0;
-    cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
-    // Enable dropped frames.
-    cfg_.rc_dropframe_thresh = 1;
-    // Enable error_resilience mode.
-    cfg_.g_error_resilient  = 1;
-    // Enable dynamic resizing.
-    cfg_.rc_resize_allowed = 1;
-    // Run at low bitrate.
-    cfg_.rc_target_bitrate = 200;
-  }
-
-  std::vector< FrameInfo > frame_info_list_;
-  int set_cpu_used_;
-  bool change_bitrate_;
-  double mismatch_psnr_;
-  int mismatch_nframes_;
-};
-
-TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
-  ResizingVideoSource video;
-  video.flag_codec_ = 1;
-  DefaultConfig();
-  // Disable internal resize for this test.
-  cfg_.rc_resize_allowed = 0;
-  change_bitrate_ = false;
-  mismatch_psnr_ = 0.0;
-  mismatch_nframes_ = 0;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
-       info != frame_info_list_.end(); ++info) {
-    const unsigned int frame = static_cast<unsigned>(info->pts);
-    unsigned int expected_w;
-    unsigned int expected_h;
-    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight,
-                        &expected_w, &expected_h, 1);
-    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
-    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-  }
-}
-
-// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
-// Run at low bitrate, with resize_allowed = 1, and verify that we get
-// one resize down event.
-TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 299);
-  DefaultConfig();
-  cfg_.g_w = 352;
-  cfg_.g_h = 288;
-  change_bitrate_ = false;
-  mismatch_psnr_ = 0.0;
-  mismatch_nframes_ = 0;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  unsigned int last_w = cfg_.g_w;
-  unsigned int last_h = cfg_.g_h;
-  int resize_count = 0;
-  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
-       info != frame_info_list_.end(); ++info) {
-    if (info->w != last_w || info->h != last_h) {
-      // Verify that resize down occurs.
-      ASSERT_LT(info->w, last_w);
-      ASSERT_LT(info->h, last_h);
-      last_w = info->w;
-      last_h = info->h;
-      resize_count++;
-    }
-  }
-
-#if CONFIG_VP9_DECODER
-  // Verify that we get 1 resize down event in this test.
-  ASSERT_EQ(1, resize_count) << "Resizing should occur.";
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-#else
-  printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
-#endif
-}
-
-// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
-// Start at low target bitrate, raise the bitrate in the middle of the clip,
-// scaling-up should occur after bitrate changed.
-TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 359);
-  DefaultConfig();
-  cfg_.g_w = 352;
-  cfg_.g_h = 288;
-  change_bitrate_ = true;
-  mismatch_psnr_ = 0.0;
-  mismatch_nframes_ = 0;
-  // Disable dropped frames.
-  cfg_.rc_dropframe_thresh = 0;
-  // Starting bitrate low.
-  cfg_.rc_target_bitrate = 80;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  unsigned int last_w = cfg_.g_w;
-  unsigned int last_h = cfg_.g_h;
-  int resize_count = 0;
-  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
-       info != frame_info_list_.end(); ++info) {
-    if (info->w != last_w || info->h != last_h) {
-      resize_count++;
-      if (resize_count == 1) {
-        // Verify that resize down occurs.
-        ASSERT_LT(info->w, last_w);
-        ASSERT_LT(info->h, last_h);
-      } else if (resize_count == 2) {
-        // Verify that resize up occurs.
-        ASSERT_GT(info->w, last_w);
-        ASSERT_GT(info->h, last_h);
-      }
-      last_w = info->w;
-      last_h = info->h;
-    }
-  }
-
-#if CONFIG_VP9_DECODER
-  // Verify that we get 2 resize events in this test.
-  ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
-  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
-#else
-  printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
-#endif
-}
-
-vpx_img_fmt_t CspForFrameNumber(int frame) {
-  if (frame < 10)
-    return VPX_IMG_FMT_I420;
-  if (frame < 20)
-    return VPX_IMG_FMT_I444;
-  return VPX_IMG_FMT_I420;
-}
-
-class ResizeCspTest : public ResizeTest {
- protected:
-#if WRITE_COMPRESSED_STREAM
-  ResizeCspTest()
-      : ResizeTest(),
-        frame0_psnr_(0.0),
-        outfile_(NULL),
-        out_frames_(0) {}
-#else
-  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
-#endif
-
-  virtual ~ResizeCspTest() {}
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-#if WRITE_COMPRESSED_STREAM
-    outfile_ = fopen("vp91-2-05-cspchape.ivf", "wb");
-#endif
-  }
-
-  virtual void EndPassHook() {
-#if WRITE_COMPRESSED_STREAM
-    if (outfile_) {
-      if (!fseek(outfile_, 0, SEEK_SET))
-        write_ivf_file_header(&cfg_, out_frames_, outfile_);
-      fclose(outfile_);
-      outfile_ = NULL;
-    }
-#endif
-  }
-
-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  libvpx_test::Encoder *encoder) {
-    if (CspForFrameNumber(video->frame()) != VPX_IMG_FMT_I420 &&
-        cfg_.g_profile != 1) {
-      cfg_.g_profile = 1;
-      encoder->Config(&cfg_);
-    }
-    if (CspForFrameNumber(video->frame()) == VPX_IMG_FMT_I420 &&
-        cfg_.g_profile != 0) {
-      cfg_.g_profile = 0;
-      encoder->Config(&cfg_);
-    }
-  }
-
-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    if (!frame0_psnr_)
-      frame0_psnr_ = pkt->data.psnr.psnr[0];
-    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
-  }
-
-#if WRITE_COMPRESSED_STREAM
-  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
-    ++out_frames_;
-
-    // Write initial file header if first frame.
-    if (pkt->data.frame.pts == 0)
-      write_ivf_file_header(&cfg_, 0, outfile_);
-
-    // Write frame header and data.
-    write_ivf_frame_header(pkt, outfile_);
-    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
-  }
-#endif
-
-  double frame0_psnr_;
-#if WRITE_COMPRESSED_STREAM
-  FILE *outfile_;
-  unsigned int out_frames_;
-#endif
-};
-
-class ResizingCspVideoSource : public ::libvpx_test::DummyVideoSource {
- public:
-  ResizingCspVideoSource() {
-    SetSize(kInitialWidth, kInitialHeight);
-    limit_ = 30;
-  }
-
-  virtual ~ResizingCspVideoSource() {}
-
- protected:
-  virtual void Next() {
-    ++frame_;
-    SetImageFormat(CspForFrameNumber(frame_));
-    FillFrame();
-  }
-};
-
-TEST_P(ResizeCspTest, TestResizeCspWorks) {
-  ResizingCspVideoSource video;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
-  cfg_.g_lag_in_frames = 0;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
 VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ResizeTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
 VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
                          ::testing::Values(::libvpx_test::kOnePassBest));
-VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest,
-                          ::testing::Values(::libvpx_test::kRealTime),
-                          ::testing::Range(5, 9));
-VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
-                          ::testing::Values(::libvpx_test::kRealTime));
 }  // namespace
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -53,7 +53,7 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  cpi.common.mb_rows = 240 >> 4;
  cpi.common.mb_cols = 320 >> 4;
  const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
-  memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));
+  vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));

  // Segment map
  cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
@@ -61,9 +61,9 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  // Allocate memory for the source memory map.
  unsigned char *roi_map =
    reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
-  memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
-  memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
-  memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));
+  vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
+  vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
+  vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));

  // Do a test call with valid parameters.
  int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
--- a/test/simple_encoder.sh
+++ b/test/simple_encoder.sh
@@ -23,7 +23,7 @@ simple_encoder_verify_environment() {
  fi
 }

-# Runs simple_encoder using the codec specified by $1 with a frame limit of 100.
+# Runs simple_encoder using the codec specified by $1.
 simple_encoder() {
  local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}"
  local codec="$1"
@@ -35,7 +35,7 @@ simple_encoder() {
  fi

  eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \
-      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \
+      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 \
      ${devnull}

  [ -e "${output_file}" ] || return 1
@@ -47,13 +47,16 @@ simple_encoder_vp8() {
  fi
 }

-simple_encoder_vp9() {
+# TODO(tomfinegan): Add a frame limit param to simple_encoder and enable this
+# test. VP9 is just too slow right now: This test takes 4m30s+ on a fast
+# machine.
+DISABLED_simple_encoder_vp9() {
  if [ "$(vp9_encode_available)" = "yes" ]; then
    simple_encoder vp9 || return 1
  fi
 }

 simple_encoder_tests="simple_encoder_vp8
-                      simple_encoder_vp9"
+                      DISABLED_simple_encoder_vp9"

 run_tests simple_encoder_verify_environment "${simple_encoder_tests}"
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -11,15 +11,13 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

@@ -186,48 +184,58 @@ TEST_P(SixtapPredictTest, TestWithRandomData) {

 using std::tr1::make_tuple;

+const SixtapPredictFunc sixtap_16x16_c = vp8_sixtap_predict16x16_c;
+const SixtapPredictFunc sixtap_8x8_c = vp8_sixtap_predict8x8_c;
+const SixtapPredictFunc sixtap_8x4_c = vp8_sixtap_predict8x4_c;
+const SixtapPredictFunc sixtap_4x4_c = vp8_sixtap_predict4x4_c;
 INSTANTIATE_TEST_CASE_P(
    C, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
-        make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
+        make_tuple(16, 16, sixtap_16x16_c),
+        make_tuple(8, 8, sixtap_8x8_c),
+        make_tuple(8, 4, sixtap_8x4_c),
+        make_tuple(4, 4, sixtap_4x4_c)));
 #if HAVE_NEON
+const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon;
+const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
+const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
 INSTANTIATE_TEST_CASE_P(
-    NEON, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_neon)));
+    DISABLED_NEON, SixtapPredictTest, ::testing::Values(
+        make_tuple(16, 16, sixtap_16x16_neon),
+        make_tuple(8, 8, sixtap_8x8_neon),
+        make_tuple(8, 4, sixtap_8x4_neon)));
 #endif
 #if HAVE_MMX
+const SixtapPredictFunc sixtap_16x16_mmx = vp8_sixtap_predict16x16_mmx;
+const SixtapPredictFunc sixtap_8x8_mmx = vp8_sixtap_predict8x8_mmx;
+const SixtapPredictFunc sixtap_8x4_mmx = vp8_sixtap_predict8x4_mmx;
+const SixtapPredictFunc sixtap_4x4_mmx = vp8_sixtap_predict4x4_mmx;
 INSTANTIATE_TEST_CASE_P(
    MMX, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx),
-        make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
+        make_tuple(16, 16, sixtap_16x16_mmx),
+        make_tuple(8, 8, sixtap_8x8_mmx),
+        make_tuple(8, 4, sixtap_8x4_mmx),
+        make_tuple(4, 4, sixtap_4x4_mmx)));
 #endif
 #if HAVE_SSE2
+const SixtapPredictFunc sixtap_16x16_sse2 = vp8_sixtap_predict16x16_sse2;
+const SixtapPredictFunc sixtap_8x8_sse2 = vp8_sixtap_predict8x8_sse2;
+const SixtapPredictFunc sixtap_8x4_sse2 = vp8_sixtap_predict8x4_sse2;
 INSTANTIATE_TEST_CASE_P(
    SSE2, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
+        make_tuple(16, 16, sixtap_16x16_sse2),
+        make_tuple(8, 8, sixtap_8x8_sse2),
+        make_tuple(8, 4, sixtap_8x4_sse2)));
 #endif
 #if HAVE_SSSE3
+const SixtapPredictFunc sixtap_16x16_ssse3 = vp8_sixtap_predict16x16_ssse3;
+const SixtapPredictFunc sixtap_8x8_ssse3 = vp8_sixtap_predict8x8_ssse3;
+const SixtapPredictFunc sixtap_8x4_ssse3 = vp8_sixtap_predict8x4_ssse3;
+const SixtapPredictFunc sixtap_4x4_ssse3 = vp8_sixtap_predict4x4_ssse3;
 INSTANTIATE_TEST_CASE_P(
    SSSE3, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
-        make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, SixtapPredictTest, ::testing::Values(
-        make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
-        make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
-        make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
-        make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
+        make_tuple(16, 16, sixtap_16x16_ssse3),
+        make_tuple(8, 8, sixtap_8x8_ssse3),
+        make_tuple(8, 4, sixtap_8x4_ssse3),
+        make_tuple(4, 4, sixtap_4x4_ssse3)));
 #endif
 }  // namespace
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -0,0 +1,123 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vp8/encoder/block.h"
+#include "vpx_mem/vpx_mem.h"
+
+typedef void (*SubtractBlockFunc)(BLOCK *be, BLOCKD *bd, int pitch);
+
+namespace {
+
+class SubtractBlockTest : public ::testing::TestWithParam<SubtractBlockFunc> {
+ public:
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+};
+
+using libvpx_test::ACMRandom;
+
+TEST_P(SubtractBlockTest, SimpleSubtract) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  BLOCK be;
+  BLOCKD bd;
+  // in libvpx, the src_diff/predictor stride is always 16
+  const int kDiffPredStride = 16;
+  const int kSrcStride[] = {32, 16, 8, 4, 0};
+  const int kBlockWidth = 4;
+  const int kBlockHeight = 4;
+
+  // Allocate... align to 16 for mmx/sse tests
+  uint8_t *source = reinterpret_cast<uint8_t*>(
+      vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
+  be.src_diff = reinterpret_cast<int16_t*>(
+      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
+  bd.predictor = reinterpret_cast<unsigned char*>(
+      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
+
+  for (int i = 0; kSrcStride[i] > 0; ++i) {
+    // start at block0
+    be.src = 0;
+    be.base_src = &source;
+    be.src_stride = kSrcStride[i];
+
+    // set difference
+    int16_t *src_diff = be.src_diff;
+    for (int r = 0; r < kBlockHeight; ++r) {
+      for (int c = 0; c < kBlockWidth; ++c) {
+        src_diff[c] = static_cast<int16_t>(0xa5a5u);
+      }
+      src_diff += kDiffPredStride;
+    }
+
+    // set source
+    uint8_t *base_src = *be.base_src;
+    for (int r = 0; r < kBlockHeight; ++r) {
+      for (int c = 0; c < kBlockWidth; ++c) {
+        base_src[c] = rnd.Rand8();
+      }
+      base_src += be.src_stride;
+    }
+
+    // set predictor
+    uint8_t *predictor = bd.predictor;
+    for (int r = 0; r < kBlockHeight; ++r) {
+      for (int c = 0; c < kBlockWidth; ++c) {
+        predictor[c] = rnd.Rand8();
+      }
+      predictor += kDiffPredStride;
+    }
+
+    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
+
+    base_src = *be.base_src;
+    src_diff = be.src_diff;
+    predictor = bd.predictor;
+    for (int r = 0; r < kBlockHeight; ++r) {
+      for (int c = 0; c < kBlockWidth; ++c) {
+        EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
+                                                             << ", c = " << c;
+      }
+      src_diff += kDiffPredStride;
+      predictor += kDiffPredStride;
+      base_src += be.src_stride;
+    }
+  }
+  vpx_free(be.src_diff);
+  vpx_free(source);
+  vpx_free(bd.predictor);
+}
+
+INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
+                        ::testing::Values(vp8_subtract_b_c));
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
+                        ::testing::Values(vp8_subtract_b_neon));
+#endif
+
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
+                        ::testing::Values(vp8_subtract_b_mmx));
+#endif
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
+                        ::testing::Values(vp8_subtract_b_sse2));
+#endif
+
+}  // namespace
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -16,12 +16,8 @@

 namespace {

-const int kTestMode = 0;
-
-typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam;
-
 class SuperframeTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> {
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL),
      last_sf_pts_(0) {}
@@ -29,9 +25,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,

  virtual void SetUp() {
    InitializeConfig();
-    const SuperframeTestParam input = GET_PARAM(1);
-    const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
-    SetMode(mode);
+    SetMode(GET_PARAM(1));
    sf_count_ = 0;
    sf_count_max_ = INT_MAX;
  }
@@ -56,7 +50,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
    const uint8_t marker = buffer[pkt->data.frame.sz - 1];
    const int frames = (marker & 0x7) + 1;
    const int mag = ((marker >> 3) & 3) + 1;
-    const unsigned int index_sz = 2 + mag * frames;
+    const unsigned int index_sz = 2 + mag  * frames;
    if ((marker & 0xe0) == 0xc0 &&
        pkt->data.frame.sz >= index_sz &&
        buffer[pkt->data.frame.sz - index_sz] == marker) {
@@ -98,7 +92,6 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
  EXPECT_EQ(sf_count_, 1);
 }

-VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
-    ::testing::Values(::libvpx_test::kTwoPassGood),
-    ::testing::Values(0)));
+VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
+    ::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/Show More
+++ b/Show More