Release v1.5.0

Javan Whistling Duck release. Change-Id: If44c9ca16a8188b68759325fbacc771365cb4af8
vp9_dx_iface: move struct defs to separate header
2015-11-09 14:12:38 -08:00 · 2015-10-31 12:23:53 -07:00 · 2015-10-31 12:23:53 -07:00 · 2015-10-26 20:52:16 +00:00 · 2015-10-26 19:12:34 +00:00 · 2015-10-26 19:12:08 +00:00
801 changed files with 116753 additions and 85773 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -30,28 +30,32 @@
 /examples/decode_with_partial_drops
 /examples/example_xma
 /examples/postproc
+/examples/resize_util
 /examples/set_maps
 /examples/simple_decoder
 /examples/simple_encoder
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
+/examples/vp9_lossless_encoder
 /examples/vp9_spatial_scalable_encoder
 /examples/vpx_temporal_scalable_patterns
+/examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
 /ivfenc
 /ivfenc.dox
 /libvpx.so*
 /libvpx.ver
-/obj_int_extract
 /samples.dox
+/test_intra_pred_speed
 /test_libvpx
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
 /vpx_config.c
 /vpx_config.h
+/vpx_dsp_rtcd.h
 /vpx_scale_rtcd.h
 /vpx_version.h
 /vpxdec
--- a/.mailmap
+++ b/.mailmap
@@ -1,18 +1,36 @@
 Adrian Grange <agrange@google.com>
+Adrian Grange <agrange@google.com> <agrange@agrange-macbookpro.roam.corp.google.com>
+Aℓex Converse <aconverse@google.com>
+Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
+Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Deb Mukherjee <debargha@google.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
+Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
 Hangyu Kuang <hkuang@google.com>
+Hangyu Kuang <hkuang@google.com> <hkuang@hkuang-macbookpro.roam.corp.google.com>
+Hui Su <huisu@google.com>
+Jacky Chen <jackychen@google.com>
 Jim Bankoski <jimbankoski@google.com>
-John Koleszar <jkoleszar@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
 Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+Johann Koenig <johannkoenig@google.com> <johann.koenig@gmail.com>
+John Koleszar <jkoleszar@google.com>
+Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
+Marco Paniconi <marpan@google.com>
+Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
+Paul Wilkins <paulwilkins@google.com>
+Ralph Giles <giles@xiph.org> <giles@entropywave.com>
+Ralph Giles <giles@xiph.org> <giles@mozilla.com>
+Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
+Tamar Levy <tamar.levy@intel.com>
+Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
 Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
-Ralph Giles <giles@xiph.org> <giles@entropywave.com>
-Ralph Giles <giles@xiph.org> <giles@mozilla.com>
-Alpha Lam <hclam@google.com> <hclam@chromium.org>
-Deb Mukherjee <debargha@google.com>
+Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
+Yaowu Xu <yaowu@google.com> <yaowu@YAOWU2-W.ad.corp.google.com>
--- a/40
+++ b/40
@@ -3,10 +3,11 @@

 Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
+Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -14,44 +15,65 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
 Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
+Andrew Russell <anrussell@google.com>
+Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
 Attila Nagy <attilanagy@google.com>
+Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
+Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
 Christian Duvivier <cduvivier@google.com>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
+Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
-Erik Niemeyer <erik.a.niemeyer@gmail.com>
+Ed Baker <edward.baker@intel.com>
+Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com>
 Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
+Geza Lore <gezalore@gmail.com>
+Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
+Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Guillaume Martres <gmartres@google.com>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 Hangyu Kuang <hkuang@google.com>
+Hanno Böck <hanno@hboeck.de>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
 Ivan Maltz <ivanmaltz@google.com>
+Jacek Caban <cjacek@gmail.com>
+Jacky Chen <jackychen@google.com>
 James Berry <jamesberry@google.com>
+James Yu <james.yu@linaro.org>
 James Zern <jzern@google.com>
+Jan Gerber <j@mailb.org>
 Jan Kratochvil <jan.kratochvil@redhat.com>
 Janne Salonen <jsalonen@google.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
+Jia Jia <jia.jia@linaro.org>
 Jim Bankoski <jimbankoski@google.com>
 Jingning Han <jingning@google.com>
+Joey Parrish <joeyparrish@google.com>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
+Johnny Klonaris <google@jawknee.com>
+John Stark <jhnstrk@gmail.com>
 Joshua Bleecher Snyder <josh@treelinelabs.com>
 Joshua Litt <joshualitt@google.com>
+Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 KO Myung-Hun <komh@chollian.net>
+Lawrence Velázquez <larryv@macports.org>
 Lou Quillio <louquillio@google.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
@@ -65,36 +87,48 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
+Minghai Shang <minghai@google.com>
 Morton Jonuschat <yabawock@gmail.com>
+Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
 Patrik Westin <patrik.westin@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
+Pengchong Jin <pengchong@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
 Rob Bradford <rob@linux.intel.com>
-Ronald S. Bultje <rbultje@google.com>
+Ronald S. Bultje <rsbultje@gmail.com>
+Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
+Sean McGovern <gseanmcg@gmail.com>
+Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
+Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
 Tamar Levy <tamar.levy@intel.com>
+Tao Bai <michaelbai@chromium.org>
 Tero Rintaluoma <teror@google.com>
 Thijs Vermeir <thijsvermeir@gmail.com>
+Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
+Yongzhe Wang <yongzhe@google.com>
 Yunqing Wang <yunqingwang@google.com>
+Zoe Liu <zoeliu@google.com>
 Google Inc.
 The Mozilla Foundation
 The Xiph.Org Foundation
--- a/40
+++ b/40
@@ -1,3 +1,43 @@
+2015-11-09 v1.5.0 "Javan Whistling Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
+    controls and adds a variety of VP9 controls for testing.
+
+    The vpxenc utility now prefers VP9 by default.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Smaller library size by combining functions used by VP8 and VP9
+
+  - Bug Fixes:
+    A variety of fuzzing issues
+
+2015-04-03 v1.4.0 "Indian Runner Duck"
+  This release includes significant improvements to the VP9 codec.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.3.0. It drops the compatibility
+    layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
+    controls for VP9.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Multithreaded VP9 decoding (tile and frame-based)
+    Multithreaded VP9 encoding - on by default
+    YUV 4:2:2 and 4:4:4 support in VP9
+    10 and 12bit support in VP9
+    64bit ARM support by replacing ARM assembly with intrinsics
+
+  - Bug Fixes:
+    Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
+    files.
+
+  - Known Issues:
+    Frame Parallel decoding fails for segmented and non-420 files.
+
 2013-11-15 v1.3.0 "Forest"
  This release introduces the VP9 codec in a backward-compatible way.
  All existing users of VP8 can continue to use the library without
--- a/2
+++ b/2
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
 enforcement activity against any entity (including a cross-claim or
 counterclaim in a lawsuit) alleging that any of these implementations of WebM
 or any code incorporated within any of these implementations of WebM
-constitutes direct or contributory patent infringement, or inducement of
+constitute direct or contributory patent infringement, or inducement of
 patent infringement, then any patent rights granted to you under this License
 for these implementations of WebM shall terminate as of the date such
 litigation is filed.
--- a/24
+++ b/24
@@ -1,4 +1,4 @@
-README - 30 May 2014
+README - 23 March 2015

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,10 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

-    armv5te-android-gcc
-    armv5te-linux-rvct
-    armv5te-linux-gcc
-    armv5te-none-rvct
    armv6-darwin-gcc
    armv6-linux-rvct
    armv6-linux-gcc
@@ -63,15 +59,10 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-none-rvct
    armv7-win32-vs11
    armv7-win32-vs12
+    armv7-win32-vs14
    armv7s-darwin-gcc
    mips32-linux-gcc
    mips64-linux-gcc
-    ppc32-darwin8-gcc
-    ppc32-darwin9-gcc
-    ppc32-linux-gcc
-    ppc64-darwin8-gcc
-    ppc64-darwin9-gcc
-    ppc64-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
@@ -82,6 +73,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin11-gcc
    x86-darwin12-gcc
    x86-darwin13-gcc
+    x86-darwin14-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
@@ -94,11 +86,14 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-win32-vs10
    x86-win32-vs11
    x86-win32-vs12
+    x86-win32-vs14
+    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
    x86_64-darwin11-gcc
    x86_64-darwin12-gcc
    x86_64-darwin13-gcc
+    x86_64-darwin14-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
@@ -109,12 +104,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
-    universal-darwin8-gcc
-    universal-darwin9-gcc
-    universal-darwin10-gcc
-    universal-darwin11-gcc
-    universal-darwin12-gcc
-    universal-darwin13-gcc
+    x86_64-win64-vs14
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
--- a/args.c
+++ b/args.c
@@ -14,9 +14,7 @@
 #include <limits.h>
 #include "args.h"

-#ifdef _MSC_VER
-#define snprintf _snprintf
-#endif
+#include "vpx_ports/msvc.h"

 #if defined(__GNUC__) && __GNUC__
 extern void die(const char *fmt, ...) __attribute__((noreturn));
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -1,18 +0,0 @@
-REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
-REM        source directories.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
-%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -43,7 +43,7 @@
 # will remove any NEON dependency.

 # To change to building armeabi, run ./libvpx/configure again, but with
-# --target=arm5te-android-gcc and modify the Application.mk file to
+# --target=armv6-android-gcc and modify the Application.mk file to
 # set APP_ABI := armeabi
 #
 # Running ndk-build will build libvpx and include it in your project.
@@ -60,13 +60,15 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),armeabi)
-  include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv6-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
+else ifeq ($(TARGET_ARCH_ABI),x86_64)
+  include $(CONFIG_DIR)libs-x86_64-android-gcc.mk
 else ifeq ($(TARGET_ARCH_ABI),mips)
  include $(CONFIG_DIR)libs-mips-android-gcc.mk
 else
@@ -91,51 +93,8 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

-# -----------------------------------------------------------------------------
-# Template  : asm_offsets_template
-# Arguments : 1: assembly offsets file to be created
-#             2: c file to base assembly offsets on
-# Returns   : None
-# Usage     : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>
-# Rationale : Create offsets at compile time using for structures that are
-#             defined in c, but used in assembly functions.
-# -----------------------------------------------------------------------------
-define asm_offsets_template
-
-_SRC:=$(2)
-_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
-
-_FLAGS = $$($$(my)CFLAGS) \
-          $$(call get-src-file-target-cflags,$(2)) \
-          $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
-          $$(LOCAL_CFLAGS) \
-          $$(NDK_APP_CFLAGS) \
-          $$(call host-c-includes,$$($(my)C_INCLUDES)) \
-          -DINLINE_ASM \
-          -S \
-
-_TEXT = "Compile $$(call get-src-file-text,$(2))"
-_CC   = $$(TARGET_CC)
-
-$$(eval $$(call ev-build-file))
-
-$(1) : $$(_OBJ) $(2)
-	@mkdir -p $$(dir $$@)
-	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
-endef
-
-# Use ads2gas script to convert from RVCT format to GAS format.  This
-#  puts the processed file under $(ASM_CNV_PATH).  Local clean rule
-#  to handle removing these
-ifeq ($(CONFIG_VP8_ENCODER), yes)
-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
-endif
-ifeq ($(HAVE_NEON_ASM), yes)
-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
-endif
-
 .PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
 	@mkdir -p $(dir $@)
 	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

@@ -201,45 +160,44 @@ LOCAL_CFLAGS += \

 LOCAL_MODULE := libvpx

-LOCAL_LDLIBS := -llog
-
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
  LOCAL_STATIC_LIBRARIES := cpufeatures
 endif

 # Add a dependency to force generation of the RTCD files.
+define rtcd_dep_template
+rtcd_dep_template_SRCS := $(addprefix $(LOCAL_PATH)/, $(LOCAL_SRC_FILES))
+rtcd_dep_template_SRCS := $$(rtcd_dep_template_SRCS:.neon=)
 ifeq ($(CONFIG_VP8), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
+$$(rtcd_dep_template_SRCS): vp8_rtcd.h
 endif
 ifeq ($(CONFIG_VP9), yes)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
+$$(rtcd_dep_template_SRCS): vp9_rtcd.h
 endif
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
+ifeq ($(CONFIG_VP10), yes)
+$$(rtcd_dep_template_SRCS): vp10_rtcd.h
+endif
+$$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
+$$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h

-ifeq ($(TARGET_ARCH_ABI),x86)
-$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
+ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
+$$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
+endef
+
+$(eval $(call rtcd_dep_template))

 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
 	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	@$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
 	@$(RM) -r $(ASM_CNV_PATH)
 	@$(RM) $(CLEAN-OBJS)

-include $(BUILD_SHARED_LIBRARY)
-
-ifeq ($(HAVE_NEON), yes)
-  $(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
-    $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
-endif
-
-ifeq ($(CONFIG_VP8_ENCODER), yes)
-  $(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
-    $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
+ifeq ($(ENABLE_SHARED),1)
+  include $(BUILD_SHARED_LIBRARY)
+else
+  include $(BUILD_STATIC_LIBRARY)
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -22,8 +22,10 @@ clean:: .DEFAULT
 exampletest: .DEFAULT
 install:: .DEFAULT
 test:: .DEFAULT
+test-no-data-check:: .DEFAULT
 testdata:: .DEFAULT
 utiltest: .DEFAULT
+exampletest-no-data-check utiltest-no-data-check: .DEFAULT


 # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -56,13 +58,10 @@ dist:
        fi
 endif

+# Since we invoke make recursively for multiple targets we need to include the
+# .mk file for the correct target, but only when $(target) is non-empty.
 ifneq ($(target),)
-# Normally, we want to build the filename from the target and the toolchain.
-# This disambiguates from the $(target).mk file that exists in the source tree.
-# However, the toolchain is part of the target in universal builds, so we
-# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
-# if we're in the universal case.
-include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
+include $(target)-$(TOOLCHAIN).mk
 endif
 BUILD_ROOT?=.
 VPATH=$(SRC_PATH_BARE)
@@ -116,6 +115,9 @@ test::
 testdata::
 .PHONY: utiltest
 utiltest:
+.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
+test-no-data-check::
+exampletest-no-data-check utiltest-no-data-check:

 # Add compiler flags for intrinsic files
 ifeq ($(TOOLCHAIN), x86-os2-gcc)
@@ -138,6 +140,8 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(STACKREALIGN)
 $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(STACKREALIGN)
 $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(STACKREALIGN)
 $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.d: CFLAGS += $(STACKREALIGN)
+$(BUILD_PFX)%vp9_reconintra.c.o: CFLAGS += $(STACKREALIGN)

 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
@@ -146,6 +150,7 @@ $(BUILD_PFX)%.c.d: %.c

 $(BUILD_PFX)%.c.o: %.c
 	$(if $(quiet),@echo "    [CC] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cc.d: %.cc
@@ -155,6 +160,7 @@ $(BUILD_PFX)%.cc.d: %.cc

 $(BUILD_PFX)%.cc.o: %.cc
 	$(if $(quiet),@echo "    [CXX] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cpp.d: %.cpp
@@ -164,6 +170,7 @@ $(BUILD_PFX)%.cpp.d: %.cpp

 $(BUILD_PFX)%.cpp.o: %.cpp
 	$(if $(quiet),@echo "    [CXX] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.asm.d: %.asm
@@ -174,6 +181,7 @@ $(BUILD_PFX)%.asm.d: %.asm

 $(BUILD_PFX)%.asm.o: %.asm
 	$(if $(quiet),@echo "    [AS] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 $(BUILD_PFX)%.s.d: %.s
@@ -184,12 +192,14 @@ $(BUILD_PFX)%.s.d: %.s

 $(BUILD_PFX)%.s.o: %.s
 	$(if $(quiet),@echo "    [AS] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 .PRECIOUS: %.c.S
 %.c.S: CFLAGS += -DINLINE_ASM
 $(BUILD_PFX)%.c.S: %.c
 	$(if $(quiet),@echo "    [GEN] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

 .PRECIOUS: %.asm.s
@@ -216,14 +226,6 @@ else
 	$(qexec)cp $< $@
 endif

-#
-# Rule to extract assembly constants from C sources
-#
-obj_int_extract: build/make/obj_int_extract.c
-	$(if $(quiet),@echo "    [HOSTCC] $@")
-	$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
-CLEAN-OBJS += obj_int_extract
-
 #
 # Utility functions
 #
@@ -285,7 +287,7 @@ define archive_template
 # for creating them.
 $(1):
 	$(if $(quiet),@echo "    [AR] $$@")
-	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$?
+	$(qexec)$$(AR) $$(ARFLAGS) $$@ $$^
 endef

 define so_template
@@ -315,18 +317,15 @@ $(1):
        $$(filter %.o,$$^) $$(extralibs)
 endef

-
-
-define lipo_lib_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
-	$(if $(quiet),@echo "    [LIPO] $$@")
-	$(qexec)libtool -static -o $$@ $$?
-endef
-
-define lipo_bin_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
-	$(if $(quiet),@echo "    [LIPO] $$@")
-	$(qexec)lipo -output $$@ -create $$?
+define dll_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+$(1):
+	$(if $(quiet),@echo "    [LD] $$@")
+	$(qexec)$$(LD) -Zdll $$(LDFLAGS) \
+        -o $$@ \
+        $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
 endef


@@ -340,9 +339,11 @@ endif
 skip_deps := $(filter %clean,$(MAKECMDGOALS))
 skip_deps += $(findstring testdata,$(MAKECMDGOALS))
 ifeq ($(strip $(skip_deps)),)
-  # Older versions of make don't like -include directives with no arguments
-  ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
-    -include $(filter %.d,$(OBJS-yes:.o=.d))
+  ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
+    # Older versions of make don't like -include directives with no arguments
+    ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
+      -include $(filter %.d,$(OBJS-yes:.o=.d))
+    endif
  endif
 endif

@@ -383,8 +384,9 @@ LIBS=$(call enabled,LIBS)
 .libs: $(LIBS)
 	@touch $@
 $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
-$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
-$(foreach lib,$(filter %$(VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
+$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
+$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
+$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))

 INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
 ifeq ($(MAKECMDGOALS),dist)
@@ -424,11 +426,7 @@ ifneq ($(call enabled,DIST-SRCS),)
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
-    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat
-    DIST-SRCS-$(CONFIG_MSVS)  += build/arm-msvs/obj_int_extract.bat
    DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
-    # Include obj_int_extract if we use offsets from *_asm_*_offsets
-    DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64)    += build/make/obj_int_extract.c
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2armasm_ms.pl
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -73,6 +73,10 @@ generate_filter() {
                open_tag File RelativePath="$f"

                if [ "$pat" == "asm" ] && $asm_use_custom_step; then
+                    # Avoid object file name collisions, i.e. vpx_config.c and
+                    # vpx_config.asm produce the same object file without
+                    # this additional suffix.
+                    objf=${objf%.obj}_asm.obj
                    for plat in "${platforms[@]}"; do
                        for cfg in Debug Release; do
                            open_tag FileConfiguration \
@@ -295,22 +299,7 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
-                    obj_int_extract)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="0" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
-                            RuntimeLibrary="$debug_runtime" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="1" \
-                            $warn_64bit \
-                    ;;
                    vpx)
-                        tag Tool \
-                            Name="VCPreBuildEventTool" \
-                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
-
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="0" \
@@ -347,11 +336,6 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
-                            obj_int_extract)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    GenerateDebugInformation="true" \
-                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
@@ -400,24 +384,7 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
-                    obj_int_extract)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="2" \
-                            FavorSizeorSpeed="1" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
-                            RuntimeLibrary="$release_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="0" \
-                            $warn_64bit \
-                    ;;
                    vpx)
-                        tag Tool \
-                            Name="VCPreBuildEventTool" \
-                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
-
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="2" \
@@ -456,11 +423,6 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
-                            obj_int_extract)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    GenerateDebugInformation="true" \
-                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -19,13 +19,13 @@ show_help() {
    cat <<EOF
 Usage: ${self_basename} [options] file1 [file2 ...]

-This script generates a Visual Studio 2005 solution file from a list of project
+This script generates a Visual Studio solution file from a list of project
 files.

 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11) of visual studio to generate for
+    --ver=version               Version (7,8,9,10,11,12,14) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@@ -255,7 +255,7 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             [789]|10|11|12)
+             [789]|10|11|12|14)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
@@ -300,12 +300,15 @@ case "${vs_ver:-8}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
+    14) sln_vers="14.00"
+       sln_vers_str="Visual Studio 2015"
+    ;;
 esac
 case "${vs_ver:-8}" in
    [789])
    sfx=vcproj
    ;;
-    10|11|12)
+    10|11|12|14)
    sfx=vcxproj
    ;;
 esac
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12)
+                10|11|12|14)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@@ -218,7 +218,7 @@ guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12)
+    10|11|12|14)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@@ -262,15 +262,9 @@ case "$target" in
        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"
    ;;
    arm*)
-        asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
-        asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
-        if [ "$name" = "obj_int_extract" ]; then
-            # We don't want to build this tool for the target architecture,
-            # but for an architecture we can run locally during the build.
-            platforms[0]="Win32"
-        else
-            platforms[0]="ARM"
-        fi
+        platforms[0]="ARM"
+        asm_Debug_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
+        asm_Release_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
    ;;
    *) die "Unsupported target $target!"
    ;;
@@ -350,6 +344,9 @@ generate_vcxproj() {
                # has to enable AppContainerApplication as well.
                tag_content PlatformToolset v120
            fi
+            if [ "$vs_ver" = "14" ]; then
+                tag_content PlatformToolset v140
+            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
@@ -400,23 +397,13 @@ generate_vcxproj() {
                if [ "$hostplat" == "ARM" ]; then
                    hostplat=Win32
                fi
-                open_tag PreBuildEvent
-                tag_content Command "call obj_int_extract.bat &quot;$src_path_bare&quot; $hostplat\\\$(Configuration)"
-                close_tag PreBuildEvent
            fi
            open_tag ClCompile
            if [ "$config" = "Debug" ]; then
                opt=Disabled
                runtime=$debug_runtime
                curlibs=$debug_libs
-                case "$name" in
-                obj_int_extract)
-                    debug=DEBUG
-                    ;;
-                *)
-                    debug=_DEBUG
-                    ;;
-                esac
+                debug=_DEBUG
            else
                opt=MaxSpeed
                runtime=$release_runtime
@@ -424,14 +411,7 @@ generate_vcxproj() {
                tag_content FavorSizeOrSpeed Speed
                debug=NDEBUG
            fi
-            case "$name" in
-            obj_int_extract)
-                extradefines=";_CONSOLE"
-                ;;
-            *)
-                extradefines=";$defines"
-                ;;
-            esac
+            extradefines=";$defines"
            tag_content Optimization $opt
            tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
            tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
@@ -451,10 +431,6 @@ generate_vcxproj() {
            case "$proj_kind" in
            exe)
                open_tag Link
-                if [ "$name" != "obj_int_extract" ]; then
-                    tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
-                    tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
-                fi
                tag_content GenerateDebugInformation true
                # Console is the default normally, but if
                # AppContainerApplication is set, we need to override it.
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -18,15 +18,18 @@ set -e
 devnull='> /dev/null 2>&1'

 BUILD_ROOT="_iosbuild"
+CONFIGURE_ARGS="--disable-docs
+                --disable-examples
+                --disable-libyuv
+                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
-MAKE_JOBS=1
-LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
+SCRIPT_DIR=$(dirname "$0")
+LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
 TARGETS="arm64-darwin-gcc
-         armv6-darwin-gcc
         armv7-darwin-gcc
         armv7s-darwin-gcc
         x86-iphonesimulator-gcc
@@ -37,15 +40,24 @@ TARGETS="arm64-darwin-gcc
 build_target() {
  local target="$1"
  local old_pwd="$(pwd)"
+  local target_specific_flags=""

  vlog "***Building target: ${target}***"

+  case "${target}" in
+    x86-*)
+      target_specific_flags="--enable-pic"
+      vlog "Enabled PIC for ${target}"
+      ;;
+  esac
+
  mkdir "${target}"
  cd "${target}"
-  eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-      --disable-docs ${EXTRA_CONFIGURE_ARGS} ${devnull}
+  eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
+    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \
+    ${devnull}
  export DIST_DIR
-  eval make -j ${MAKE_JOBS} dist ${devnull}
+  eval make dist ${devnull}
  cd "${old_pwd}"

  vlog "***Done building target: ${target}***"
@@ -58,9 +70,6 @@ target_to_preproc_symbol() {
    arm64-*)
      echo "__aarch64__"
      ;;
-    armv6-*)
-      echo "__ARM_ARCH_6__"
-      ;;
    armv7-*)
      echo "__ARM_ARCH_7A__"
      ;;
@@ -176,8 +185,13 @@ build_framework() {
 # Trap function. Cleans up the subtree used to build all targets contained in
 # $TARGETS.
 cleanup() {
+  local readonly res=$?
  cd "${ORIG_PWD}"

+  if [ $res -ne 0 ]; then
+    elog "build exited with error ($res)"
+  fi
+
  if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
    rm -rf "${BUILD_ROOT}"
  fi
@@ -187,14 +201,22 @@ iosbuild_usage() {
 cat << EOF
  Usage: ${0##*/} [arguments]
    --help: Display this message and exit.
-    --jobs: Number of make jobs.
+    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
    --preserve-build-output: Do not delete the build directory.
    --show-build-output: Show output from each library build.
+    --targets <targets>: Override default target list. Defaults:
+         ${TARGETS}
+    --test-link: Confirms all targets can be linked. Functionally identical to
+                 passing --enable-examples via --extra-configure-args.
    --verbose: Output information about the environment and each stage of the
               build.
 EOF
 }

+elog() {
+  echo "${0##*/} failed because: $@" 1>&2
+}
+
 vlog() {
  if [ "${VERBOSE}" = "yes" ]; then
    echo "$@"
@@ -214,16 +236,19 @@ while [ -n "$1" ]; do
      iosbuild_usage
      exit
      ;;
-    --jobs)
-      MAKE_JOBS="$2"
-      shift
-      ;;
    --preserve-build-output)
      PRESERVE_BUILD_OUTPUT=yes
      ;;
    --show-build-output)
      devnull=
      ;;
+    --test-link)
+      EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples"
+      ;;
+    --targets)
+      TARGETS="$2"
+      shift
+      ;;
    --verbose)
      VERBOSE=yes
      ;;
@@ -239,16 +264,19 @@ if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
  BUILD_ROOT=${BUILD_ROOT}
  DIST_DIR=${DIST_DIR}
+  CONFIGURE_ARGS=${CONFIGURE_ARGS}
  EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
  FRAMEWORK_DIR=${FRAMEWORK_DIR}
  HEADER_DIR=${HEADER_DIR}
-  MAKE_JOBS=${MAKE_JOBS}
-  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
  LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR}
  LIPO=${LIPO}
+  MAKEFLAGS=${MAKEFLAGS}
  ORIG_PWD=${ORIG_PWD}
+  PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT}
  TARGETS="${TARGETS}"
 EOF
 fi

 build_framework "${TARGETS}"
+echo "Successfully built '${FRAMEWORK_DIR}' for:"
+echo "         ${TARGETS}"
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,857 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "vpx_config.h"
-#include "vpx/vpx_integer.h"
-
-typedef enum {
-  OUTPUT_FMT_PLAIN,
-  OUTPUT_FMT_RVDS,
-  OUTPUT_FMT_GAS,
-  OUTPUT_FMT_C_HEADER,
-} output_fmt_t;
-
-int log_msg(const char *fmt, ...) {
-  int res;
-  va_list ap;
-  va_start(ap, fmt);
-  res = vfprintf(stderr, fmt, ap);
-  va_end(ap);
-  return res;
-}
-
-#if defined(__GNUC__) && __GNUC__
-
-#if defined(FORCE_PARSE_ELF)
-
-#if defined(__MACH__)
-#undef __MACH__
-#endif
-
-#if !defined(__ELF__)
-#define __ELF__
-#endif
-#endif
-
-#if defined(__MACH__)
-
-#include <mach-o/loader.h>
-#include <mach-o/nlist.h>
-
-int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
-  switch (mode) {
-    case OUTPUT_FMT_RVDS:
-      printf("%-40s EQU %5d\n", name, val);
-      return 0;
-    case OUTPUT_FMT_GAS:
-      printf(".set %-40s, %5d\n", name, val);
-      return 0;
-    case OUTPUT_FMT_C_HEADER:
-      printf("#define %-40s %5d\n", name, val);
-      return 0;
-    default:
-      log_msg("Unsupported mode: %d", mode);
-      return 1;
-  }
-}
-
-int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
-  int i, j;
-  struct mach_header header;
-  uint8_t *buf = base_buf;
-  int base_data_section = 0;
-  int bits = 0;
-
-  /* We can read in mach_header for 32 and 64 bit architectures
-   * because it's identical to mach_header_64 except for the last
-   * element (uint32_t reserved), which we don't use. Then, when
-   * we know which architecture we're looking at, increment buf
-   * appropriately.
-   */
-  memcpy(&header, buf, sizeof(struct mach_header));
-
-  if (header.magic == MH_MAGIC) {
-    if (header.cputype == CPU_TYPE_ARM
-        || header.cputype == CPU_TYPE_X86) {
-      bits = 32;
-      buf += sizeof(struct mach_header);
-    } else {
-      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
-      goto bail;
-    }
-  } else if (header.magic == MH_MAGIC_64) {
-    if (header.cputype == CPU_TYPE_X86_64) {
-      bits = 64;
-      buf += sizeof(struct mach_header_64);
-    } else {
-      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
-      goto bail;
-    }
-  } else {
-    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
-            MH_MAGIC, MH_MAGIC_64, header.magic);
-    goto bail;
-  }
-
-  if (header.filetype != MH_OBJECT) {
-    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
-    goto bail;
-  }
-
-  for (i = 0; i < header.ncmds; i++) {
-    struct load_command lc;
-
-    memcpy(&lc, buf, sizeof(struct load_command));
-
-    if (lc.cmd == LC_SEGMENT) {
-      uint8_t *seg_buf = buf;
-      struct section s;
-      struct segment_command seg_c;
-
-      memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
-      seg_buf += sizeof(struct segment_command);
-
-      /* Although each section is given it's own offset, nlist.n_value
-       * references the offset of the first section. This isn't
-       * apparent without debug information because the offset of the
-       * data section is the same as the first section. However, with
-       * debug sections mixed in, the offset of the debug section
-       * increases but n_value still references the first section.
-       */
-      if (seg_c.nsects < 1) {
-        log_msg("Not enough sections\n");
-        goto bail;
-      }
-
-      memcpy(&s, seg_buf, sizeof(struct section));
-      base_data_section = s.offset;
-    } else if (lc.cmd == LC_SEGMENT_64) {
-      uint8_t *seg_buf = buf;
-      struct section_64 s;
-      struct segment_command_64 seg_c;
-
-      memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
-      seg_buf += sizeof(struct segment_command_64);
-
-      /* Explanation in LG_SEGMENT */
-      if (seg_c.nsects < 1) {
-        log_msg("Not enough sections\n");
-        goto bail;
-      }
-
-      memcpy(&s, seg_buf, sizeof(struct section_64));
-      base_data_section = s.offset;
-    } else if (lc.cmd == LC_SYMTAB) {
-      if (base_data_section != 0) {
-        struct symtab_command sc;
-        uint8_t *sym_buf = base_buf;
-        uint8_t *str_buf = base_buf;
-
-        memcpy(&sc, buf, sizeof(struct symtab_command));
-
-        if (sc.cmdsize != sizeof(struct symtab_command)) {
-          log_msg("Can't find symbol table!\n");
-          goto bail;
-        }
-
-        sym_buf += sc.symoff;
-        str_buf += sc.stroff;
-
-        for (j = 0; j < sc.nsyms; j++) {
-          /* Location of string is cacluated each time from the
-           * start of the string buffer.  On darwin the symbols
-           * are prefixed by "_", so we bump the pointer by 1.
-           * The target value is defined as an int in *_asm_*_offsets.c,
-           * which is 4 bytes on all targets we currently use.
-           */
-          if (bits == 32) {
-            struct nlist nl;
-            int val;
-
-            memcpy(&nl, sym_buf, sizeof(struct nlist));
-            sym_buf += sizeof(struct nlist);
-
-            memcpy(&val, base_buf + base_data_section + nl.n_value,
-                   sizeof(val));
-            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
-          } else { /* if (bits == 64) */
-            struct nlist_64 nl;
-            int val;
-
-            memcpy(&nl, sym_buf, sizeof(struct nlist_64));
-            sym_buf += sizeof(struct nlist_64);
-
-            memcpy(&val, base_buf + base_data_section + nl.n_value,
-                   sizeof(val));
-            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
-          }
-        }
-      }
-    }
-
-    buf += lc.cmdsize;
-  }
-
-  return 0;
-bail:
-  return 1;
-
-}
-
-#elif defined(__ELF__)
-#include "elf.h"
-
-#define COPY_STRUCT(dst, buf, ofst, sz) do {\
-    if(ofst + sizeof((*(dst))) > sz) goto bail;\
-    memcpy(dst, buf+ofst, sizeof((*(dst))));\
-  } while(0)
-
-#define ENDIAN_ASSIGN(val, memb) do {\
-    if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
-    (val) = (memb);\
-  } while(0)
-
-#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
-    ENDIAN_ASSIGN(memb, memb);\
-  } while(0)
-
-typedef struct {
-  uint8_t      *buf; /* Buffer containing ELF data */
-  size_t        sz;  /* Buffer size */
-  int           le_data; /* Data is little-endian */
-  unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
-  int           bits; /* 32 or 64 */
-  Elf32_Ehdr    hdr32;
-  Elf64_Ehdr    hdr64;
-} elf_obj_t;
-
-int parse_elf_header(elf_obj_t *elf) {
-  int res;
-  /* Verify ELF Magic numbers */
-  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
-  res = elf->e_ident[EI_MAG0] == ELFMAG0;
-  res &= elf->e_ident[EI_MAG1] == ELFMAG1;
-  res &= elf->e_ident[EI_MAG2] == ELFMAG2;
-  res &= elf->e_ident[EI_MAG3] == ELFMAG3;
-  res &= elf->e_ident[EI_CLASS] == ELFCLASS32
-         || elf->e_ident[EI_CLASS] == ELFCLASS64;
-  res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
-
-  if (!res) goto bail;
-
-  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
-
-  /* Read in relevant values */
-  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
-    elf->bits = 32;
-    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
-
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
-  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
-    elf->bits = 64;
-    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
-
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
-  }
-
-  return 0;
-bail:
-  log_msg("Failed to parse ELF file header");
-  return 1;
-}
-
-int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
-  if (hdr32) {
-    if (idx >= elf->hdr32.e_shnum)
-      goto bail;
-
-    COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
-                elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
-  } else { /* if (hdr64) */
-    if (idx >= elf->hdr64.e_shnum)
-      goto bail;
-
-    COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
-                elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
-  }
-
-  return 0;
-bail:
-  return 1;
-}
-
-const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
-  if (elf->bits == 32) {
-    Elf32_Shdr shdr;
-
-    if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
-      log_msg("Failed to parse ELF string table: section %d, index %d\n",
-              s_idx, idx);
-      return "";
-    }
-
-    return (char *)(elf->buf + shdr.sh_offset + idx);
-  } else { /* if (elf->bits == 64) */
-    Elf64_Shdr shdr;
-
-    if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
-      log_msg("Failed to parse ELF string table: section %d, index %d\n",
-              s_idx, idx);
-      return "";
-    }
-
-    return (char *)(elf->buf + shdr.sh_offset + idx);
-  }
-}
-
-int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
-  if (sym32) {
-    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
-  } else { /* if (sym64) */
-    COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
-  }
-  return 0;
-bail:
-  return 1;
-}
-
-int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
-  elf_obj_t    elf;
-  unsigned int ofst;
-  int          i;
-  Elf32_Off    strtab_off32;
-  Elf64_Off    strtab_off64; /* save String Table offset for later use */
-
-  memset(&elf, 0, sizeof(elf));
-  elf.buf = buf;
-  elf.sz = sz;
-
-  /* Parse Header */
-  if (parse_elf_header(&elf))
-    goto bail;
-
-  if (elf.bits == 32) {
-    Elf32_Shdr shdr;
-    for (i = 0; i < elf.hdr32.e_shnum; i++) {
-      parse_elf_section(&elf, i, &shdr, NULL);
-
-      if (shdr.sh_type == SHT_STRTAB) {
-        char strtsb_name[128];
-
-        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
-        if (!(strcmp(strtsb_name, ".shstrtab"))) {
-          /* log_msg("found section: %s\n", strtsb_name); */
-          strtab_off32 = shdr.sh_offset;
-          break;
-        }
-      }
-    }
-  } else { /* if (elf.bits == 64) */
-    Elf64_Shdr shdr;
-    for (i = 0; i < elf.hdr64.e_shnum; i++) {
-      parse_elf_section(&elf, i, NULL, &shdr);
-
-      if (shdr.sh_type == SHT_STRTAB) {
-        char strtsb_name[128];
-
-        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
-        if (!(strcmp(strtsb_name, ".shstrtab"))) {
-          /* log_msg("found section: %s\n", strtsb_name); */
-          strtab_off64 = shdr.sh_offset;
-          break;
-        }
-      }
-    }
-  }
-
-  /* Parse all Symbol Tables */
-  if (elf.bits == 32) {
-    Elf32_Shdr shdr;
-    for (i = 0; i < elf.hdr32.e_shnum; i++) {
-      parse_elf_section(&elf, i, &shdr, NULL);
-
-      if (shdr.sh_type == SHT_SYMTAB) {
-        for (ofst = shdr.sh_offset;
-             ofst < shdr.sh_offset + shdr.sh_size;
-             ofst += shdr.sh_entsize) {
-          Elf32_Sym sym;
-
-          parse_elf_symbol(&elf, ofst, &sym, NULL);
-
-          /* For all OBJECTS (data objects), extract the value from the
-           * proper data segment.
-           */
-          /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
-              log_msg("found data object %s\n",
-                      parse_elf_string_table(&elf,
-                                             shdr.sh_link,
-                                             sym.st_name));
-           */
-
-          if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
-              && sym.st_size == 4) {
-            Elf32_Shdr dhdr;
-            int val = 0;
-            char section_name[128];
-
-            parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
-
-            /* For explanition - refer to _MSC_VER version of code */
-            strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
-            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
-            if (strcmp(section_name, ".bss")) {
-              if (sizeof(val) != sym.st_size) {
-                /* The target value is declared as an int in
-                 * *_asm_*_offsets.c, which is 4 bytes on all
-                 * targets we currently use. Complain loudly if
-                 * this is not true.
-                 */
-                log_msg("Symbol size is wrong\n");
-                goto bail;
-              }
-
-              memcpy(&val,
-                     elf.buf + dhdr.sh_offset + sym.st_value,
-                     sym.st_size);
-            }
-
-            if (!elf.le_data) {
-              log_msg("Big Endian data not supported yet!\n");
-              goto bail;
-            }
-
-            switch (mode) {
-              case OUTPUT_FMT_RVDS:
-                printf("%-40s EQU %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_GAS:
-                printf(".equ %-40s, %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_C_HEADER:
-                printf("#define %-40s %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              default:
-                printf("%s = %d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-            }
-          }
-        }
-      }
-    }
-  } else { /* if (elf.bits == 64) */
-    Elf64_Shdr shdr;
-    for (i = 0; i < elf.hdr64.e_shnum; i++) {
-      parse_elf_section(&elf, i, NULL, &shdr);
-
-      if (shdr.sh_type == SHT_SYMTAB) {
-        for (ofst = shdr.sh_offset;
-             ofst < shdr.sh_offset + shdr.sh_size;
-             ofst += shdr.sh_entsize) {
-          Elf64_Sym sym;
-
-          parse_elf_symbol(&elf, ofst, NULL, &sym);
-
-          /* For all OBJECTS (data objects), extract the value from the
-           * proper data segment.
-           */
-          /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
-              log_msg("found data object %s\n",
-                      parse_elf_string_table(&elf,
-                                             shdr.sh_link,
-                                             sym.st_name));
-           */
-
-          if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
-              && sym.st_size == 4) {
-            Elf64_Shdr dhdr;
-            int val = 0;
-            char section_name[128];
-
-            parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
-
-            /* For explanition - refer to _MSC_VER version of code */
-            strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
-            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
-            if ((strcmp(section_name, ".bss"))) {
-              if (sizeof(val) != sym.st_size) {
-                /* The target value is declared as an int in
-                 * *_asm_*_offsets.c, which is 4 bytes on all
-                 * targets we currently use. Complain loudly if
-                 * this is not true.
-                 */
-                log_msg("Symbol size is wrong\n");
-                goto bail;
-              }
-
-              memcpy(&val,
-                     elf.buf + dhdr.sh_offset + sym.st_value,
-                     sym.st_size);
-            }
-
-            if (!elf.le_data) {
-              log_msg("Big Endian data not supported yet!\n");
-              goto bail;
-            }
-
-            switch (mode) {
-              case OUTPUT_FMT_RVDS:
-                printf("%-40s EQU %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_GAS:
-                printf(".equ %-40s, %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              default:
-                printf("%s = %d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  if (mode == OUTPUT_FMT_RVDS)
-    printf("    END\n");
-
-  return 0;
-bail:
-  log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
-  return 1;
-}
-
-#endif
-#endif /* defined(__GNUC__) && __GNUC__ */
-
-
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
-/*  See "Microsoft Portable Executable and Common Object File Format Specification"
-    for reference.
-*/
-#define get_le32(x) ((*(x)) | (*(x+1)) << 8 |(*(x+2)) << 16 | (*(x+3)) << 24 )
-#define get_le16(x) ((*(x)) | (*(x+1)) << 8)
-
-int parse_coff(uint8_t *buf, size_t sz) {
-  unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
-  unsigned int sectionrawdata_ptr;
-  unsigned int i;
-  uint8_t *ptr;
-  uint32_t symoffset;
-
-  char **sectionlist;  // this array holds all section names in their correct order.
-  // it is used to check if the symbol is in .bss or .rdata section.
-
-  nsections = get_le16(buf + 2);
-  symtab_ptr = get_le32(buf + 8);
-  symtab_sz = get_le32(buf + 12);
-  strtab_ptr = symtab_ptr + symtab_sz * 18;
-
-  if (nsections > 96) {
-    log_msg("Too many sections\n");
-    return 1;
-  }
-
-  sectionlist = malloc(nsections * sizeof(sectionlist));
-
-  if (sectionlist == NULL) {
-    log_msg("Allocating first level of section list failed\n");
-    return 1;
-  }
-
-  // log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
-
-  /*
-  The size of optional header is always zero for an obj file. So, the section header
-  follows the file header immediately.
-  */
-
-  ptr = buf + 20;     // section header
-
-  for (i = 0; i < nsections; i++) {
-    char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
-    strncpy(sectionname, ptr, 8);
-    // log_msg("COFF: Parsing section %s\n",sectionname);
-
-    sectionlist[i] = malloc(strlen(sectionname) + 1);
-
-    if (sectionlist[i] == NULL) {
-      log_msg("Allocating storage for %s failed\n", sectionname);
-      goto bail;
-    }
-    strcpy(sectionlist[i], sectionname);
-
-    // check if it's .rdata and is not a COMDAT section.
-    if (!strcmp(sectionname, ".rdata") &&
-        (get_le32(ptr + 36) & 0x1000) == 0) {
-      sectionrawdata_ptr = get_le32(ptr + 20);
-    }
-
-    ptr += 40;
-  }
-
-  // log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
-  // log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
-
-  /*  The compiler puts the data with non-zero offset in .rdata section, but puts the data with
-      zero offset in .bss section. So, if the data in in .bss section, set offset=0.
-      Note from Wiki: In an object module compiled from C, the bss section contains
-      the local variables (but not functions) that were declared with the static keyword,
-      except for those with non-zero initial values. (In C, static variables are initialized
-      to zero by default.) It also contains the non-local (both extern and static) variables
-      that are also initialized to zero (either explicitly or by default).
-      */
-  // move to symbol table
-  /* COFF symbol table:
-      offset      field
-      0           Name(*)
-      8           Value
-      12          SectionNumber
-      14          Type
-      16          StorageClass
-      17          NumberOfAuxSymbols
-      */
-  ptr = buf + symtab_ptr;
-
-  for (i = 0; i < symtab_sz; i++) {
-    int16_t section = get_le16(ptr + 12); // section number
-
-    if (section > 0 && ptr[16] == 2) {
-      // if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
-
-      if (get_le32(ptr)) {
-        char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
-        strncpy(name, ptr, 8);
-        // log_msg("COFF: Parsing symbol %s\n",name);
-        /* The 64bit Windows compiler doesn't prefix with an _.
-         * Check what's there, and bump if necessary
-         */
-        if (name[0] == '_')
-          printf("%-40s EQU ", name + 1);
-        else
-          printf("%-40s EQU ", name);
-      } else {
-        // log_msg("COFF: Parsing symbol %s\n",
-        //        buf + strtab_ptr + get_le32(ptr+4));
-        if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
-          printf("%-40s EQU ",
-                 buf + strtab_ptr + get_le32(ptr + 4) + 1);
-        else
-          printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
-      }
-
-      if (!(strcmp(sectionlist[section - 1], ".bss"))) {
-        symoffset = 0;
-      } else {
-        symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
-      }
-
-      // log_msg("      Section: %d\n",section);
-      // log_msg("      Class:   %d\n",ptr[16]);
-      // log_msg("      Address: %u\n",get_le32(ptr+8));
-      // log_msg("      Offset: %u\n", symoffset);
-
-      printf("%5d\n", symoffset);
-    }
-
-    ptr += 18;
-  }
-
-  printf("    END\n");
-
-  for (i = 0; i < nsections; i++) {
-    free(sectionlist[i]);
-  }
-
-  free(sectionlist);
-
-  return 0;
-bail:
-
-  for (i = 0; i < nsections; i++) {
-    free(sectionlist[i]);
-  }
-
-  free(sectionlist);
-
-  return 1;
-}
-#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
-
-int main(int argc, char **argv) {
-  output_fmt_t mode = OUTPUT_FMT_PLAIN;
-  const char *f;
-  uint8_t *file_buf;
-  int res;
-  FILE *fp;
-  long int file_size;
-
-  if (argc < 2 || argc > 3) {
-    fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
-    fprintf(stderr, "  <obj file>\tobject file to parse\n");
-    fprintf(stderr, "Output Formats:\n");
-    fprintf(stderr, "  gas  - compatible with GNU assembler\n");
-    fprintf(stderr, "  rvds - compatible with armasm\n");
-    fprintf(stderr, "  cheader - c/c++ header file\n");
-    goto bail;
-  }
-
-  f = argv[2];
-
-  if (!strcmp(argv[1], "rvds"))
-    mode = OUTPUT_FMT_RVDS;
-  else if (!strcmp(argv[1], "gas"))
-    mode = OUTPUT_FMT_GAS;
-  else if (!strcmp(argv[1], "cheader"))
-    mode = OUTPUT_FMT_C_HEADER;
-  else
-    f = argv[1];
-
-  fp = fopen(f, "rb");
-
-  if (!fp) {
-    perror("Unable to open file");
-    goto bail;
-  }
-
-  if (fseek(fp, 0, SEEK_END)) {
-    perror("stat");
-    goto bail;
-  }
-
-  file_size = ftell(fp);
-  file_buf = malloc(file_size);
-
-  if (!file_buf) {
-    perror("malloc");
-    goto bail;
-  }
-
-  rewind(fp);
-
-  if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
-    perror("read");
-    goto bail;
-  }
-
-  if (fclose(fp)) {
-    perror("close");
-    goto bail;
-  }
-
-#if defined(__GNUC__) && __GNUC__
-#if defined(__MACH__)
-  res = parse_macho(file_buf, file_size, mode);
-#elif defined(__ELF__)
-  res = parse_elf(file_buf, file_size, mode);
-#endif
-#endif
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
-  res = parse_coff(file_buf, file_size);
-#endif
-
-  free(file_buf);
-
-  if (!res)
-    return EXIT_SUCCESS;
-
-bail:
-  return EXIT_FAILURE;
-}
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -319,13 +319,14 @@ EOF

  print <<EOF;
 #if HAVE_DSPR2
+void vpx_dsputil_static_init();
 #if CONFIG_VP8
 void dsputil_static_init();
-dsputil_static_init();
 #endif
-#if CONFIG_VP9
-void vp9_dsputil_static_init();
-vp9_dsputil_static_init();
+
+vpx_dsputil_static_init();
+#if CONFIG_VP8
+dsputil_static_init();
 #endif
 #endif
 }
@@ -376,17 +377,18 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
      last;
    }
+    if (/HAVE_MSA=yes/) {
+      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
+      last;
+    }
  }
  close CONFIG_FILE;
  mips;
-} elsif ($opts{arch} eq 'armv5te') {
-  @ALL_ARCHS = filter(qw/edsp/);
-  arm;
 } elsif ($opts{arch} eq 'armv6') {
-  @ALL_ARCHS = filter(qw/edsp media/);
+  @ALL_ARCHS = filter(qw/media/);
  arm;
-} elsif ($opts{arch} eq 'armv7') {
-  @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
+} elsif ($opts{arch} =~ /armv7\w?/) {
+  @ALL_ARCHS = filter(qw/media neon_asm neon/);
  @REQUIRES = filter(keys %required ? keys %required : qw/media/);
  &require(@REQUIRES);
  arm;
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -1,15 +0,0 @@
-REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 source directory.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
--- a/codereview.settings
+++ b/codereview.settings
@@ -0,0 +1,4 @@
+# This file is used by gcl to get repository specific information.
+GERRIT_HOST: chromium-review.googlesource.com
+GERRIT_PORT: 29418
+CODE_REVIEW_SERVER: chromium-review.googlesource.com
--- a/158
+++ b/158
@@ -26,19 +26,19 @@ Advanced options:
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
  ${toggle_encode_perf_tests}     build encoder perf tests with unit tests
+  --cpu=CPU                       tune for the specified CPU (ARM: cortex-a8, X86: sse3)
  --libc=PATH                     path to alternate libc
  --size-limit=WxH                max size to allow in the decoder
  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
  --sdk-path=PATH                 path to root of sdk (android builds only)
-  ${toggle_fast_unaligned}        don't use unaligned accesses, even when
-                                  supported by hardware [auto]
  ${toggle_codec_srcs}            in/exclude codec library source code
  ${toggle_debug_libs}            in/exclude debug version of libraries
  ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
+  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
+  ${toggle_vp10}                  VP10 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
-  ${toggle_mem_tracker}           track memory usage
  ${toggle_postproc}              postprocessing
  ${toggle_vp9_postproc}          vp9 specific postprocessing
  ${toggle_multithread}           multithreaded encoding and decoding
@@ -56,6 +56,8 @@ Advanced options:
  ${toggle_postproc_visualizer}   macro block / block level visualizers
  ${toggle_multi_res_encoding}    enable multiple-resolution encoding
  ${toggle_temporal_denoising}    enable temporal denoising and disable the spatial denoiser
+  ${toggle_vp9_temporal_denoising}
+                                  enable vp9 temporal denoising
  ${toggle_webm_io}               enable input from and output to WebM container
  ${toggle_libyuv}                enable libyuv

@@ -93,10 +95,6 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv5te-android-gcc"
-all_platforms="${all_platforms} armv5te-linux-rvct"
-all_platforms="${all_platforms} armv5te-linux-gcc"
-all_platforms="${all_platforms} armv5te-none-rvct"
 all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
@@ -109,15 +107,10 @@ all_platforms="${all_platforms} armv7-linux-gcc"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
+all_platforms="${all_platforms} armv7-win32-vs14"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
-all_platforms="${all_platforms} ppc32-darwin8-gcc"
-all_platforms="${all_platforms} ppc32-darwin9-gcc"
-all_platforms="${all_platforms} ppc32-linux-gcc"
-all_platforms="${all_platforms} ppc64-darwin8-gcc"
-all_platforms="${all_platforms} ppc64-darwin9-gcc"
-all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
@@ -128,6 +121,7 @@ all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
+all_platforms="${all_platforms} x86-darwin14-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -140,11 +134,14 @@ all_platforms="${all_platforms} x86-win32-vs9"
 all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
+all_platforms="${all_platforms} x86-win32-vs14"
+all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
+all_platforms="${all_platforms} x86_64-darwin14-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -155,12 +152,7 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
-all_platforms="${all_platforms} universal-darwin8-gcc"
-all_platforms="${all_platforms} universal-darwin9-gcc"
-all_platforms="${all_platforms} universal-darwin10-gcc"
-all_platforms="${all_platforms} universal-darwin11-gcc"
-all_platforms="${all_platforms} universal-darwin12-gcc"
-all_platforms="${all_platforms} universal-darwin13-gcc"
+all_platforms="${all_platforms} x86_64-win64-vs14"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -197,6 +189,14 @@ if [ ${doxy_major:-0} -ge 1 ]; then
    [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
 fi

+# disable codecs when their source directory does not exist
+[ -d "${source_path}/vp8" ] || disable_feature vp8
+[ -d "${source_path}/vp9" ] || disable_feature vp9
+[ -d "${source_path}/vp10" ] || disable_feature vp10
+
+# disable vp10 codec by default
+disable_feature vp10
+
 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
 # case.
@@ -206,45 +206,31 @@ enable_feature install_libs

 enable_feature static
 enable_feature optimizations
-enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
+enable_feature dependency_tracking
 enable_feature spatial_resampling
 enable_feature multithread
 enable_feature os_support
 enable_feature temporal_denoising

-[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
-for d in vp8 vp9; do
-    [ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
-done
-
-if ! enabled alt_tree_layout; then
-# development environment
-[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
-[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
-else
-# customer environment
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
-
-[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
-fi
-
-CODECS="$(echo ${CODECS} | tr ' ' '\n')"
-CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"
+CODECS="
+    vp8_encoder
+    vp8_decoder
+    vp9_encoder
+    vp9_decoder
+    vp10_encoder
+    vp10_decoder
+"
+CODEC_FAMILIES="
+    vp8
+    vp9
+    vp10
+"

 ARCH_LIST="
    arm
    mips
    x86
    x86_64
-    ppc32
-    ppc64
 "
 ARCH_EXT_LIST="
    edsp
@@ -254,7 +240,7 @@ ARCH_EXT_LIST="

    mips32
    dspr2
-
+    msa
    mips64

    mmx
@@ -265,25 +251,23 @@ ARCH_EXT_LIST="
    sse4_1
    avx
    avx2
-
-    altivec
 "
 HAVE_LIST="
    ${ARCH_EXT_LIST}
    vpx_ports
    stdint_h
-    alt_tree_layout
    pthread_h
    sys_mman_h
    unistd_h
 "
 EXPERIMENT_LIST="
    spatial_svc
-    vp9_temporal_denoising
    fp_mb_stats
    emulate_hardware
+    misc_fixes
 "
 CONFIG_LIST="
+    dependency_tracking
    external_build
    install_docs
    install_bins
@@ -301,10 +285,6 @@ CONFIG_LIST="

    codec_srcs
    debug_libs
-    fast_unaligned
-    mem_manager
-    mem_tracker
-    mem_checks

    dequant_tokens
    dc_recon
@@ -334,6 +314,7 @@ CONFIG_LIST="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
+    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -341,6 +322,7 @@ CONFIG_LIST="
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
+    dependency_tracking
    external_build
    extra_warnings
    werror
@@ -364,7 +346,6 @@ CMDLINE_SELECT="
    libc
    as
    size_limit
-    fast_unaligned
    codec_srcs
    debug_libs

@@ -377,7 +358,6 @@ CMDLINE_SELECT="
    ${CODECS}
    ${CODEC_FAMILIES}
    static_msvcrt
-    mem_tracker
    spatial_resampling
    realtime_only
    onthefly_bitpacking
@@ -393,6 +373,7 @@ CMDLINE_SELECT="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
+    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -449,24 +430,8 @@ post_process_cmdline() {

 process_targets() {
    enabled child || write_common_config_banner
-    enabled universal || write_common_target_config_h  ${BUILD_PFX}vpx_config.h
-
-    # TODO: add host tools target (obj_int_extract, etc)
-
-    # For fat binaries, call configure recursively to configure for each
-    # binary architecture to be included.
-    if enabled universal; then
-        # Call configure (ourselves) for each subarchitecture
-        for arch in $fat_bin_archs; do
-            BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
-        done
-    fi
-
-    # The write_common_config (config.mk) logic is deferred until after the
-    # recursive calls to configure complete, because we want our universal
-    # targets to be executed last.
+    write_common_target_config_h ${BUILD_PFX}vpx_config.h
    write_common_config_targets
-    enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk

    # Calculate the default distribution name, based on the enabled features
    cf=""
@@ -542,11 +507,11 @@ process_detect() {
        # Can only build shared libs on a subset of platforms. Doing this check
        # here rather than at option parse time because the target auto-detect
        # magic happens after the command line has been parsed.
-        if ! enabled linux; then
+        if ! enabled linux && ! enabled os2; then
            if enabled gnu; then
                echo "--enable-shared is only supported on ELF; assuming this is OK"
            else
-                die "--enable-shared only supported on ELF for now"
+                die "--enable-shared only supported on ELF and OS/2 for now"
            fi
        fi
    fi
@@ -611,30 +576,6 @@ EOF
 process_toolchain() {
    process_common_toolchain

-    # Handle universal binaries for this architecture
-    case $toolchain in
-        universal-darwin*)
-            darwin_ver=${tgt_os##darwin}
-
-            # Snow Leopard (10.6/darwin10) dropped support for PPC
-            # Include PPC support for all prior versions
-            if [ $darwin_ver -lt 10 ]; then
-                fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
-            fi
-
-            # Tiger (10.4/darwin8) brought support for x86
-            if [ $darwin_ver -ge 8 ]; then
-                fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
-            fi
-
-            # Leopard (10.5/darwin9) brought 64 bit support
-            if [ $darwin_ver -ge 9 ]; then
-                fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
-            fi
-            ;;
-    esac
-
-
    # Enable some useful compiler flags
    if enabled gcc; then
        enabled werror && check_add_cflags -Werror
@@ -710,7 +651,7 @@ process_toolchain() {
                 VCPROJ_SFX=vcproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_proj.sh
                 ;;
-             10|11|12)
+             10|11|12|14)
                 VCPROJ_SFX=vcxproj
                 gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
                 enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
@@ -722,7 +663,7 @@ process_toolchain() {
    esac

    # Other toolchain specific defaults
-    case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
+    case $toolchain in x86*) soft_enable postproc;; esac

    if enabled postproc_visualizer; then
        enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -776,6 +717,16 @@ EOF
    esac
    # libwebm needs to be linked with C++ standard library
    enabled webm_io && LD=${CXX}
+
+    # append any user defined extra cflags
+    if [ -n "${extra_cflags}" ] ; then
+        check_add_cflags ${extra_cflags} || \
+        die "Requested extra CFLAGS '${extra_cflags}' not supported by compiler"
+    fi
+    if [ -n "${extra_cxxflags}" ]; then
+        check_add_cxxflags ${extra_cxxflags} || \
+        die "Requested extra CXXFLAGS '${extra_cxxflags}' not supported by compiler"
+    fi
 }


@@ -786,6 +737,7 @@ CONFIGURE_ARGS="$@"
 process "$@"
 print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
 cat <<EOF >> ${BUILD_PFX}vpx_config.c
+#include "vpx/vpx_codec.h"
 static const char* const cfg = "$CONFIGURE_ARGS";
 const char *vpx_codec_build_config(void) {return cfg;}
 EOF
--- a/examples.mk
+++ b/examples.mk
@@ -22,19 +22,22 @@ LIBYUV_SRCS +=  third_party/libyuv/include/libyuv/basic_types.h  \
                third_party/libyuv/source/planar_functions.cc \
                third_party/libyuv/source/row_any.cc \
                third_party/libyuv/source/row_common.cc \
+                third_party/libyuv/source/row_gcc.cc \
                third_party/libyuv/source/row_mips.cc \
                third_party/libyuv/source/row_neon.cc \
                third_party/libyuv/source/row_neon64.cc \
-                third_party/libyuv/source/row_posix.cc \
                third_party/libyuv/source/row_win.cc \
                third_party/libyuv/source/scale.cc \
+                third_party/libyuv/source/scale_any.cc \
                third_party/libyuv/source/scale_common.cc \
+                third_party/libyuv/source/scale_gcc.cc \
                third_party/libyuv/source/scale_mips.cc \
                third_party/libyuv/source/scale_neon.cc \
                third_party/libyuv/source/scale_neon64.cc \
-                third_party/libyuv/source/scale_posix.cc \
                third_party/libyuv/source/scale_win.cc \

+LIBWEBM_COMMON_SRCS += third_party/libwebm/webmids.hpp
+
 LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
                      third_party/libwebm/mkvmuxerutil.cpp \
                      third_party/libwebm/mkvwriter.cpp \
@@ -42,8 +45,7 @@ LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer.cpp \
                      third_party/libwebm/mkvmuxertypes.hpp \
                      third_party/libwebm/mkvmuxerutil.hpp \
                      third_party/libwebm/mkvparser.hpp \
-                      third_party/libwebm/mkvwriter.hpp \
-                      third_party/libwebm/webmids.hpp
+                      third_party/libwebm/mkvwriter.hpp

 LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser.cpp \
                      third_party/libwebm/mkvreader.cpp \
@@ -56,6 +58,7 @@ UTILS-$(CONFIG_DECODERS)    += vpxdec.c
 vpxdec.SRCS                 += md5_utils.c md5_utils.h
 vpxdec.SRCS                 += vpx_ports/mem_ops.h
 vpxdec.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxdec.SRCS                 += vpx_ports/msvc.h
 vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
 vpxdec.SRCS                 += args.c args.h
@@ -66,6 +69,7 @@ ifeq ($(CONFIG_LIBYUV),yes)
  vpxdec.SRCS                 += $(LIBYUV_SRCS)
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
+  vpxdec.SRCS                 += $(LIBWEBM_COMMON_SRCS)
  vpxdec.SRCS                 += $(LIBWEBM_PARSER_SRCS)
  vpxdec.SRCS                 += webmdec.cc webmdec.h
 endif
@@ -80,12 +84,14 @@ vpxenc.SRCS                 += tools_common.c tools_common.h
 vpxenc.SRCS                 += warnings.c warnings.h
 vpxenc.SRCS                 += vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS                 += vpx_ports/msvc.h
 vpxenc.SRCS                 += vpx_ports/vpx_timer.h
 vpxenc.SRCS                 += vpxstats.c vpxstats.h
 ifeq ($(CONFIG_LIBYUV),yes)
  vpxenc.SRCS                 += $(LIBYUV_SRCS)
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
+  vpxenc.SRCS                 += $(LIBWEBM_COMMON_SRCS)
  vpxenc.SRCS                 += $(LIBWEBM_MUXER_SRCS)
  vpxenc.SRCS                 += webmenc.cc webmenc.h
 endif
@@ -98,6 +104,7 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes)
  vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
  vp9_spatial_svc_encoder.SRCS        += video_common.h
  vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
+  vp9_spatial_svc_encoder.SRCS        += vpx_ports/msvc.h
  vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
  vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
  vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
@@ -112,6 +119,7 @@ vpx_temporal_svc_encoder.SRCS        += ivfenc.c ivfenc.h
 vpx_temporal_svc_encoder.SRCS        += tools_common.c tools_common.h
 vpx_temporal_svc_encoder.SRCS        += video_common.h
 vpx_temporal_svc_encoder.SRCS        += video_writer.h video_writer.c
+vpx_temporal_svc_encoder.SRCS        += vpx_ports/msvc.h
 vpx_temporal_svc_encoder.GUID        = B18C08F2-A439-4502-A78E-849BE3D60947
 vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
 EXAMPLES-$(CONFIG_DECODERS)        += simple_decoder.c
@@ -122,6 +130,7 @@ simple_decoder.SRCS                += video_common.h
 simple_decoder.SRCS                += video_reader.h video_reader.c
 simple_decoder.SRCS                += vpx_ports/mem_ops.h
 simple_decoder.SRCS                += vpx_ports/mem_ops_aligned.h
+simple_decoder.SRCS                += vpx_ports/msvc.h
 simple_decoder.DESCRIPTION          = Simplified decoder loop
 EXAMPLES-$(CONFIG_DECODERS)        += postproc.c
 postproc.SRCS                      += ivfdec.h ivfdec.c
@@ -130,6 +139,7 @@ postproc.SRCS                      += video_common.h
 postproc.SRCS                      += video_reader.h video_reader.c
 postproc.SRCS                      += vpx_ports/mem_ops.h
 postproc.SRCS                      += vpx_ports/mem_ops_aligned.h
+postproc.SRCS                      += vpx_ports/msvc.h
 postproc.GUID                       = 65E33355-F35E-4088-884D-3FD4905881D7
 postproc.DESCRIPTION                = Decoder postprocessor control
 EXAMPLES-$(CONFIG_DECODERS)        += decode_to_md5.c
@@ -140,6 +150,7 @@ decode_to_md5.SRCS                 += video_common.h
 decode_to_md5.SRCS                 += video_reader.h video_reader.c
 decode_to_md5.SRCS                 += vpx_ports/mem_ops.h
 decode_to_md5.SRCS                 += vpx_ports/mem_ops_aligned.h
+decode_to_md5.SRCS                 += vpx_ports/msvc.h
 decode_to_md5.GUID                  = 59120B9B-2735-4BFE-B022-146CA340FE42
 decode_to_md5.DESCRIPTION           = Frame by frame MD5 checksum
 EXAMPLES-$(CONFIG_ENCODERS)     += simple_encoder.c
@@ -147,6 +158,7 @@ simple_encoder.SRCS             += ivfenc.h ivfenc.c
 simple_encoder.SRCS             += tools_common.h tools_common.c
 simple_encoder.SRCS             += video_common.h
 simple_encoder.SRCS             += video_writer.h video_writer.c
+simple_encoder.SRCS             += vpx_ports/msvc.h
 simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
 simple_encoder.DESCRIPTION       = Simplified encoder loop
 EXAMPLES-$(CONFIG_VP9_ENCODER)  += vp9_lossless_encoder.c
@@ -154,6 +166,7 @@ vp9_lossless_encoder.SRCS       += ivfenc.h ivfenc.c
 vp9_lossless_encoder.SRCS       += tools_common.h tools_common.c
 vp9_lossless_encoder.SRCS       += video_common.h
 vp9_lossless_encoder.SRCS       += video_writer.h video_writer.c
+vp9_lossless_encoder.SRCS       += vpx_ports/msvc.h
 vp9_lossless_encoder.GUID        = B63C7C88-5348-46DC-A5A6-CC151EF93366
 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
 EXAMPLES-$(CONFIG_ENCODERS)     += twopass_encoder.c
@@ -161,6 +174,7 @@ twopass_encoder.SRCS            += ivfenc.h ivfenc.c
 twopass_encoder.SRCS            += tools_common.h tools_common.c
 twopass_encoder.SRCS            += video_common.h
 twopass_encoder.SRCS            += video_writer.h video_writer.c
+twopass_encoder.SRCS            += vpx_ports/msvc.h
 twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
 twopass_encoder.DESCRIPTION      = Two-pass encoder loop
 EXAMPLES-$(CONFIG_DECODERS)     += decode_with_drops.c
@@ -170,6 +184,7 @@ decode_with_drops.SRCS          += video_common.h
 decode_with_drops.SRCS          += video_reader.h video_reader.c
 decode_with_drops.SRCS          += vpx_ports/mem_ops.h
 decode_with_drops.SRCS          += vpx_ports/mem_ops_aligned.h
+decode_with_drops.SRCS          += vpx_ports/msvc.h
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
 EXAMPLES-$(CONFIG_ENCODERS)        += set_maps.c
@@ -177,6 +192,7 @@ set_maps.SRCS                      += ivfenc.h ivfenc.c
 set_maps.SRCS                      += tools_common.h tools_common.c
 set_maps.SRCS                      += video_common.h
 set_maps.SRCS                      += video_writer.h video_writer.c
+set_maps.SRCS                      += vpx_ports/msvc.h
 set_maps.GUID                       = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
 set_maps.DESCRIPTION                = Set active and ROI maps
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
@@ -184,6 +200,7 @@ vp8cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
 vp8cx_set_ref.SRCS                 += tools_common.h tools_common.c
 vp8cx_set_ref.SRCS                 += video_common.h
 vp8cx_set_ref.SRCS                 += video_writer.h video_writer.c
+vp8cx_set_ref.SRCS                 += vpx_ports/msvc.h
 vp8cx_set_ref.GUID                  = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
 vp8cx_set_ref.DESCRIPTION           = VP8 set encoder reference frame

@@ -194,6 +211,7 @@ EXAMPLES-$(CONFIG_VP8_ENCODER)          += vp8_multi_resolution_encoder.c
 vp8_multi_resolution_encoder.SRCS       += ivfenc.h ivfenc.c
 vp8_multi_resolution_encoder.SRCS       += tools_common.h tools_common.c
 vp8_multi_resolution_encoder.SRCS       += video_writer.h video_writer.c
+vp8_multi_resolution_encoder.SRCS       += vpx_ports/msvc.h
 vp8_multi_resolution_encoder.SRCS       += $(LIBYUV_SRCS)
 vp8_multi_resolution_encoder.GUID        = 04f8738e-63c8-423b-90fa-7c2703a374de
 vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
@@ -254,14 +272,6 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
 $(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))


-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_OBJS variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_OBJS variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
-
-
 # Create build/install dependencies for all examples. The common case
 # is handled here. The MSVS case is handled below.
 NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
@@ -269,24 +279,28 @@ DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
 INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
 DIST-SRCS-yes              += $(ALL_SRCS)
 INSTALL-SRCS-yes           += $(UTIL_SRCS)
-OBJS-$(NOT_MSVS)           += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
+OBJS-$(NOT_MSVS)           += $(call objs,$(ALL_SRCS))
 BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))


 # Instantiate linker template for all examples.
 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
+ifneq ($(filter darwin%,$(TGT_OS)),)
+SHARED_LIB_SUF=.dylib
+else
+ifneq ($(filter os2%,$(TGT_OS)),)
+SHARED_LIB_SUF=_dll.a
+else
+SHARED_LIB_SUF=.so
+endif
+endif
 CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
 $(foreach bin,$(BINS-yes),\
-    $(if $(BUILD_OBJS),$(eval $(bin):\
-        $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
-    $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
+    $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
+    $(eval $(call linker_template,$(bin),\
        $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
        -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
-        )))\
-    $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
-    )
-
+        )))

 # The following pairs define a mapping of locations in the distribution
 # tree to locations in the source/build trees.
@@ -314,8 +328,8 @@ endif
 # the makefiles). We may want to revisit this.
 define vcproj_template
 $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX)
-	@echo "    [vcproj] $$@"
-	$$(GEN_VCPROJ)\
+	$(if $(quiet),@echo "    [vcproj] $$@")
+	$(qexec)$$(GEN_VCPROJ)\
            --exe\
            --target=$$(TOOLCHAIN)\
            --name=$$(@:.$(VCPROJ_SFX)=)\
@@ -338,6 +352,7 @@ $(foreach proj,$(call enabled,PROJECTS),\
 #
 %.dox: %.c
 	@echo "    [DOXY] $@"
+	@mkdir -p $(dir $@)
 	@echo "/*!\page example_$(@F:.dox=) $(@F:.dox=)" > $@
 	@echo "   \includelineno $(<F)" >> $@
 	@echo "*/" >> $@
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -36,9 +36,9 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./md5_utils.h"
-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../md5_utils.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -59,13 +59,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -46,13 +46,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/resize_util.c
+++ b/examples/resize_util.c
@@ -15,15 +15,23 @@
 #include <stdlib.h>
 #include <string.h>

-#include "./vp9/encoder/vp9_resize.h"
+#include "../tools_common.h"
+#include "../vp9/encoder/vp9_resize.h"

-static void usage(char *progname) {
+static const char *exec_name = NULL;
+
+static void usage() {
  printf("Usage:\n");
  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
-         progname);
+         exec_name);
  printf("<output_yuv> [<frames>]\n");
 }

+void usage_exit(void) {
+  usage();
+  exit(EXIT_FAILURE);
+}
+
 static int parse_dim(char *v, int *width, int *height) {
  char *x = strchr(v, 'x');
  if (x == NULL)
@@ -47,9 +55,11 @@ int main(int argc, char *argv[]) {
  int f, frames;
  int width, height, target_width, target_height;

+  exec_name = argv[0];
+
  if (argc < 5) {
    printf("Incorrect parameters:\n");
-    usage(argv[0]);
+    usage();
    return 1;
  }

@@ -57,25 +67,25 @@ int main(int argc, char *argv[]) {
  fout = argv[4];
  if (!parse_dim(argv[2], &width, &height)) {
    printf("Incorrect parameters: %s\n", argv[2]);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  if (!parse_dim(argv[3], &target_width, &target_height)) {
    printf("Incorrect parameters: %s\n", argv[3]);
-    usage(argv[0]);
+    usage();
    return 1;
  }

  fpin = fopen(fin, "rb");
  if (fpin == NULL) {
    printf("Can't open file %s to read\n", fin);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  fpout = fopen(fout, "wb");
  if (fpout == NULL) {
    printf("Can't open file %s to write\n", fout);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  if (argc >= 6)
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -50,12 +50,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -82,13 +82,13 @@

 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -101,12 +101,12 @@

 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr,
          "Usage: %s <codec> <width> <height> <infile> <outfile> "
              "<keyframe-interval> [<error-resilient>]\nSee comments in "
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -53,12 +53,12 @@

 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -8,292 +8,729 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+/*
+ * This is an example demonstrating multi-resolution encoding in VP8.
+ * High-resolution input video is down-sampled to lower-resolutions. The
+ * encoder then encodes the video and outputs multiple bitstreams with
+ * different resolutions.
+ *
+ * This test also allows for settings temporal layers for each spatial layer.
+ * Different number of temporal layers per spatial stream may be used.
+ * Currently up to 3 temporal layers per spatial stream (encoder) are supported
+ * in this test.
+ */

-// This is an example demonstrating multi-resolution encoding in VP8.
-// High-resolution input video is down-sampled to lower-resolutions. The
-// encoder then encodes the video and outputs multiple bitstreams with
-// different resolutions.
-//
-// Configure with --enable-multi-res-encoding flag to enable this example.
+#include "./vpx_config.h"

 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
 #include <string.h>
+#include <math.h>
+#include <assert.h>
+#include <sys/time.h>
+#if USE_POSIX_MMAP
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+#include "vpx_ports/vpx_timer.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
+#include "vpx_ports/mem_ops.h"
+#include "../tools_common.h"
+#define interface (vpx_codec_vp8_cx())
+#define fourcc    0x30385056

+void usage_exit(void) {
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * The input video frame is downsampled several times to generate a multi-level
+ * hierarchical structure. NUM_ENCODERS is defined as the number of encoding
+ * levels required. For example, if the size of input video is 1280x720,
+ * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3
+ * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and
+ * 320x180(level 2) respectively.
+ */
+
+/* Number of encoders (spatial resolutions) used in this test. */
+#define NUM_ENCODERS 3
+
+/* Maximum number of temporal layers allowed for this test. */
+#define MAX_NUM_TEMPORAL_LAYERS 3
+
+/* This example uses the scaler function in libyuv. */
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 #include "third_party/libyuv/include/libyuv/scale.h"
 #include "third_party/libyuv/include/libyuv/cpu_id.h"

-#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
+int (*read_frame_p)(FILE *f, vpx_image_t *img);

-#include "./tools_common.h"
-#include "./video_writer.h"
+static int read_frame(FILE *f, vpx_image_t *img) {
+    size_t nbytes, to_read;
+    int    res = 1;

-// The input video frame is downsampled several times to generate a
-// multi-level  hierarchical structure. kNumEncoders is defined as the number
-// of encoding  levels required. For example, if the size of input video is
-// 1280x720, kNumEncoders is 3, and down-sampling factor is 2, the encoder
-// outputs 3 bitstreams with resolution of 1280x720(level 0),
-// 640x360(level 1), and 320x180(level 2) respectively.
-#define kNumEncoders 3
-
-static const char *exec_name;
-
-void usage_exit() {
-  fprintf(stderr,
-          "Usage: %s <width> <height> <infile> <outfile(s)> <output psnr?>\n",
-          exec_name);
-  exit(EXIT_FAILURE);
+    to_read = img->w*img->h*3/2;
+    nbytes = fread(img->planes[0], 1, to_read, f);
+    if(nbytes != to_read) {
+        res = 0;
+        if(nbytes > 0)
+            printf("Warning: Read partial frame. Check your width & height!\n");
+    }
+    return res;
 }

-int main(int argc, char *argv[]) {
-  int frame_cnt = 0;
-  FILE *infile = NULL;
-  VpxVideoWriter *writers[kNumEncoders];
-  vpx_codec_ctx_t codec[kNumEncoders];
-  vpx_codec_enc_cfg_t cfg[kNumEncoders];
-  vpx_image_t raw[kNumEncoders];
-  const VpxInterface *const encoder = get_vpx_encoder_by_name("vp8");
-  // Currently, only realtime mode is supported in multi-resolution encoding.
-  const int arg_deadline = VPX_DL_REALTIME;
-  int i;
-  int width = 0;
-  int height = 0;
-  int frame_avail = 0;
-  int got_data = 0;
+static int read_frame_by_row(FILE *f, vpx_image_t *img) {
+    size_t nbytes, to_read;
+    int    res = 1;
+    int plane;

-  // Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
-  // don't need to know PSNR, which will skip PSNR calculation and save
-  // encoding time.
-  int show_psnr = 0;
-  uint64_t psnr_sse_total[kNumEncoders] = {0};
-  uint64_t psnr_samples_total[kNumEncoders] = {0};
-  double psnr_totals[kNumEncoders][4] = {{0, 0}};
-  int psnr_count[kNumEncoders] = {0};
-
-  // Set the required target bitrates for each resolution level.
-  // If target bitrate for highest-resolution level is set to 0,
-  // (i.e. target_bitrate[0]=0), we skip encoding at that level.
-  unsigned int target_bitrate[kNumEncoders] = {1000, 500, 100};
-
-  // Enter the frame rate of the input video.
-  const int framerate = 30;
-  // Set down-sampling factor for each resolution level.
-  //   dsf[0] controls down sampling from level 0 to level 1;
-  //   dsf[1] controls down sampling from level 1 to level 2;
-  //   dsf[2] is not used.
-  vpx_rational_t dsf[kNumEncoders] = {{2, 1}, {2, 1}, {1, 1}};
-
-  exec_name = argv[0];
-
-  if (!encoder)
-    die("Unsupported codec.");
-
-  // exe_name, input width, input height, input file,
-  // output file 1, output file 2, output file 3, psnr on/off
-  if (argc != (5 + kNumEncoders))
-    die("Invalid number of input options.");
-
-  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
-
-  width = strtol(argv[1], NULL, 0);
-  height = strtol(argv[2], NULL, 0);
-
-  if (width < 16 || width % 2 || height < 16 || height % 2)
-    die("Invalid resolution: %ldx%ld", width, height);
-
-  // Open input video file for encoding
-  if (!(infile = fopen(argv[3], "rb")))
-    die("Failed to open %s for reading", argv[3]);
-
-  show_psnr = strtol(argv[kNumEncoders + 4], NULL, 0);
-
-  // Populate default encoder configuration
-  for (i = 0; i < kNumEncoders; ++i) {
-    vpx_codec_err_t res =
-        vpx_codec_enc_config_default(encoder->codec_interface(), &cfg[i], 0);
-    if (res != VPX_CODEC_OK) {
-      printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-      return EXIT_FAILURE;
-    }
-  }
-
-  // Update the default configuration according to needs of the application.
-  // Highest-resolution encoder settings
-  cfg[0].g_w = width;
-  cfg[0].g_h = height;
-  cfg[0].g_threads = 1;
-  cfg[0].rc_dropframe_thresh = 30;
-  cfg[0].rc_end_usage = VPX_CBR;
-  cfg[0].rc_resize_allowed = 0;
-  cfg[0].rc_min_quantizer = 4;
-  cfg[0].rc_max_quantizer = 56;
-  cfg[0].rc_undershoot_pct = 98;
-  cfg[0].rc_overshoot_pct = 100;
-  cfg[0].rc_buf_initial_sz = 500;
-  cfg[0].rc_buf_optimal_sz = 600;
-  cfg[0].rc_buf_sz = 1000;
-  cfg[0].g_error_resilient = 1;
-  cfg[0].g_lag_in_frames = 0;
-  cfg[0].kf_mode = VPX_KF_AUTO;  // VPX_KF_DISABLED
-  cfg[0].kf_min_dist = 3000;
-  cfg[0].kf_max_dist = 3000;
-  cfg[0].rc_target_bitrate = target_bitrate[0];
-  cfg[0].g_timebase.num = 1;
-  cfg[0].g_timebase.den = framerate;
-
-  // Other-resolution encoder settings
-  for (i = 1; i < kNumEncoders; ++i) {
-    cfg[i] = cfg[0];
-    cfg[i].g_threads = 1;
-    cfg[i].rc_target_bitrate = target_bitrate[i];
-
-    // Note: Width & height of other-resolution encoders are calculated
-    // from the highest-resolution encoder's size and the corresponding
-    // down_sampling_factor.
+    for (plane = 0; plane < 3; plane++)
    {
-      unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
-      unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
-      cfg[i].g_w = iw / dsf[i - 1].num;
-      cfg[i].g_h = ih / dsf[i - 1].num;
-    }
+        unsigned char *ptr;
+        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+        int r;

-    // Make width & height to be multiplier of 2.
-    if ((cfg[i].g_w) % 2)
-      cfg[i].g_w++;
-
-    if ((cfg[i].g_h) % 2)
-      cfg[i].g_h++;
-  }
-
-  // Open output file for each encoder to output bitstreams
-  for (i = 0; i < kNumEncoders; ++i) {
-    VpxVideoInfo info = {
-      encoder->fourcc,
-      cfg[i].g_w,
-      cfg[i].g_h,
-      {cfg[i].g_timebase.num, cfg[i].g_timebase.den}
-    };
-
-    if (!(writers[i] = vpx_video_writer_open(argv[i+4], kContainerIVF, &info)))
-      die("Failed to open %s for writing", argv[i+4]);
-  }
-
-  // Allocate image for each encoder
-  for (i = 0; i < kNumEncoders; ++i)
-    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
-      die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
-
-  // Initialize multi-encoder
-  if (vpx_codec_enc_init_multi(&codec[0], encoder->codec_interface(), &cfg[0],
-                               kNumEncoders,
-                               show_psnr ? VPX_CODEC_USE_PSNR : 0, &dsf[0]))
-    die_codec(&codec[0], "Failed to initialize encoder");
-
-  // The extra encoding configuration parameters can be set as follows.
-  for (i = 0; i < kNumEncoders; i++) {
-    // Set encoding speed
-    if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, -6))
-      die_codec(&codec[i], "Failed to set cpu_used");
-
-    // Set static threshold.
-    if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
-      die_codec(&codec[i], "Failed to set static threshold");
-
-    // Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING
-    // Enable denoising for the highest-resolution encoder.
-    if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, i == 0))
-      die_codec(&codec[0], "Failed to set noise_sensitivity");
-  }
-
-  frame_avail = 1;
-  got_data = 0;
-
-  while (frame_avail || got_data) {
-    vpx_codec_iter_t iter[kNumEncoders] = {NULL};
-    const vpx_codec_cx_pkt_t *pkt[kNumEncoders];
-
-    frame_avail = vpx_img_read(&raw[0], infile);
-
-    if (frame_avail) {
-      for (i = 1; i < kNumEncoders; ++i) {
-        vpx_image_t *const prev = &raw[i - 1];
-
-        // Scale the image down a number of times by downsampling factor
-        // FilterMode 1 or 2 give better psnr than FilterMode 0.
-        I420Scale(prev->planes[VPX_PLANE_Y], prev->stride[VPX_PLANE_Y],
-                  prev->planes[VPX_PLANE_U], prev->stride[VPX_PLANE_U],
-                  prev->planes[VPX_PLANE_V], prev->stride[VPX_PLANE_V],
-                  prev->d_w, prev->d_h,
-                  raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
-                  raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
-                  raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
-                  raw[i].d_w, raw[i].d_h, 1);
-      }
-    }
-
-    // Encode frame.
-    if (vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
-                         frame_cnt, 1, 0, arg_deadline)) {
-      die_codec(&codec[0], "Failed to encode frame");
-    }
-
-    for (i = kNumEncoders - 1; i >= 0; i--) {
-      got_data = 0;
-
-      while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
-        got_data = 1;
-        switch (pkt[i]->kind) {
-          case VPX_CODEC_CX_FRAME_PKT:
-            vpx_video_writer_write_frame(writers[i], pkt[i]->data.frame.buf,
-                                         pkt[i]->data.frame.sz, frame_cnt - 1);
-          break;
-          case VPX_CODEC_PSNR_PKT:
-            if (show_psnr) {
-              int j;
-              psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
-              psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
-              for (j = 0; j < 4; j++)
-                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
-              psnr_count[i]++;
-            }
+        /* Determine the correct plane based on the image format. The for-loop
+         * always counts in Y,U,V order, but this may not match the order of
+         * the data on disk.
+         */
+        switch (plane)
+        {
+        case 1:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
            break;
-          default:
+        case 2:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
            break;
+        default:
+            ptr = img->planes[plane];
        }
-        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
-               (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
-        fflush(stdout);
-      }
-    }
-    frame_cnt++;
-  }
-  printf("\n");

-  fclose(infile);
+        for (r = 0; r < h; r++)
+        {
+            to_read = w;

-  printf("Processed %d frames.\n", frame_cnt - 1);
-  for (i = 0; i < kNumEncoders; ++i) {
-    // Calculate PSNR and print it out
-    if (show_psnr && psnr_count[i] > 0) {
-      int j;
-      double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
-                                  psnr_sse_total[i]);
+            nbytes = fread(ptr, 1, to_read, f);
+            if(nbytes != to_read) {
+                res = 0;
+                if(nbytes > 0)
+                    printf("Warning: Read partial frame. Check your width & height!\n");
+                break;
+            }

-      fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
-      fprintf(stderr, " %.3lf", ovpsnr);
-      for (j = 0; j < 4; j++)
-        fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
+            ptr += img->stride[plane];
+        }
+        if (!res)
+            break;
    }

-    if (vpx_codec_destroy(&codec[i]))
-      die_codec(&codec[i], "Failed to destroy codec");
-
-    vpx_img_free(&raw[i]);
-    vpx_video_writer_close(writers[i]);
-  }
-  printf("\n");
-
-  return EXIT_SUCCESS;
+    return res;
+}
+
+static void write_ivf_file_header(FILE *outfile,
+                                  const vpx_codec_enc_cfg_t *cfg,
+                                  int frame_cnt) {
+    char header[32];
+
+    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
+        return;
+    header[0] = 'D';
+    header[1] = 'K';
+    header[2] = 'I';
+    header[3] = 'F';
+    mem_put_le16(header+4,  0);                   /* version */
+    mem_put_le16(header+6,  32);                  /* headersize */
+    mem_put_le32(header+8,  fourcc);              /* headersize */
+    mem_put_le16(header+12, cfg->g_w);            /* width */
+    mem_put_le16(header+14, cfg->g_h);            /* height */
+    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
+    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
+    mem_put_le32(header+24, frame_cnt);           /* length */
+    mem_put_le32(header+28, 0);                   /* unused */
+
+    (void) fwrite(header, 1, 32, outfile);
+}
+
+static void write_ivf_frame_header(FILE *outfile,
+                                   const vpx_codec_cx_pkt_t *pkt)
+{
+    char             header[12];
+    vpx_codec_pts_t  pts;
+
+    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+        return;
+
+    pts = pkt->data.frame.pts;
+    mem_put_le32(header, pkt->data.frame.sz);
+    mem_put_le32(header+4, pts&0xFFFFFFFF);
+    mem_put_le32(header+8, pts >> 32);
+
+    (void) fwrite(header, 1, 12, outfile);
+}
+
+/* Temporal scaling parameters */
+/* This sets all the temporal layer parameters given |num_temporal_layers|,
+ * including the target bit allocation across temporal layers. Bit allocation
+ * parameters will be passed in as user parameters in another version.
+ */
+static void set_temporal_layer_pattern(int num_temporal_layers,
+                                       vpx_codec_enc_cfg_t *cfg,
+                                       int bitrate,
+                                       int *layer_flags)
+{
+    assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS);
+    switch (num_temporal_layers)
+    {
+    case 1:
+    {
+        /* 1-layer */
+        cfg->ts_number_layers     = 1;
+        cfg->ts_periodicity       = 1;
+        cfg->ts_rate_decimator[0] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_target_bitrate[0] = bitrate;
+
+        // Update L only.
+        layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+
+    case 2:
+    {
+        /* 2-layers, with sync point at first frame of layer 1. */
+        cfg->ts_number_layers     = 2;
+        cfg->ts_periodicity       = 2;
+        cfg->ts_rate_decimator[0] = 2;
+        cfg->ts_rate_decimator[1] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_layer_id[1] = 1;
+        // Use 60/40 bit allocation as example.
+        cfg->ts_target_bitrate[0] = 0.6f * bitrate;
+        cfg->ts_target_bitrate[1] = bitrate;
+
+        /* 0=L, 1=GF */
+        // ARF is used as predictor for all frames, and is only updated on
+        // key frame. Sync point every 8 frames.
+
+        // Layer 0: predict from L and ARF, update L and G.
+        layer_flags[0] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 1: sync point: predict from L and ARF, and update G.
+        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 0, predict from L and ARF, update L.
+        layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
+                         VP8_EFLAG_NO_UPD_GF  |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 1: predict from L, G and ARF, and update G.
+        layer_flags[3] = VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 0
+        layer_flags[4] = layer_flags[2];
+
+        // Layer 1
+        layer_flags[5] = layer_flags[3];
+
+        // Layer 0
+        layer_flags[6] = layer_flags[4];
+
+        // Layer 1
+        layer_flags[7] = layer_flags[5];
+        break;
+    }
+
+    case 3:
+    default:
+    {
+        // 3-layers structure where ARF is used as predictor for all frames,
+        // and is only updated on key frame.
+        // Sync points for layer 1 and 2 every 8 frames.
+        cfg->ts_number_layers     = 3;
+        cfg->ts_periodicity       = 4;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_layer_id[1] = 2;
+        cfg->ts_layer_id[2] = 1;
+        cfg->ts_layer_id[3] = 2;
+        // Use 40/20/40 bit allocation as example.
+        cfg->ts_target_bitrate[0] = 0.4f * bitrate;
+        cfg->ts_target_bitrate[1] = 0.6f * bitrate;
+        cfg->ts_target_bitrate[2] = bitrate;
+
+        /* 0=L, 1=GF, 2=ARF */
+
+        // Layer 0: predict from L and ARF; update L and G.
+        layer_flags[0] =  VP8_EFLAG_NO_UPD_ARF |
+                          VP8_EFLAG_NO_REF_GF;
+
+        // Layer 2: sync point: predict from L and ARF; update none.
+        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 1: sync point: predict from L and ARF; update G.
+        layer_flags[2] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[3] = VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 0: predict from L and ARF; update L.
+        layer_flags[4] = VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_REF_GF;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[5] = layer_flags[3];
+
+        // Layer 1: predict from L, G, ARF; update G.
+        layer_flags[6] = VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[7] = layer_flags[3];
+        break;
+    }
+    }
+}
+
+/* The periodicity of the pattern given the number of temporal layers. */
+static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = {1, 8, 8};
+
+int main(int argc, char **argv)
+{
+    FILE                 *infile, *outfile[NUM_ENCODERS];
+    FILE                 *downsampled_input[NUM_ENCODERS - 1];
+    char                 filename[50];
+    vpx_codec_ctx_t      codec[NUM_ENCODERS];
+    vpx_codec_enc_cfg_t  cfg[NUM_ENCODERS];
+    int                  frame_cnt = 0;
+    vpx_image_t          raw[NUM_ENCODERS];
+    vpx_codec_err_t      res[NUM_ENCODERS];
+
+    int                  i;
+    long                 width;
+    long                 height;
+    int                  length_frame;
+    int                  frame_avail;
+    int                  got_data;
+    int                  flags = 0;
+    int                  layer_id = 0;
+
+    int                  layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS]
+                                     = {0};
+    int                  flag_periodicity;
+
+    /*Currently, only realtime mode is supported in multi-resolution encoding.*/
+    int                  arg_deadline = VPX_DL_REALTIME;
+
+    /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
+       don't need to know PSNR, which will skip PSNR calculation and save
+       encoding time. */
+    int                  show_psnr = 0;
+    int                  key_frame_insert = 0;
+    uint64_t             psnr_sse_total[NUM_ENCODERS] = {0};
+    uint64_t             psnr_samples_total[NUM_ENCODERS] = {0};
+    double               psnr_totals[NUM_ENCODERS][4] = {{0,0}};
+    int                  psnr_count[NUM_ENCODERS] = {0};
+
+    double               cx_time = 0;
+    struct  timeval      tv1, tv2, difftv;
+
+    /* Set the required target bitrates for each resolution level.
+     * If target bitrate for highest-resolution level is set to 0,
+     * (i.e. target_bitrate[0]=0), we skip encoding at that level.
+     */
+    unsigned int         target_bitrate[NUM_ENCODERS]={1000, 500, 100};
+
+    /* Enter the frame rate of the input video */
+    int                  framerate = 30;
+
+    /* Set down-sampling factor for each resolution level.
+       dsf[0] controls down sampling from level 0 to level 1;
+       dsf[1] controls down sampling from level 1 to level 2;
+       dsf[2] is not used. */
+    vpx_rational_t dsf[NUM_ENCODERS] = {{2, 1}, {2, 1}, {1, 1}};
+
+    /* Set the number of temporal layers for each encoder/resolution level,
+     * starting from highest resoln down to lowest resoln. */
+    unsigned int         num_temporal_layers[NUM_ENCODERS] = {3, 3, 3};
+
+    if(argc!= (7 + 3 * NUM_ENCODERS))
+        die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
+            "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output psnr?> \n",
+            argv[0]);
+
+    printf("Using %s\n",vpx_codec_iface_name(interface));
+
+    width = strtol(argv[1], NULL, 0);
+    height = strtol(argv[2], NULL, 0);
+    framerate = strtol(argv[3], NULL, 0);
+
+    if(width < 16 || width%2 || height <16 || height%2)
+        die("Invalid resolution: %ldx%ld", width, height);
+
+    /* Open input video file for encoding */
+    if(!(infile = fopen(argv[4], "rb")))
+        die("Failed to open %s for reading", argv[4]);
+
+    /* Open output file for each encoder to output bitstreams */
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        if(!target_bitrate[i])
+        {
+            outfile[i] = NULL;
+            continue;
+        }
+
+        if(!(outfile[i] = fopen(argv[i+5], "wb")))
+            die("Failed to open %s for writing", argv[i+4]);
+    }
+
+    // Bitrates per spatial layer: overwrite default rates above.
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
+    }
+
+    // Temporal layers per spatial layers: overwrite default settings above.
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
+        if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
+          die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
+              num_temporal_layers);
+    }
+
+    /* Open file to write out each spatially downsampled input stream. */
+    for (i=0; i< NUM_ENCODERS - 1; i++)
+    {
+       // Highest resoln is encoder 0.
+        if (sprintf(filename,"ds%d.yuv",NUM_ENCODERS - i) < 0)
+        {
+            return EXIT_FAILURE;
+        }
+        downsampled_input[i] = fopen(filename,"wb");
+    }
+
+    key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
+
+    show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
+
+
+    /* Populate default encoder configuration */
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
+        if(res[i]) {
+            printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
+            return EXIT_FAILURE;
+        }
+    }
+
+    /*
+     * Update the default configuration according to needs of the application.
+     */
+    /* Highest-resolution encoder settings */
+    cfg[0].g_w = width;
+    cfg[0].g_h = height;
+    cfg[0].rc_dropframe_thresh = 0;
+    cfg[0].rc_end_usage = VPX_CBR;
+    cfg[0].rc_resize_allowed = 0;
+    cfg[0].rc_min_quantizer = 2;
+    cfg[0].rc_max_quantizer = 56;
+    cfg[0].rc_undershoot_pct = 100;
+    cfg[0].rc_overshoot_pct = 15;
+    cfg[0].rc_buf_initial_sz = 500;
+    cfg[0].rc_buf_optimal_sz = 600;
+    cfg[0].rc_buf_sz = 1000;
+    cfg[0].g_error_resilient = 1;              /* Enable error resilient mode */
+    cfg[0].g_lag_in_frames   = 0;
+
+    /* Disable automatic keyframe placement */
+    /* Note: These 3 settings are copied to all levels. But, except the lowest
+     * resolution level, all other levels are set to VPX_KF_DISABLED internally.
+     */
+    cfg[0].kf_mode           = VPX_KF_AUTO;
+    cfg[0].kf_min_dist = 3000;
+    cfg[0].kf_max_dist = 3000;
+
+    cfg[0].rc_target_bitrate = target_bitrate[0];       /* Set target bitrate */
+    cfg[0].g_timebase.num = 1;                          /* Set fps */
+    cfg[0].g_timebase.den = framerate;
+
+    /* Other-resolution encoder settings */
+    for (i=1; i< NUM_ENCODERS; i++)
+    {
+        memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));
+
+        cfg[i].rc_target_bitrate = target_bitrate[i];
+
+        /* Note: Width & height of other-resolution encoders are calculated
+         * from the highest-resolution encoder's size and the corresponding
+         * down_sampling_factor.
+         */
+        {
+            unsigned int iw = cfg[i-1].g_w*dsf[i-1].den + dsf[i-1].num - 1;
+            unsigned int ih = cfg[i-1].g_h*dsf[i-1].den + dsf[i-1].num - 1;
+            cfg[i].g_w = iw/dsf[i-1].num;
+            cfg[i].g_h = ih/dsf[i-1].num;
+        }
+
+        /* Make width & height to be multiplier of 2. */
+        // Should support odd size ???
+        if((cfg[i].g_w)%2)cfg[i].g_w++;
+        if((cfg[i].g_h)%2)cfg[i].g_h++;
+    }
+
+
+    // Set the number of threads per encode/spatial layer.
+    // (1, 1, 1) means no encoder threading.
+    cfg[0].g_threads = 2;
+    cfg[1].g_threads = 1;
+    cfg[2].g_threads = 1;
+
+    /* Allocate image for each encoder */
+    for (i=0; i< NUM_ENCODERS; i++)
+        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
+            die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
+
+    if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
+        read_frame_p = read_frame;
+    else
+        read_frame_p = read_frame_by_row;
+
+    for (i=0; i< NUM_ENCODERS; i++)
+        if(outfile[i])
+            write_ivf_file_header(outfile[i], &cfg[i], 0);
+
+    /* Temporal layers settings */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        set_temporal_layer_pattern(num_temporal_layers[i],
+                                   &cfg[i],
+                                   cfg[i].rc_target_bitrate,
+                                   &layer_flags[i * VPX_TS_MAX_PERIODICITY]);
+    }
+
+    /* Initialize multi-encoder */
+    if(vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS,
+                                (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0]))
+        die_codec(&codec[0], "Failed to initialize encoder");
+
+    /* The extra encoding configuration parameters can be set as follows. */
+    /* Set encoding speed */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        int speed = -6;
+        /* Lower speed for the lowest resolution. */
+        if (i == NUM_ENCODERS - 1) speed = -4;
+        if(vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed))
+            die_codec(&codec[i], "Failed to set cpu_used");
+    }
+
+    /* Set static threshold = 1 for all encoders */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
+            die_codec(&codec[i], "Failed to set static threshold");
+    }
+
+    /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
+    /* Enable denoising for the highest-resolution encoder. */
+    if(vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
+        die_codec(&codec[0], "Failed to set noise_sensitivity");
+    for ( i=1; i< NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
+            die_codec(&codec[i], "Failed to set noise_sensitivity");
+    }
+
+    /* Set the number of token partitions */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1))
+            die_codec(&codec[i], "Failed to set static threshold");
+    }
+
+    /* Set the max intra target bitrate */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        unsigned int max_intra_size_pct =
+            (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10);
+        if(vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT,
+                             max_intra_size_pct))
+            die_codec(&codec[i], "Failed to set static threshold");
+       //printf("%d %d \n",i,max_intra_size_pct);
+    }
+
+    frame_avail = 1;
+    got_data = 0;
+
+    while(frame_avail || got_data)
+    {
+        vpx_codec_iter_t iter[NUM_ENCODERS]={NULL};
+        const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
+
+        flags = 0;
+        frame_avail = read_frame_p(infile, &raw[0]);
+
+        if(frame_avail)
+        {
+            for ( i=1; i<NUM_ENCODERS; i++)
+            {
+                /*Scale the image down a number of times by downsampling factor*/
+                /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
+                I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
+                          raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
+                          raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
+                          raw[i-1].d_w, raw[i-1].d_h,
+                          raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
+                          raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
+                          raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
+                          raw[i].d_w, raw[i].d_h, 1);
+                /* Write out down-sampled input. */
+                length_frame = cfg[i].g_w *  cfg[i].g_h *3/2;
+                if (fwrite(raw[i].planes[0], 1, length_frame,
+                           downsampled_input[NUM_ENCODERS - i - 1]) !=
+                               length_frame)
+                {
+                    return EXIT_FAILURE;
+                }
+            }
+        }
+
+        /* Set the flags (reference and update) for all the encoders.*/
+        for ( i=0; i<NUM_ENCODERS; i++)
+        {
+            layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity];
+            flags = 0;
+            flag_periodicity = periodicity_to_num_layers
+                [num_temporal_layers[i] - 1];
+            flags = layer_flags[i * VPX_TS_MAX_PERIODICITY +
+                                frame_cnt % flag_periodicity];
+            // Key frame flag for first frame.
+            if (frame_cnt == 0)
+            {
+                flags |= VPX_EFLAG_FORCE_KF;
+            }
+            if (frame_cnt > 0 && frame_cnt == key_frame_insert)
+            {
+                flags = VPX_EFLAG_FORCE_KF;
+            }
+
+            vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags);
+            vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+        }
+
+        gettimeofday(&tv1, NULL);
+        /* Encode each frame at multi-levels */
+        /* Note the flags must be set to 0 in the encode call if they are set
+           for each frame with the vpx_codec_control(), as done above. */
+        if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
+            frame_cnt, 1, 0, arg_deadline))
+        {
+            die_codec(&codec[0], "Failed to encode frame");
+        }
+        gettimeofday(&tv2, NULL);
+        timersub(&tv2, &tv1, &difftv);
+        cx_time += (double)(difftv.tv_sec * 1000000 + difftv.tv_usec);
+        for (i=NUM_ENCODERS-1; i>=0 ; i--)
+        {
+            got_data = 0;
+            while( (pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i])) )
+            {
+                got_data = 1;
+                switch(pkt[i]->kind) {
+                    case VPX_CODEC_CX_FRAME_PKT:
+                        write_ivf_frame_header(outfile[i], pkt[i]);
+                        (void) fwrite(pkt[i]->data.frame.buf, 1,
+                                      pkt[i]->data.frame.sz, outfile[i]);
+                    break;
+                    case VPX_CODEC_PSNR_PKT:
+                        if (show_psnr)
+                        {
+                            int j;
+
+                            psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
+                            psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
+                            for (j = 0; j < 4; j++)
+                            {
+                                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
+                            }
+                            psnr_count[i]++;
+                        }
+
+                        break;
+                    default:
+                        break;
+                }
+                printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT
+                       && (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"");
+                fflush(stdout);
+            }
+        }
+        frame_cnt++;
+    }
+    printf("\n");
+    printf("FPS for encoding %d %f %f \n", frame_cnt, (float)cx_time / 1000000,
+           1000000 * (double)frame_cnt / (double)cx_time);
+
+    fclose(infile);
+
+    printf("Processed %ld frames.\n",(long int)frame_cnt-1);
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        /* Calculate PSNR and print it out */
+        if ( (show_psnr) && (psnr_count[i]>0) )
+        {
+            int j;
+            double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
+                                        psnr_sse_total[i]);
+
+            fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
+
+            fprintf(stderr, " %.3lf", ovpsnr);
+            for (j = 0; j < 4; j++)
+            {
+                fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
+            }
+        }
+
+        if(vpx_codec_destroy(&codec[i]))
+            die_codec(&codec[i], "Failed to destroy codec");
+
+        vpx_img_free(&raw[i]);
+
+        if(!outfile[i])
+            continue;
+
+        /* Try to rewrite the file header with the actual frame count */
+        if(!fseek(outfile[i], 0, SEEK_SET))
+            write_ivf_file_header(outfile[i], &cfg[i], frame_cnt-1);
+        fclose(outfile[i]);
+    }
+    printf("\n");
+
+    return EXIT_SUCCESS;
 }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -53,12 +53,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -15,12 +15,12 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
                  "encoding feature. Supports raw input only.\n");
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -14,24 +14,34 @@
 * that benefit from a scalable bitstream.
 */

+#include <math.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>

-#include "./args.h"
-#include "./tools_common.h"
-#include "./video_writer.h"

+#include "../args.h"
+#include "../tools_common.h"
+#include "../video_writer.h"
+
+#include "../vpx_ports/vpx_timer.h"
 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
-#include "./vpxstats.h"
+#include "../vpxstats.h"
+#define OUTPUT_RC_STATS 1

 static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
 static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");
+static const arg_def_t threads_arg =
+    ARG_DEF("th", "threads", 1, "number of threads to use");
+#if OUTPUT_RC_STATS
+static const arg_def_t output_rc_stats_arg =
+    ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
+#endif
 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
 static const arg_def_t timebase_arg =
@@ -42,6 +52,9 @@ static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
 static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
+static const arg_def_t temporal_layering_mode_arg =
+    ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
+        "VP9E_TEMPORAL_LAYERING_MODE");
 static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
 static const arg_def_t scale_factors_arg =
@@ -60,6 +73,15 @@ static const arg_def_t min_bitrate_arg =
    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
 static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
+static const arg_def_t lag_in_frame_arg =
+    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
+        "generating any outputs");
+static const arg_def_t rc_end_usage_arg =
+    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
+static const arg_def_t speed_arg =
+    ARG_DEF("sp", "speed", 1, "speed configuration");
+static const arg_def_t aqmode_arg =
+    ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -80,11 +102,17 @@ static const arg_def_t *svc_args[] = {
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
-  &max_bitrate_arg,   &temporal_layers_arg,
+  &max_bitrate_arg,   &temporal_layers_arg, &temporal_layering_mode_arg,
+  &lag_in_frame_arg,  &threads_arg,       &aqmode_arg,
+#if OUTPUT_RC_STATS
+  &output_rc_stats_arg,
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
  &bitdepth_arg,
 #endif
-  NULL
+  &speed_arg,
+  &rc_end_usage_arg,  NULL
 };

 static const uint32_t default_frames_to_skip = 0;
@@ -97,6 +125,10 @@ static const uint32_t default_bitrate = 1000;
 static const uint32_t default_spatial_layers = 5;
 static const uint32_t default_temporal_layers = 1;
 static const uint32_t default_kf_dist = 100;
+static const uint32_t default_temporal_layering_mode = 0;
+static const uint32_t default_output_rc_stats = 0;
+static const int32_t default_speed = -1;  // -1 means use library default.
+static const uint32_t default_threads = 0;  // zero means use library default.

 typedef struct {
  const char *input_filename;
@@ -111,7 +143,7 @@ typedef struct {

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
          exec_name);
  fprintf(stderr, "Options:\n");
@@ -138,6 +170,12 @@ static void parse_command_line(int argc, const char **argv_,
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
  svc_ctx->temporal_layers = default_temporal_layers;
+  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
+#if OUTPUT_RC_STATS
+  svc_ctx->output_rc_stat = default_output_rc_stats;
+#endif
+  svc_ctx->speed = default_speed;
+  svc_ctx->threads = default_threads;

  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -179,6 +217,22 @@ static void parse_command_line(int argc, const char **argv_,
      svc_ctx->spatial_layers = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
      svc_ctx->temporal_layers = arg_parse_uint(&arg);
+#if OUTPUT_RC_STATS
+    } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
+      svc_ctx->output_rc_stat = arg_parse_uint(&arg);
+#endif
+    } else if (arg_match(&arg, &speed_arg, argi)) {
+      svc_ctx->speed = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &aqmode_arg, argi)) {
+      svc_ctx->aqmode = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &threads_arg, argi)) {
+      svc_ctx->threads = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
+      svc_ctx->temporal_layering_mode =
+          enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
+      if (svc_ctx->temporal_layering_mode) {
+        enc_cfg->g_error_resilient = 1;
+      }
    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
@@ -207,6 +261,10 @@ static void parse_command_line(int argc, const char **argv_,
      min_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
      max_bitrate = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
+      enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
+      enc_cfg->rc_end_usage = arg_parse_uint(&arg);
 #if CONFIG_VP9_HIGHBITDEPTH
    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
      enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
@@ -307,6 +365,238 @@ static void parse_command_line(int argc, const char **argv_,
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
 }

+#if OUTPUT_RC_STATS
+// For rate control encoding stats.
+struct RateControlStats {
+  // Number of input frames per layer.
+  int layer_input_frames[VPX_MAX_LAYERS];
+  // Total (cumulative) number of encoded frames per layer.
+  int layer_tot_enc_frames[VPX_MAX_LAYERS];
+  // Number of encoded non-key frames per layer.
+  int layer_enc_frames[VPX_MAX_LAYERS];
+  // Framerate per layer (cumulative).
+  double layer_framerate[VPX_MAX_LAYERS];
+  // Target average frame size per layer (per-frame-bandwidth per layer).
+  double layer_pfb[VPX_MAX_LAYERS];
+  // Actual average frame size per layer.
+  double layer_avg_frame_size[VPX_MAX_LAYERS];
+  // Average rate mismatch per layer (|target - actual| / target).
+  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
+  // Actual encoding bitrate per layer (cumulative).
+  double layer_encoding_bitrate[VPX_MAX_LAYERS];
+  // Average of the short-time encoder actual bitrate.
+  // TODO(marpan): Should we add these short-time stats for each layer?
+  double avg_st_encoding_bitrate;
+  // Variance of the short-time encoder actual bitrate.
+  double variance_st_encoding_bitrate;
+  // Window (number of frames) for computing short-time encoding bitrate.
+  int window_size;
+  // Number of window measurements.
+  int window_count;
+};
+
+// Note: these rate control stats assume only 1 key frame in the
+// sequence (i.e., first frame only).
+static void set_rate_control_stats(struct RateControlStats *rc,
+                                     vpx_codec_enc_cfg_t *cfg) {
+  unsigned int sl, tl;
+  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
+  // per-frame-bandwidth, for the rate control encoding stats below.
+  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
+
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      const int tlayer0 = sl * cfg->ts_number_layers;
+      rc->layer_framerate[layer] =
+          framerate / cfg->ts_rate_decimator[tl];
+      if (tl > 0) {
+        rc->layer_pfb[layer] = 1000.0 *
+            (cfg->layer_target_bitrate[layer] -
+                cfg->layer_target_bitrate[layer - 1]) /
+            (rc->layer_framerate[layer] -
+                rc->layer_framerate[layer - 1]);
+      } else {
+        rc->layer_pfb[tlayer0] = 1000.0 *
+            cfg->layer_target_bitrate[tlayer0] /
+            rc->layer_framerate[tlayer0];
+      }
+      rc->layer_input_frames[layer] = 0;
+      rc->layer_enc_frames[layer] = 0;
+      rc->layer_tot_enc_frames[layer] = 0;
+      rc->layer_encoding_bitrate[layer] = 0.0;
+      rc->layer_avg_frame_size[layer] = 0.0;
+      rc->layer_avg_rate_mismatch[layer] = 0.0;
+    }
+  }
+  rc->window_count = 0;
+  rc->window_size = 15;
+  rc->avg_st_encoding_bitrate = 0.0;
+  rc->variance_st_encoding_bitrate = 0.0;
+}
+
+static void printout_rate_control_summary(struct RateControlStats *rc,
+                                          vpx_codec_enc_cfg_t *cfg,
+                                          int frame_cnt) {
+  unsigned int sl, tl;
+  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
+  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
+  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
+      cfg->ss_number_layers, cfg->ts_number_layers);
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      const int num_dropped = (tl > 0) ?
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
+      if (!sl)
+        tot_num_frames += rc->layer_input_frames[layer];
+      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
+          rc->layer_encoding_bitrate[layer] / tot_num_frames;
+      rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
+          rc->layer_enc_frames[layer];
+      rc->layer_avg_rate_mismatch[layer] =
+          100.0 * rc->layer_avg_rate_mismatch[layer] /
+          rc->layer_enc_frames[layer];
+      printf("For layer#: sl%d tl%d \n", sl, tl);
+      printf("Bitrate (target vs actual): %d %f.0 kbps\n",
+             cfg->layer_target_bitrate[layer],
+             rc->layer_encoding_bitrate[layer]);
+      printf("Average frame size (target vs actual): %f %f bits\n",
+             rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
+      printf("Average rate_mismatch: %f\n",
+             rc->layer_avg_rate_mismatch[layer]);
+      printf("Number of input frames, encoded (non-key) frames, "
+          "and percent dropped frames: %d %d %f.0 \n",
+          rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
+          100.0 * num_dropped / rc->layer_input_frames[layer]);
+      printf("\n");
+    }
+  }
+  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
+  rc->variance_st_encoding_bitrate =
+      rc->variance_st_encoding_bitrate / rc->window_count -
+      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+      rc->avg_st_encoding_bitrate;
+  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
+  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+         rc->avg_st_encoding_bitrate,
+         sqrt(rc->variance_st_encoding_bitrate),
+         perc_fluctuation);
+  if (frame_cnt != tot_num_frames)
+    die("Error: Number of input frames not equal to output encoded frames != "
+        "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
+}
+
+vpx_codec_err_t parse_superframe_index(const uint8_t *data,
+                                       size_t data_sz,
+                                       uint32_t sizes[8], int *count) {
+  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+  // it is a super frame index. If the last byte of real video compression
+  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+  // not the associated matching marker byte at the front of the index we have
+  // an invalid bitstream and need to return an error.
+
+  uint8_t marker;
+
+  marker = *(data + data_sz - 1);
+  *count = 0;
+
+
+  if ((marker & 0xe0) == 0xc0) {
+    const uint32_t frames = (marker & 0x7) + 1;
+    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+    const size_t index_sz = 2 + mag * frames;
+
+    // This chunk is marked as having a superframe index but doesn't have
+    // enough data for it, thus it's an invalid superframe index.
+    if (data_sz < index_sz)
+      return VPX_CODEC_CORRUPT_FRAME;
+
+    {
+      const uint8_t marker2 = *(data + data_sz - index_sz);
+
+      // This chunk is marked as having a superframe index but doesn't have
+      // the matching marker byte at the front of the index therefore it's an
+      // invalid chunk.
+      if (marker != marker2)
+        return VPX_CODEC_CORRUPT_FRAME;
+    }
+
+    {
+      // Found a valid superframe index.
+      uint32_t i, j;
+      const uint8_t *x = &data[data_sz - index_sz + 1];
+
+      for (i = 0; i < frames; ++i) {
+        uint32_t this_sz = 0;
+
+        for (j = 0; j < mag; ++j)
+          this_sz |= (*x++) << (j * 8);
+        sizes[i] = this_sz;
+      }
+      *count = frames;
+    }
+  }
+  return VPX_CODEC_OK;
+}
+#endif
+
+// Example pattern for spatial layers and 2 temporal layers used in the
+// bypass/flexible mode. The pattern corresponds to the pattern
+// VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in
+// non-flexible mode.
+void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
+                                 int is_key_frame,
+                                 vpx_svc_ref_frame_config_t *ref_frame_config) {
+  for (sl = 0; sl < num_spatial_layers; ++sl) {
+    if (!tl) {
+      if (!sl) {
+        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
+                                            VP8_EFLAG_NO_REF_ARF |
+                                            VP8_EFLAG_NO_UPD_GF |
+                                            VP8_EFLAG_NO_UPD_ARF;
+      } else {
+        if (is_key_frame) {
+          ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_LAST |
+                                              VP8_EFLAG_NO_REF_ARF |
+                                              VP8_EFLAG_NO_UPD_GF |
+                                              VP8_EFLAG_NO_UPD_ARF;
+        } else {
+        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
+                                            VP8_EFLAG_NO_UPD_GF |
+                                            VP8_EFLAG_NO_UPD_ARF;
+        }
+      }
+    } else if (tl == 1) {
+      if (!sl) {
+        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_GF |
+                                            VP8_EFLAG_NO_REF_ARF |
+                                            VP8_EFLAG_NO_UPD_LAST |
+                                            VP8_EFLAG_NO_UPD_GF;
+      } else {
+        ref_frame_config->frame_flags[sl] = VP8_EFLAG_NO_REF_ARF |
+                                            VP8_EFLAG_NO_UPD_LAST |
+                                            VP8_EFLAG_NO_UPD_GF;
+      }
+    }
+    if (tl == 0) {
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      if (sl)
+        ref_frame_config->gld_fb_idx[sl] = sl - 1;
+      else
+        ref_frame_config->gld_fb_idx[sl] = 0;
+      ref_frame_config->alt_fb_idx[sl] = 0;
+    } else if (tl == 1) {
+      ref_frame_config->lst_fb_idx[sl] = sl;
+      ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1;
+      ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl;
+    }
+  }
+}
+
 int main(int argc, const char **argv) {
  AppInput app_input = {0};
  VpxVideoWriter *writer = NULL;
@@ -323,7 +613,18 @@ int main(int argc, const char **argv) {
  FILE *infile = NULL;
  int end_of_stream = 0;
  int frames_received = 0;
-
+#if OUTPUT_RC_STATS
+  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+  struct RateControlStats rc;
+  vpx_svc_layer_id_t layer_id;
+  vpx_svc_ref_frame_config_t ref_frame_config;
+  int sl, tl;
+  double sum_bitrate = 0.0;
+  double sum_bitrate2 = 0.0;
+  double framerate  = 30.0;
+#endif
+  struct vpx_usec_timer timer;
+  int64_t cx_time = 0;
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
  exec_name = argv[0];
@@ -350,6 +651,13 @@ int main(int argc, const char **argv) {
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");

+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    set_rate_control_stats(&rc, &enc_cfg);
+    framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
+  }
+#endif
+
  info.codec_fourcc = VP9_FOURCC;
  info.time_base.numerator = enc_cfg.g_timebase.num;
  info.time_base.denominator = enc_cfg.g_timebase.den;
@@ -361,11 +669,34 @@ int main(int argc, const char **argv) {
    if (!writer)
      die("Failed to open %s for writing\n", app_input.output_filename);
  }
+#if OUTPUT_RC_STATS
+  // For now, just write temporal layer streams.
+  // TODO(wonkap): do spatial by re-writing superframe.
+  if (svc_ctx.output_rc_stat) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+      char file_name[PATH_MAX];
+
+      snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
+               app_input.output_filename, tl);
+      outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
+      if (!outfile[tl])
+        die("Failed to open %s for writing", file_name);
+    }
+  }
+#endif

  // skip initial frames
  for (i = 0; i < app_input.frames_to_skip; ++i)
    vpx_img_read(&raw, infile);

+  if (svc_ctx.speed != -1)
+    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
+  if (svc_ctx.threads)
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
+    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+
+
  // Encode frames
  while (!end_of_stream) {
    vpx_codec_iter_t iter = NULL;
@@ -376,8 +707,37 @@ int main(int argc, const char **argv) {
      end_of_stream = 1;
    }

+    // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates)
+    // and the buffer indices for each spatial layer of the current
+    // (super)frame to be encoded. The temporal layer_id for the current frame
+    // also needs to be set.
+    // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS"
+    // mode to "VP9E_LAYERING_MODE_BYPASS".
+    if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) {
+      // Example for 2 temporal layers.
+      if (frame_cnt % 2 == 0)
+        layer_id.temporal_layer_id = 0;
+      else
+        layer_id.temporal_layer_id = 1;
+      // Note that we only set the temporal layer_id, since we are calling
+      // the encode for the whole superframe. The encoder will internally loop
+      // over all the spatial layers for the current superframe.
+      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+      set_frame_flags_bypass_mode(sl, layer_id.temporal_layer_id,
+                                  svc_ctx.spatial_layers,
+                                  frame_cnt == 0,
+                                  &ref_frame_config);
+      vpx_codec_control(&codec, VP9E_SET_SVC_REF_FRAME_CONFIG,
+                        &ref_frame_config);
+    }
+
+    vpx_usec_timer_start(&timer);
    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
-                         pts, frame_duration, VPX_DL_GOOD_QUALITY);
+                         pts, frame_duration, svc_ctx.speed >= 5 ?
+                         VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+    vpx_usec_timer_mark(&timer);
+    cx_time += vpx_usec_timer_elapsed(&timer);
+
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
      die_codec(&codec, "Failed to encode frame");
@@ -386,11 +746,90 @@ int main(int argc, const char **argv) {
    while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
      switch (cx_pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
-          if (cx_pkt->data.frame.sz > 0)
+          if (cx_pkt->data.frame.sz > 0) {
+#if OUTPUT_RC_STATS
+            uint32_t sizes[8];
+            int count = 0;
+#endif
            vpx_video_writer_write_frame(writer,
                                         cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
+#if OUTPUT_RC_STATS
+            // TODO(marpan/wonkap): Put this (to line728) in separate function.
+            if (svc_ctx.output_rc_stat) {
+              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
+              parse_superframe_index(cx_pkt->data.frame.buf,
+                                     cx_pkt->data.frame.sz, sizes, &count);
+              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                        layer_id.temporal_layer_id];
+              }
+              for (tl = layer_id.temporal_layer_id;
+                  tl < enc_cfg.ts_number_layers; ++tl) {
+                vpx_video_writer_write_frame(outfile[tl],
+                                             cx_pkt->data.frame.buf,
+                                             cx_pkt->data.frame.sz,
+                                             cx_pkt->data.frame.pts);
+              }
+
+              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                for (tl = layer_id.temporal_layer_id;
+                    tl < enc_cfg.ts_number_layers; ++tl) {
+                  const int layer = sl * enc_cfg.ts_number_layers + tl;
+                  ++rc.layer_tot_enc_frames[layer];
+                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+                  // Keep count of rate control stats per layer, for non-key
+                  // frames.
+                  if (tl == layer_id.temporal_layer_id &&
+                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+                    rc.layer_avg_rate_mismatch[layer] +=
+                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+                        rc.layer_pfb[layer];
+                    ++rc.layer_enc_frames[layer];
+                  }
+                }
+              }
+
+              // Update for short-time encoding bitrate states, for moving
+              // window of size rc->window, shifted by rc->window / 2.
+              // Ignore first window segment, due to key frame.
+              if (frame_cnt > rc.window_size) {
+                tl = layer_id.temporal_layer_id;
+                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
+                }
+                if (frame_cnt % rc.window_size == 0) {
+                  rc.window_count += 1;
+                  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+                  rc.variance_st_encoding_bitrate +=
+                      (sum_bitrate / rc.window_size) *
+                      (sum_bitrate / rc.window_size);
+                  sum_bitrate = 0.0;
+                }
+              }
+
+              // Second shifted window.
+              if (frame_cnt > rc.window_size + rc.window_size / 2) {
+               tl = layer_id.temporal_layer_id;
+               for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                 sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
+               }
+
+               if (frame_cnt > 2 * rc.window_size &&
+                  frame_cnt % rc.window_size == 0) {
+                 rc.window_count += 1;
+                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+                 rc.variance_st_encoding_bitrate +=
+                    (sum_bitrate2 / rc.window_size) *
+                    (sum_bitrate2 / rc.window_size);
+                 sum_bitrate2 = 0.0;
+               }
+              }
+            }
+#endif
+          }

          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
@@ -415,25 +854,34 @@ int main(int argc, const char **argv) {
      pts += frame_duration;
    }
  }
-
  printf("Processed %d frames\n", frame_cnt);
-
  fclose(infile);
+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
+    printf("\n");
+  }
+#endif
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
  if (app_input.passes == 2)
    stats_close(&app_input.rc_stats, 1);
-
  if (writer) {
    vpx_video_writer_close(writer);
  }
-
+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+      vpx_video_writer_close(outfile[tl]);
+    }
+  }
+#endif
+  printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n",
+         frame_cnt,
+         1000 * (float)cx_time / (double)(frame_cnt * 1000000),
+         1000000 * (double)frame_cnt / (double)cx_time);
  vpx_img_free(&raw);
-
  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
-
  vpx_svc_release(&svc_ctx);
-
  return EXIT_SUCCESS;
 }
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -19,16 +19,16 @@
 #include <string.h>

 #include "./vpx_config.h"
-#include "vpx_ports/vpx_timer.h"
+#include "../vpx_ports/vpx_timer.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  exit(EXIT_FAILURE);
 }

@@ -61,6 +61,16 @@ struct RateControlMetrics {
  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative).
  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
+  // Average of the short-time encoder actual bitrate.
+  // TODO(marpan): Should we add these short-time stats for each layer?
+  double avg_st_encoding_bitrate;
+  // Variance of the short-time encoder actual bitrate.
+  double variance_st_encoding_bitrate;
+  // Window (number of frames) for computing short-timee encoding bitrate.
+  int window_size;
+  // Number of window measurements.
+  int window_count;
+  int layer_target_bitrate[VPX_MAX_LAYERS];
 };

 // Note: these rate control metrics assume only 1 key frame in the
@@ -76,13 +86,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
  // per-frame-bandwidth, for the rate control encoding stats below.
  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
-  rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
+  rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
      rc->layer_framerate[0];
  for (i = 0; i < cfg->ts_number_layers; ++i) {
    if (i > 0) {
      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
      rc->layer_pfb[i] = 1000.0 *
-          (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
+          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
    }
    rc->layer_input_frames[i] = 0;
@@ -92,6 +102,10 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
    rc->layer_avg_frame_size[i] = 0.0;
    rc->layer_avg_rate_mismatch[i] = 0.0;
  }
+  rc->window_count = 0;
+  rc->window_size = 15;
+  rc->avg_st_encoding_bitrate = 0.0;
+  rc->variance_st_encoding_bitrate = 0.0;
 }

 static void printout_rate_control_summary(struct RateControlMetrics *rc,
@@ -99,6 +113,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
                                          int frame_cnt) {
  unsigned int i = 0;
  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
  printf("Rate control layer stats for %d layer(s):\n\n",
      cfg->ts_number_layers);
@@ -114,7 +129,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
    rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
        rc->layer_enc_frames[i];
    printf("For layer#: %d \n", i);
-    printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
+    printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
           rc->layer_encoding_bitrate[i]);
    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
           rc->layer_avg_frame_size[i]);
@@ -125,6 +140,17 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
        100.0 * num_dropped / rc->layer_input_frames[i]);
    printf("\n");
  }
+  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
+  rc->variance_st_encoding_bitrate =
+      rc->variance_st_encoding_bitrate / rc->window_count -
+      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+      rc->avg_st_encoding_bitrate;
+  printf("Short-time stats, for window of %d frames: \n",rc->window_size);
+  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+         rc->avg_st_encoding_bitrate,
+         sqrt(rc->variance_st_encoding_bitrate),
+         perc_fluctuation);
  if ((frame_cnt - 1) != tot_num_frames)
    die("Error: Number of input frames not equal to output! \n");
 }
@@ -456,7 +482,11 @@ int main(int argc, char **argv) {
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
  int flag_periodicity = 1;
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
  vpx_svc_layer_id_t layer_id = {0, 0};
+#else
+  vpx_svc_layer_id_t layer_id = {0};
+#endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
@@ -469,6 +499,9 @@ int main(int argc, char **argv) {
 #else
  const int min_args = min_args_base;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+  double sum_bitrate = 0.0;
+  double sum_bitrate2 = 0.0;
+  double framerate  = 30.0;

  exec_name = argv[0];
  // Check usage and arguments.
@@ -565,21 +598,32 @@ int main(int argc, char **argv) {
  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode];
       ++i) {
-    cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+    if (strncmp(encoder->name, "vp8", 3) == 0)
+      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+    else if (strncmp(encoder->name, "vp9", 3) == 0)
+      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
  }

  // Real time parameters.
  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
-  cfg.rc_resize_allowed = 0;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
+  if (strncmp(encoder->name, "vp9", 3) == 0)
+    cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
  cfg.rc_buf_initial_sz = 500;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;

+  // Disable dynamic resizing by default.
+  cfg.rc_resize_allowed = 0;
+
+  // Use 1 thread as default.
+  cfg.g_threads = 1;
+
  // Enable error resilient mode.
  cfg.g_error_resilient = 1;
  cfg.g_lag_in_frames   = 0;
@@ -588,6 +632,8 @@ int main(int argc, char **argv) {
  // Disable automatic keyframe placement.
  cfg.kf_min_dist = cfg.kf_max_dist = 3000;

+  cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  set_temporal_layer_pattern(layering_mode,
                             &cfg,
                             layer_flags,
@@ -596,14 +642,15 @@ int main(int argc, char **argv) {
  set_rate_control_metrics(&rc, &cfg);

  // Target bandwidth for the whole stream.
-  // Set to ts_target_bitrate for highest layer (total bitrate).
-  cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
+  // Set to layer_target_bitrate for highest layer (total bitrate).
+  cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];

  // Open input file.
  if (!(infile = fopen(argv[1], "rb"))) {
    die("Failed to open %s for reading", argv[1]);
  }

+  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
  // Open an output file for each stream.
  for (i = 0; i < cfg.ts_number_layers; ++i) {
    char file_name[PATH_MAX];
@@ -636,23 +683,36 @@ int main(int argc, char **argv) {

  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly);
+    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
-      vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
-      vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-      vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-      vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
-      if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
-        die_codec(&codec, "Failed to set SVC");
+    vpx_svc_extra_cfg_t svc_params;
+    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
+    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
+    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
+    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
+      die_codec(&codec, "Failed to set SVC");
+    for (i = 0; i < cfg.ts_number_layers; ++i) {
+      svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
+      svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
    }
+    svc_params.scaling_factor_num[0] = cfg.g_h;
+    svc_params.scaling_factor_den[0] = cfg.g_h;
+    vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
+  }
+  if (strncmp(encoder->name, "vp8", 3) == 0) {
+    vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
  }
-  vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
  // This controls the maximum target size of the key frame.
  // For generating smaller key frames, use a smaller max_intra_size_pct
  // value, like 100 or 200.
  {
-    const int max_intra_size_pct = 200;
+    const int max_intra_size_pct = 900;
    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
  }
@@ -662,14 +722,21 @@ int main(int argc, char **argv) {
    struct vpx_usec_timer timer;
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
    // Update the temporal layer_id. No spatial layers in this test.
    layer_id.spatial_layer_id = 0;
+#endif
    layer_id.temporal_layer_id =
        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
    if (strncmp(encoder->name, "vp9", 3) == 0) {
      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    } else if (strncmp(encoder->name, "vp8", 3) == 0) {
+      vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID,
+                        layer_id.temporal_layer_id);
    }
    flags = layer_flags[frame_cnt % flag_periodicity];
+    if (layering_mode == 0)
+      flags = 0;
    frame_avail = vpx_img_read(&raw, infile);
    if (frame_avail)
      ++rc.layer_input_frames[layer_id.temporal_layer_id];
@@ -705,6 +772,33 @@ int main(int argc, char **argv) {
              ++rc.layer_enc_frames[i];
            }
          }
+          // Update for short-time encoding bitrate states, for moving window
+          // of size rc->window, shifted by rc->window / 2.
+          // Ignore first window segment, due to key frame.
+          if (frame_cnt > rc.window_size) {
+            sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
+            if (frame_cnt % rc.window_size == 0) {
+              rc.window_count += 1;
+              rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+              rc.variance_st_encoding_bitrate +=
+                  (sum_bitrate / rc.window_size) *
+                  (sum_bitrate / rc.window_size);
+              sum_bitrate = 0.0;
+            }
+          }
+          // Second shifted window.
+          if (frame_cnt > rc.window_size + rc.window_size / 2) {
+            sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
+            if (frame_cnt > 2 * rc.window_size &&
+                frame_cnt % rc.window_size == 0) {
+              rc.window_count += 1;
+              rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+              rc.variance_st_encoding_bitrate +=
+                  (sum_bitrate2 / rc.window_size) *
+                  (sum_bitrate2 / rc.window_size);
+              sum_bitrate2 = 0.0;
+            }
+          }
          break;
          default:
            break;
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -36,7 +36,7 @@ DOXYFILE_ENCODING      = UTF-8
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
 # by quotes) that should identify the project.

-PROJECT_NAME           = "WebM VP8 Codec SDK"
+PROJECT_NAME           = "WebM Codec SDK"

 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
@@ -415,12 +415,6 @@ MAX_INITIALIZER_LINES  = 30

 SHOW_USED_FILES        = YES

-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES       = NO
-
 # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 # doxygen should invoke to get the current version for each file (typically from the
 # version control system). Doxygen will invoke the program by executing (via
@@ -715,12 +709,6 @@ HTML_FOOTER            =

 HTML_STYLESHEET        =

-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS     = YES
-
 # If the GENERATE_HTMLHELP tag is set to YES, additional index files
 # will be generated that can be used as input for tools like the
 # Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
--- a/libs.mk
+++ b/libs.mk
@@ -17,32 +17,6 @@ else
  ASM:=.asm
 endif

-#
-# Calculate platform- and compiler-specific offsets for hand coded assembly
-#
-ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
-OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
-define asm_offsets_template
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
-	@echo "    [CREATE] $$@"
-	$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
-$$(BUILD_PFX)$(2).S: $(2)
-CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
-endef
-else
-  ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
-define asm_offsets_template
-$$(BUILD_PFX)$(1): obj_int_extract
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
-	@echo "    [CREATE] $$@"
-	$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
-OBJS-yes += $$(BUILD_PFX)$(2).o
-CLEAN-OBJS += $$(BUILD_PFX)$(1)
-$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
-endef
-endif # rvct
-endif # !gcc
-
 #
 # Rule to generate runtime cpu detection files
 #
@@ -51,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
 	@echo "    [CREATE] $$@"
 	$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
          --sym=$(1) \
-          --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+          --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
          $$(RTCD_OPTIONS) $$^ > $$@
 CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
@@ -60,13 +34,6 @@ endef
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk

-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_LIBVPX variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
-
 include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
 CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
 CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
@@ -80,7 +47,13 @@ CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
 include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
 CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))

-ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
+include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
+CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
+
+include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk
+CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS))
+
+ifeq ($(CONFIG_VP8),yes)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
 endif
@@ -103,7 +76,7 @@ ifeq ($(CONFIG_VP8_DECODER),yes)
  CODEC_DOC_SECTIONS += vp8 vp8_decoder
 endif

-ifneq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),)
+ifeq ($(CONFIG_VP9),yes)
  VP9_PREFIX=vp9/
  include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk
 endif
@@ -136,6 +109,40 @@ endif
 VP9_PREFIX=vp9/
 $(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra

+#  VP10 make file
+ifeq ($(CONFIG_VP10),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10_common.mk
+endif
+
+ifeq ($(CONFIG_VP10_ENCODER),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10cx.mk
+  CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_CX_SRCS))
+  CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_CX_EXPORTS))
+  CODEC_SRCS-yes += $(VP10_PREFIX)vp10cx.mk vpx/vp8.h vpx/vp8cx.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h
+  INSTALL-LIBS-$(CONFIG_SPATIAL_SVC) += include/vpx/svc_context.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h
+  CODEC_DOC_SECTIONS += vp9 vp9_encoder
+endif
+
+ifeq ($(CONFIG_VP10_DECODER),yes)
+  VP10_PREFIX=vp10/
+  include $(SRC_PATH_BARE)/$(VP10_PREFIX)vp10dx.mk
+  CODEC_SRCS-yes += $(addprefix $(VP10_PREFIX),$(call enabled,VP10_DX_SRCS))
+  CODEC_EXPORTS-yes += $(addprefix $(VP10_PREFIX),$(VP10_DX_EXPORTS))
+  CODEC_SRCS-yes += $(VP10_PREFIX)vp10dx.mk vpx/vp8.h vpx/vp8dx.h
+  INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h
+  INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP10_PREFIX)/%
+  CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h
+  CODEC_DOC_SECTIONS += vp9 vp9_decoder
+endif
+
+VP10_PREFIX=vp10/
+$(BUILD_PFX)$(VP10_PREFIX)%.c.o: CFLAGS += -Wextra
+
 ifeq ($(CONFIG_ENCODERS),yes)
  CODEC_DOC_SECTIONS += encoder
 endif
@@ -163,18 +170,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Release/%)
 INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)
 endif

-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
-CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
+CODEC_SRCS-yes += build/make/version.sh
+CODEC_SRCS-yes += build/make/rtcd.pl
+CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
+CODEC_SRCS-yes += vpx_ports/mem_ops.h
+CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
+CODEC_SRCS-yes += vpx_ports/vpx_once.h
+CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
 endif
-CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
+CODEC_EXPORTS-yes += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec

@@ -205,33 +212,13 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
 # based build systems.
 libvpx_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(CODEC_SRCS) | xargs -n1 echo | sort -u > $@
+	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt


 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)

-obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
-	@cp $^ $@
-
-obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
-obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
-	@echo "    [CREATE] $@"
-	$(qexec)$(GEN_VCPROJ) \
-    --exe \
-    --target=$(TOOLCHAIN) \
-    --name=obj_int_extract \
-    --ver=$(CONFIG_VS_VERSION) \
-    --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
-    --src-path-bare="$(SRC_PATH_BARE)" \
-    $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
-    --out=$@ $^ \
-    -I. \
-    -I"$(SRC_PATH_BARE)" \
-
-PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
-
 vpx.def: $(call enabled,CODEC_EXPORTS)
 	@echo "    [CREATE] $@"
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
@@ -246,7 +233,7 @@ ASM_INCLUDES := \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \

-vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
+vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
            $(if $(CONFIG_SHARED),--dll,--lib) \
@@ -261,7 +248,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
            $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
            --src-path-bare="$(SRC_PATH_BARE)" \

-PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
+PROJECTS-yes += vpx.$(VCPROJ_SFX)

 vpx.$(VCPROJ_SFX): vpx_config.asm
 vpx.$(VCPROJ_SFX): $(RTCD)
@@ -269,32 +256,42 @@ vpx.$(VCPROJ_SFX): $(RTCD)
 endif
 else
 LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
-LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
+OBJS-yes += $(LIBVPX_OBJS)
+LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-
-BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
-
+SO_VERSION_MAJOR := 3
+SO_VERSION_MINOR := 0
+SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
-LIBVPX_SO               := libvpx.$(VERSION_MAJOR).dylib
+LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
+SHARED_LIB_SUF          := .dylib
 EXPORT_FILE             := libvpx.syms
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
                             libvpx.dylib  )
 else
-LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
+ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO               := libvpx$(SO_VERSION_MAJOR).dll
+SHARED_LIB_SUF          := _dll.a
+EXPORT_FILE             := libvpx.def
+LIBVPX_SO_SYMLINKS      :=
+LIBVPX_SO_IMPLIB        := libvpx_dll.a
+else
+LIBVPX_SO               := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
+SHARED_LIB_SUF          := .so
 EXPORT_FILE             := libvpx.ver
-SYM_LINK                := libvpx.so
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
-                             libvpx.so libvpx.so.$(VERSION_MAJOR) \
-                             libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
+                             libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
+                             libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
+endif
 endif

-LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
-                           $(notdir $(LIBVPX_SO_SYMLINKS))
+LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
+                           $(notdir $(LIBVPX_SO_SYMLINKS)) \
+                           $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
 $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
 $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
-$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
 $(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)

 libvpx.ver: $(call enabled,CODEC_EXPORTS)
@@ -309,6 +306,19 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
 	$(qexec)awk '{print "_"$$2}' $^ >$@
 CLEAN-OBJS += libvpx.syms

+libvpx.def: $(call enabled,CODEC_EXPORTS)
+	@echo "    [CREATE] $@"
+	$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
+	$(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
+	$(qexec)echo "EXPORTS" >> $@
+	$(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
+CLEAN-OBJS += libvpx.def
+
+libvpx_dll.a: $(LIBVPX_SO)
+	@echo "    [IMPLIB] $@"
+	$(qexec)emximp -o $@ $<
+CLEAN-OBJS += libvpx_dll.a
+
 define libvpx_symlink_template
 $(1): $(2)
 	@echo "    [LN]     $(2) $$@"
@@ -324,11 +334,12 @@ $(eval $(call libvpx_symlink_template,\
    $(LIBVPX_SO)))


-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))


-LIBS-$(BUILD_LIBVPX) += vpx.pc
+LIBS-yes += vpx.pc
 vpx.pc: config.mk libs.mk
 	@echo "    [CREATE] $@"
 	$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
@@ -354,9 +365,6 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
 CLEAN-OBJS += vpx.pc
 endif

-LIBS-$(LIPO_LIBVPX) += libvpx.a
-$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
-
 #
 # Rule to make assembler configuration file from C configuration file
 #
@@ -377,7 +385,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm
 endif

 #
-# Add assembler dependencies for configuration and offsets
+# Add assembler dependencies for configuration.
 #
 $(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
@@ -395,14 +403,18 @@ LIBVPX_TEST_DATA_PATH ?= .

 include $(SRC_PATH_BARE)/test/test.mk
 LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
-LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
+LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
 libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

+TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
+TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
+TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
+
 libvpx_test_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
+	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_test_srcs.txt

 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
@@ -463,7 +475,25 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_

 PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)

-LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
+LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
+
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
+test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
+	@echo "    [CREATE] $@"
+	$(qexec)$(GEN_VCPROJ) \
+            --exe \
+            --target=$(TOOLCHAIN) \
+            --name=test_intra_pred_speed \
+            -D_VARIADIC_MAX=10 \
+            --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
+            --ver=$(CONFIG_VS_VERSION) \
+            --src-path-bare="$(SRC_PATH_BARE)" \
+            $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+            --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
+            -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
+            -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
+endif  # TEST_INTRA_PRED_SPEED
 endif
 else

@@ -474,45 +504,54 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
 # Disabling pthreads globally will cause issues on darwin and possibly elsewhere
 $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
 endif
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
-LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
+GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
+GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(GTEST_OBJS)
+LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
 $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)

 LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
-BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(LIBVPX_TEST_OBJS)
+BINS-yes += $(LIBVPX_TEST_BIN)

 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
-$(foreach bin,$(LIBVPX_TEST_BINS),\
-    $(if $(BUILD_LIBVPX),$(eval $(bin): \
-        lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
-    $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
-        $(LIBVPX_TEST_OBJS) \
-        -L. -lvpx -lgtest $(extralibs) -lm)\
-        )))\
-    $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
+TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
+$(LIBVPX_TEST_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
+              $(LIBVPX_TEST_OBJS) \
+              -L. -lvpx -lgtest $(extralibs) -lm))

-endif
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
+BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
+
+$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
+              $(TEST_INTRA_PRED_SPEED_OBJS) \
+              -L. -lvpx -lgtest $(extralibs) -lm))
+endif  # TEST_INTRA_PRED_SPEED
+
+endif  # CONFIG_UNIT_TESTS

 # Install test sources only if codec source is included
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
    $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)

 define test_shard_template
 test:: test_shard.$(1)
-test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
+test-no-data-check:: test_shard_ndc.$(1)
+test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
 	@set -e; \
-	 for t in $(LIBVPX_TEST_BINS); do \
-	   export GTEST_SHARD_INDEX=$(1); \
-	   export GTEST_TOTAL_SHARDS=$(2); \
-	   $$$$t; \
-	 done
+	 export GTEST_SHARD_INDEX=$(1); \
+	 export GTEST_TOTAL_SHARDS=$(2); \
+	 $(LIBVPX_TEST_BIN)
+test_shard.$(1): testdata
 .PHONY: test_shard.$(1)
 endef

@@ -535,7 +574,11 @@ libs.doxy: $(CODEC_DOC_SRCS)
 	@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@

 ## Generate rtcd.h for all objects
+ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
 $(OBJS-yes:.o=.d): $(RTCD)
+else
+$(OBJS-yes): $(RTCD)
+endif

 ## Update the global src list
 SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
@@ -553,15 +596,16 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
 endif
-utiltest: testdata
+utiltest utiltest-no-data-check:
 	$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
 	$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
+utiltest: testdata
 else
-utiltest:
+utiltest utiltest-no-data-check:
 	@echo Unit tests must be enabled to make the utiltest target.
 endif

@@ -579,11 +623,12 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
 endif
-exampletest: examples testdata
+exampletest exampletest-no-data-check: examples
 	$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(EXAMPLES_BIN_PATH)
+exampletest: testdata
 else
-exampletest:
+exampletest exampletest-no-data-check:
 	@echo Unit tests must be enabled to make the exampletest target.
 endif
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -1,4 +1,4 @@
-/*!\mainpage WebM VP8 Codec SDK
+/*!\mainpage WebM Codec SDK

  \section main_contents Page Contents
  - \ref main_intro
@@ -6,11 +6,11 @@
  - \ref main_support

  \section main_intro Introduction
-  Welcome to the WebM VP8 Codec SDK. This SDK allows you to integrate your
-  applications with the VP8 video codec, a high quality, royalty free, open
-  source codec deployed on millions of computers and devices worldwide.
+  Welcome to the WebM Codec SDK. This SDK allows you to integrate your
+  applications with the VP8 and VP9 video codecs, high quality, royalty free,
+  open source codecs deployed on billions of computers and devices worldwide.

-  This distribution of the WebM VP8 Codec SDK includes the following support:
+  This distribution of the WebM Codec SDK includes the following support:

  \if vp8_encoder
  - \ref vp8_encoder
@@ -28,12 +28,12 @@
  - Read the \ref samples "sample code" for examples of how to interact with the
    codec.
  - \ref codec reference
-    \if encoder
-    - \ref encoder reference
-    \endif
-    \if decoder
-    - \ref decoder reference
-    \endif
+  \if encoder
+  - \ref encoder reference
+  \endif
+  \if decoder
+  - \ref decoder reference
+  \endif

  \section main_support Support Options & FAQ
  The WebM project is an open source project supported by its community. For
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -24,7 +24,7 @@

 #include "md5_utils.h"

-void
+static void
 byteSwap(UWORD32 *buf, unsigned words) {
  md5byte *p;

--- a/rate_hist.c
+++ b/rate_hist.c
@@ -88,6 +88,9 @@ void update_rate_histogram(struct rate_hist *hist,
  if (now < cfg->rc_buf_initial_sz)
    return;

+  if (!cfg->rc_target_bitrate)
+    return;
+
  then = now;

  /* Sum the size over the past rc_buf_sz ms */
--- a/solution.mk
+++ b/solution.mk
@@ -9,7 +9,7 @@
 ##

 # libvpx reverse dependencies (targets that depend on libvpx)
-VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
+VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
 VPX_RDEPS=$(foreach vcp,\
              $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)

@@ -17,7 +17,6 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX))
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
            $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
-            --dep=vpx:obj_int_extract \
            --dep=test_libvpx:gtest \
            --ver=$(CONFIG_VS_VERSION)\
            --out=$@ $^
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -29,14 +29,14 @@ class ACMRandom {
  uint16_t Rand16(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
-    return (value >> 16) & 0xffff;
+    return (value >> 15) & 0xffff;
  }

  uint8_t Rand8(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
    // There's a bit more entropy in the upper bits of this implementation.
-    return (value >> 24) & 0xff;
+    return (value >> 23) & 0xff;
  }

  uint8_t Rand8Extremes(void) {
--- a/test/active_map_refresh_test.cc
+++ b/test/active_map_refresh_test.cc
@@ -0,0 +1,127 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include <algorithm>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+
+// Check if any pixel in a 16x16 macroblock varies between frames.
+int CheckMb(const vpx_image_t &current, const vpx_image_t &previous,
+            int mb_r, int mb_c) {
+  for (int plane = 0; plane < 3; plane++) {
+    int r = 16 * mb_r;
+    int c0 = 16 * mb_c;
+    int r_top = std::min(r + 16, static_cast<int>(current.d_h));
+    int c_top = std::min(c0 + 16, static_cast<int>(current.d_w));
+    r = std::max(r, 0);
+    c0 = std::max(c0, 0);
+    if (plane > 0 && current.x_chroma_shift) {
+      c_top = (c_top + 1) >> 1;
+      c0 >>= 1;
+    }
+    if (plane > 0 && current.y_chroma_shift) {
+      r_top = (r_top + 1) >> 1;
+      r >>= 1;
+    }
+    for (; r < r_top; ++r) {
+      for (int c = c0; c < c_top; ++c) {
+        if (current.planes[plane][current.stride[plane] * r + c] !=
+            previous.planes[plane][previous.stride[plane] * r + c])
+          return 1;
+      }
+    }
+  }
+  return 0;
+}
+
+void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current,
+                 const vpx_image_t &previous, uint8_t *map) {
+  for (int mb_r = 0; mb_r < mb_rows; ++mb_r) {
+    for (int mb_c = 0; mb_c < mb_cols; ++mb_c) {
+      map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c);
+    }
+  }
+}
+
+const int kAqModeCyclicRefresh = 3;
+
+class ActiveMapRefreshTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ActiveMapRefreshTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    cpu_used_ = GET_PARAM(2);
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    ::libvpx_test::Y4mVideoSource *y4m_video =
+        static_cast<libvpx_test::Y4mVideoSource *>(video);
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh);
+    } else if (video->frame() >= 2 && video->img()) {
+      vpx_image_t *current = video->img();
+      vpx_image_t *previous = y4m_holder_->img();
+      ASSERT_TRUE(previous != NULL);
+      vpx_active_map_t map = vpx_active_map_t();
+      const int width = static_cast<int>(current->d_w);
+      const int height = static_cast<int>(current->d_h);
+      const int mb_width = (width + 15) / 16;
+      const int mb_height = (height + 15) / 16;
+      uint8_t *active_map = new uint8_t[mb_width * mb_height];
+      GenerateMap(mb_height, mb_width, *current, *previous, active_map);
+      map.cols = mb_width;
+      map.rows = mb_height;
+      map.active_map = active_map;
+      encoder->Control(VP8E_SET_ACTIVEMAP, &map);
+      delete[] active_map;
+    }
+    if (video->img()) {
+      y4m_video->SwapBuffers(y4m_holder_);
+    }
+  }
+
+  int cpu_used_;
+  ::libvpx_test::Y4mVideoSource *y4m_holder_;
+};
+
+TEST_P(ActiveMapRefreshTest, Test) {
+  cfg_.g_lag_in_frames = 0;
+  cfg_.g_profile = 1;
+  cfg_.rc_target_bitrate = 600;
+  cfg_.rc_resize_allowed = 0;
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 30;
+  cfg_.g_pass = VPX_RC_ONE_PASS;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.kf_max_dist = 90000;
+
+  ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30);
+  ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30);
+  video_holder.Begin();
+  y4m_holder_ = &video_holder;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 6));
+}  // namespace
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -40,9 +40,17 @@ include $(CLEAR_VARS)
 LOCAL_ARM_MODE := arm
 LOCAL_MODULE := libvpx_test
 LOCAL_STATIC_LIBRARIES := gtest libwebm
-LOCAL_SHARED_LIBRARIES := vpx
+
+ifeq ($(ENABLE_SHARED),1)
+  LOCAL_SHARED_LIBRARIES := vpx
+else
+  LOCAL_STATIC_LIBRARIES += vpx
+endif
+
 include $(LOCAL_PATH)/test/test.mk
 LOCAL_C_INCLUDES := $(BINDINGS_DIR)
 FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
 LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
+# some test files depend on *_rtcd.h, ensure they're generated first.
+$(eval $(call rtcd_dep_template))
 include $(BUILD_EXECUTABLE)
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -0,0 +1,229 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "vpx_mem/vpx_mem.h"
+
+
+extern "C"
+double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
+                          const unsigned char *img2, int img2_pitch,
+                          int width, int height);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class BlockinessTestBase : public ::testing::Test {
+ public:
+  BlockinessTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
+    vpx_free(reference_data_);
+    reference_data_ = NULL;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant,
+                    int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = fill_constant;
+      }
+    }
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
+    FillConstant(data, stride, fill_constant, width_, height_);
+  }
+
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+
+  void FillRandomBlocky(uint8_t *data, int stride) {
+    for (int h = 0; h < height_; h += 4) {
+      for (int w = 0; w < width_; w += 4) {
+        FillRandom(data + h * stride + w, stride, 4, 4);
+      }
+    }
+  }
+
+  void FillCheckerboard(uint8_t *data, int stride) {
+    for (int h = 0; h < height_; h += 4) {
+      for (int w = 0; w < width_; w += 4) {
+        if (((h/4) ^ (w/4)) & 1)
+          FillConstant(data + h * stride + w, stride, 255, 4, 4);
+        else
+          FillConstant(data + h * stride + w, stride, 0, 4, 4);
+      }
+    }
+  }
+
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_;
+  int source_stride_;
+  static uint8_t* reference_data_;
+  int reference_stride_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> BlockinessParam;
+class BlockinessVP9Test
+    : public BlockinessTestBase,
+      public ::testing::WithParamInterface<BlockinessParam> {
+ public:
+  BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  int CheckBlockiness() {
+    return vp9_get_blockiness(source_data_, source_stride_,
+                              reference_data_, reference_stride_,
+                              width_, height_);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* BlockinessTestBase::source_data_ = NULL;
+uint8_t* BlockinessTestBase::reference_data_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(BlockinessVP9Test, SourceBlockierThanReference) {
+  // Source is blockier than reference.
+  FillRandomBlocky(source_data_, source_stride_);
+  FillConstant(reference_data_, reference_stride_, 128);
+  int super_blocky = CheckBlockiness();
+
+  EXPECT_EQ(0, super_blocky) << "Blocky source should produce 0 blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillRandomBlocky(reference_data_, reference_stride_);
+  int super_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, 0.0)
+      << "Blocky reference should score high for blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillRandomBlocky(reference_data_, reference_stride_);
+  int super_blocky = CheckBlockiness();
+
+  Blur(reference_data_, reference_stride_, 4);
+  int less_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, less_blocky)
+      << "A straight blur should decrease blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillCheckerboard(reference_data_, reference_stride_);
+
+  int super_blocky = CheckBlockiness();
+
+  Blur(reference_data_, reference_stride_, 4);
+  int less_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, less_blocky)
+      << "A straight blur should decrease blockiness.";
+}
+#endif  // CONFIG_VP9_ENCODER
+
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+
+#if CONFIG_VP9_ENCODER
+const BlockinessParam c_vp9_tests[] = {
+  make_tuple(320, 240),
+  make_tuple(318, 242),
+  make_tuple(318, 238),
+};
+INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
+#endif
+
+}  // namespace
--- a/test/borders_test.cc
+++ b/test/borders_test.cc
@@ -80,4 +80,7 @@ TEST_P(BordersTest, TestLowBitrate) {

 VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
    ::libvpx_test::kTwoPassGood));
+
+VP10_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(
+    ::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -0,0 +1,189 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "./vpx_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+
+namespace {
+
+const int kLegacyByteAlignment = 0;
+const int kLegacyYPlaneByteAlignment = 32;
+const int kNumPlanesToCheck = 3;
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
+const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
+
+#if CONFIG_WEBM_IO
+
+struct ByteAlignmentTestParam {
+  int byte_alignment;
+  vpx_codec_err_t expected_value;
+  bool decode_remaining;
+};
+
+const ByteAlignmentTestParam kBaTestParams[] = {
+  {kLegacyByteAlignment, VPX_CODEC_OK, true},
+  {32, VPX_CODEC_OK, true},
+  {64, VPX_CODEC_OK, true},
+  {128, VPX_CODEC_OK, true},
+  {256, VPX_CODEC_OK, true},
+  {512, VPX_CODEC_OK, true},
+  {1024, VPX_CODEC_OK, true},
+  {1, VPX_CODEC_INVALID_PARAM, false},
+  {-2, VPX_CODEC_INVALID_PARAM, false},
+  {4, VPX_CODEC_INVALID_PARAM, false},
+  {16, VPX_CODEC_INVALID_PARAM, false},
+  {255, VPX_CODEC_INVALID_PARAM, false},
+  {2048, VPX_CODEC_INVALID_PARAM, false},
+};
+
+// Class for testing byte alignment of reference buffers.
+class ByteAlignmentTest
+    : public ::testing::TestWithParam<ByteAlignmentTestParam> {
+ protected:
+  ByteAlignmentTest()
+      : video_(NULL),
+        decoder_(NULL),
+        md5_file_(NULL) {}
+
+  virtual void SetUp() {
+    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
+    ASSERT_TRUE(video_ != NULL);
+    video_->Init();
+    video_->Begin();
+
+    const vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
+    ASSERT_TRUE(decoder_ != NULL);
+
+    OpenMd5File(kVP9Md5File);
+  }
+
+  virtual void TearDown() {
+    if (md5_file_ != NULL)
+      fclose(md5_file_);
+
+    delete decoder_;
+    delete video_;
+  }
+
+  void SetByteAlignment(int byte_alignment, vpx_codec_err_t expected_value) {
+    decoder_->Control(VP9_SET_BYTE_ALIGNMENT, byte_alignment, expected_value);
+  }
+
+  vpx_codec_err_t DecodeOneFrame(int byte_alignment_to_check) {
+    const vpx_codec_err_t res =
+        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+    CheckDecodedFrames(byte_alignment_to_check);
+    if (res == VPX_CODEC_OK)
+      video_->Next();
+    return res;
+  }
+
+  vpx_codec_err_t DecodeRemainingFrames(int byte_alignment_to_check) {
+    for (; video_->cxdata() != NULL; video_->Next()) {
+      const vpx_codec_err_t res =
+          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+      if (res != VPX_CODEC_OK)
+        return res;
+      CheckDecodedFrames(byte_alignment_to_check);
+    }
+    return VPX_CODEC_OK;
+  }
+
+ private:
+  // Check if |data| is aligned to |byte_alignment_to_check|.
+  // |byte_alignment_to_check| must be a power of 2.
+  void CheckByteAlignment(const uint8_t *data, int byte_alignment_to_check) {
+    ASSERT_EQ(0u, reinterpret_cast<size_t>(data) % byte_alignment_to_check);
+  }
+
+  // Iterate through the planes of the decoded frames and check for
+  // alignment based off |byte_alignment_to_check|.
+  void CheckDecodedFrames(int byte_alignment_to_check) {
+    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next()) != NULL) {
+      if (byte_alignment_to_check == kLegacyByteAlignment) {
+        CheckByteAlignment(img->planes[0], kLegacyYPlaneByteAlignment);
+      } else {
+        for (int i = 0; i < kNumPlanesToCheck; ++i) {
+          CheckByteAlignment(img->planes[i], byte_alignment_to_check);
+        }
+      }
+      CheckMd5(*img);
+    }
+  }
+
+  // TODO(fgalligan): Move the MD5 testing code into another class.
+  void OpenMd5File(const std::string &md5_file_name_) {
+    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+        << md5_file_name_;
+  }
+
+  void CheckMd5(const vpx_image_t &img) {
+    ASSERT_TRUE(md5_file_ != NULL);
+    char expected_md5[33];
+    char junk[128];
+
+    // Read correct md5 checksums.
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
+    ASSERT_NE(EOF, res) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libvpx_test::MD5 md5_res;
+    md5_res.Add(&img);
+    const char *const actual_md5 = md5_res.Get();
+
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5) << "MD5 checksums don't match";
+  }
+
+  libvpx_test::WebMVideoSource *video_;
+  libvpx_test::VP9Decoder *decoder_;
+  FILE *md5_file_;
+};
+
+TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
+  const int num_elements = 14;
+  const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024,
+                                  0, 1024, 32, 512, 64, 256, 128 };
+
+  for (int i = 0; i < num_elements; ++i) {
+    SetByteAlignment(byte_alignments[i], VPX_CODEC_OK);
+    ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame(byte_alignments[i]));
+  }
+  SetByteAlignment(byte_alignments[0], VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(byte_alignments[0]));
+}
+
+TEST_P(ByteAlignmentTest, TestAlignment) {
+  const ByteAlignmentTestParam t = GetParam();
+  SetByteAlignment(t.byte_alignment, t.expected_value);
+  if (t.decode_remaining)
+    ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment));
+}
+
+INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest,
+                        ::testing::ValuesIn(kBaTestParams));
+
+#endif  // CONFIG_WEBM_IO
+
+}  // namespace
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -13,10 +13,10 @@
 #include "./vpx_config.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vpx_encoder.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 #include "vpx/vp8cx.h"
 #endif
-#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
+#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER || CONFIG_VP10_DECODER
 #include "vpx/vp8dx.h"
 #endif

@@ -35,6 +35,11 @@ class CodecFactory {
  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const = 0;

+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline)  // NOLINT(runtime/int)
+                                 const = 0;
+
  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
@@ -72,6 +77,10 @@ class VP8Decoder : public Decoder {
  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

+  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+             unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
@@ -104,8 +113,14 @@ class VP8CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, deadline);
+    return new VP8Decoder(cfg, flags, deadline);
 #else
    return NULL;
 #endif
@@ -154,6 +169,10 @@ class VP9Decoder : public Decoder {
  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

+  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+             unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
@@ -186,8 +205,14 @@ class VP9CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, deadline);
+    return new VP9Decoder(cfg, flags, deadline);
 #else
    return NULL;
 #endif
@@ -208,6 +233,8 @@ class VP9CodecFactory : public CodecFactory {
                                               int usage) const {
 #if CONFIG_VP9_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
+#elif CONFIG_VP10_ENCODER
+    return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
 #else
    return VPX_CODEC_INCAPABLE;
 #endif
@@ -226,7 +253,96 @@ const libvpx_test::VP9CodecFactory kVP9;
 #define VP9_INSTANTIATE_TEST_CASE(test, ...)
 #endif  // CONFIG_VP9

+/*
+ * VP10 Codec Definitions
+ */
+#if CONFIG_VP10
+class VP10Decoder : public Decoder {
+ public:
+  VP10Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
+      : Decoder(cfg, deadline) {}
+
+  VP10Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+              unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
+ protected:
+  virtual vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP10_DECODER
+    return &vpx_codec_vp10_dx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+class VP10Encoder : public Encoder {
+ public:
+  VP10Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline,
+              const unsigned long init_flags, TwopassStatsStore *stats)
+      : Encoder(cfg, deadline, init_flags, stats) {}
+
+ protected:
+  virtual vpx_codec_iface_t* CodecInterface() const {
+#if CONFIG_VP10_ENCODER
+    return &vpx_codec_vp10_cx_algo;
+#else
+    return NULL;
+#endif
+  }
+};
+
+class VP10CodecFactory : public CodecFactory {
+ public:
+  VP10CodecFactory() : CodecFactory() {}
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
+#if CONFIG_VP10_DECODER
+    return new VP10Decoder(cfg, flags, deadline);
+#else
+    return NULL;
+#endif
+  }
+
+  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
+                                 unsigned long deadline,
+                                 const unsigned long init_flags,
+                                 TwopassStatsStore *stats) const {
+#if CONFIG_VP10_ENCODER
+    return new VP10Encoder(cfg, deadline, init_flags, stats);
+#else
+    return NULL;
+#endif
+  }
+
+  virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg,
+                                               int usage) const {
+#if CONFIG_VP10_ENCODER
+    return vpx_codec_enc_config_default(&vpx_codec_vp10_cx_algo, cfg, usage);
+#else
+    return VPX_CODEC_INCAPABLE;
+#endif
+  }
+};
+
+const libvpx_test::VP10CodecFactory kVP10;
+
+#define VP10_INSTANTIATE_TEST_CASE(test, ...)\
+  INSTANTIATE_TEST_CASE_P(VP10, test, \
+      ::testing::Combine( \
+          ::testing::Values(static_cast<const libvpx_test::CodecFactory*>( \
+               &libvpx_test::kVP10)), \
+          __VA_ARGS__))
+#else
+#define VP10_INSTANTIATE_TEST_CASE(test, ...)
+#endif  // CONFIG_VP10

 }  // namespace libvpx_test
-
 #endif  // TEST_CODEC_FACTORY_H_
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx_dsp/ssim.h"
+#include "vpx_mem/vpx_mem.h"
+
+extern "C"
+double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class ConsistencyTestBase : public ::testing::Test {
+ public:
+  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    source_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    ssim_array_ = new Ssimv[kDataBufferSize / 16];
+  }
+
+  static void ClearSsim() {
+    memset(ssim_array_, 0, kDataBufferSize / 16);
+  }
+  static void TearDownTestCase() {
+    vpx_free(source_data_[0]);
+    source_data_[0] = NULL;
+    vpx_free(reference_data_[0]);
+    reference_data_[0] = NULL;
+    vpx_free(source_data_[1]);
+    source_data_[1] = NULL;
+    vpx_free(reference_data_[1]);
+    reference_data_[1] = NULL;
+
+    delete[] ssim_array_;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+
+  void Copy(uint8_t *reference, uint8_t *source) {
+    memcpy(reference, source, kDataBufferSize);
+  }
+
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_[2];
+  int source_stride_;
+  static uint8_t* reference_data_[2];
+  int reference_stride_;
+  static Ssimv *ssim_array_;
+  Metrics metrics_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> ConsistencyParam;
+class ConsistencyVP9Test
+    : public ConsistencyTestBase,
+      public ::testing::WithParamInterface<ConsistencyParam> {
+ public:
+  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  double CheckConsistency(int frame) {
+    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
+    return
+        vpx_get_ssim_metrics(source_data_[frame], source_stride_,
+                             reference_data_[frame], reference_stride_,
+                             width_, height_, ssim_array_, &metrics_, 1);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
+uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
+Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Blur(reference_data_[0], reference_stride_, 3);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 3);
+
+  double inconsistency = CheckConsistency(1);
+  inconsistency = CheckConsistency(0);
+  EXPECT_EQ(inconsistency, 0.0)
+      << "Should have 0 inconsistency if they are exactly the same.";
+
+  // If sources are not consistent reference frames inconsistency should
+  // be less than if the source is consistent.
+  FillRandom(source_data_[0], source_stride_);
+  FillRandom(source_data_[1], source_stride_);
+  FillRandom(reference_data_[0], reference_stride_);
+  FillRandom(reference_data_[1], reference_stride_);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+
+  Copy(source_data_[1], source_data_[0]);
+  CheckConsistency(0);
+  double inconsistency2 = CheckConsistency(1);
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Should have less inconsistency if source itself is inconsistent.";
+
+  // Less of a blur should be less inconsistent than more blur coming off a
+  // a frame with no blur.
+  ClearSsim();
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 4);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+  ClearSsim();
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 8);
+  CheckConsistency(0);
+  inconsistency2 = CheckConsistency(1);
+
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Stronger Blur should produce more inconsistency.";
+}
+#endif  // CONFIG_VP9_ENCODER
+
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+
+#if CONFIG_VP9_ENCODER
+const ConsistencyParam c_vp9_tests[] = {
+  make_tuple(320, 240),
+  make_tuple(318, 242),
+  make_tuple(318, 238),
+};
+INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
+                        ::testing::ValuesIn(c_vp9_tests));
+#endif
+
+}  // namespace
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
--- a/test/cpu_speed_test.cc
+++ b/test/cpu_speed_test.cc
@@ -140,4 +140,9 @@ VP9_INSTANTIATE_TEST_CASE(
    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
                      ::libvpx_test::kRealTime),
    ::testing::Range(0, 9));
+
+VP10_INSTANTIATE_TEST_CASE(
+    CpuSpeedTest,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood),
+    ::testing::Range(0, 3));
 }  // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -14,6 +14,7 @@
 #include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
+#include "vpx/vpx_codec.h"

 namespace {

@@ -38,13 +39,25 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0)
+      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
+
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
@@ -124,6 +137,8 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
  double effective_datarate_;
  size_t bits_in_last_frame_;
  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
 };

 #if CONFIG_TEMPORAL_DENOISING
@@ -155,6 +170,29 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
        << " The datarate for the file missed the target!";
  }
 }
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestLarge, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the offon test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+      << " The datarate for the file missed the target!";
+}
 #endif  // CONFIG_TEMPORAL_DENOISING

 TEST_P(DatarateTestLarge, BasicBufferModel) {
@@ -246,6 +284,8 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
    for (int i = 0; i < 3; ++i) {
      bits_total_[i] = 0;
    }
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
  }

  //
@@ -313,22 +353,30 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0)
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
    }
+
+    encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
    if (cfg_.ts_number_layers > 1) {
-      if (video->frame() == 1) {
+      if (video->frame() == 0) {
        encoder->Control(VP9E_SET_SVC, 1);
      }
-      vpx_svc_layer_id_t layer_id = {0, 0};
+      vpx_svc_layer_id_t layer_id;
      layer_id.spatial_layer_id = 0;
      frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
      layer_id.temporal_layer_id = SetLayerId(video->frame(),
                                              cfg_.ts_number_layers);
-      if (video->frame() > 0) {
-       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
-      }
+      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -398,6 +446,8 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
  vpx_codec_pts_t first_drop_;
  int num_drops_;
  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
 };

 // Check basic rate targeting,
@@ -488,7 +538,7 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
        << " The first dropped frame for drop_thresh " << i
        << " > first dropped frame for drop_thresh "
        << i - kDropFrameThreshTestStep;
-    ASSERT_GE(num_drops_, last_num_drops)
+    ASSERT_GE(num_drops_, last_num_drops * 0.90)
        << " The number of dropped frames for drop_thresh " << i
        << " < number of dropped frames for drop_thresh "
        << i - kDropFrameThreshTestStep;
@@ -514,20 +564,25 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
  cfg_.ts_rate_decimator[0] = 2;
  cfg_.ts_rate_decimator[1] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
+  if (deadline_ == VPX_DL_REALTIME)
+    cfg_.g_error_resilient = 1;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  for (int i = 200; i <= 800; i += 200) {
    cfg_.rc_target_bitrate = i;
    ResetModel();
    // 60-40 bitrate allocation for 2 temporal layers.
-    cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+    cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
          << " The datarate for the file is lower than target by too much, "
              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
          << " The datarate for the file is greater than target by too much, "
              "for layer: " << j;
    }
@@ -552,25 +607,27 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  for (int i = 200; i <= 800; i += 200) {
    cfg_.rc_target_bitrate = i;
    ResetModel();
    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+    cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
      // TODO(yaowu): Work out more stable rc control strategy and
      //              Adjust the thresholds to be tighter than .75.
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
+      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
          << " The datarate for the file is lower than target by too much, "
              "for layer: " << j;
      // TODO(yaowu): Work out more stable rc control strategy and
      //              Adjust the thresholds to be tighter than 1.25.
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
+      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
          << " The datarate for the file is greater than target by too much, "
              "for layer: " << j;
    }
@@ -598,20 +655,22 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  cfg_.rc_target_bitrate = 200;
  ResetModel();
  // 40-20-40 bitrate allocation for 3 temporal layers.
-  cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+  cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+  cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-    ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
        << " The datarate for the file is lower than target by too much, "
            "for layer: " << j;
-    ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
        << " The datarate for the file is greater than target by too much, "
            "for layer: " << j;
    // Expect some frame drops in this test: for this 200 frames test,
@@ -649,11 +708,271 @@ TEST_P(DatarateTestVP9Large, DenoiserLevels) {
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+
+  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
+  // there is only one denoiser mode: denoiserYonly(which is 1),
+  // but may add more modes in the future.
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the offon test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
 #endif  // CONFIG_VP9_TEMPORAL_DENOISING

+class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ public:
+  DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~DatarateOnePassCbrSvc() {}
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    speed_setting_ = GET_PARAM(2);
+    ResetModel();
+  }
+  virtual void ResetModel() {
+    last_pts_ = 0;
+    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+    frame_number_ = 0;
+    first_drop_ = 0;
+    bits_total_ = 0;
+    duration_ = 0.0;
+    mismatch_psnr_ = 0.0;
+    mismatch_nframes_ = 0;
+  }
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+  }
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      int i;
+      for (i = 0; i < 2; ++i) {
+        svc_params_.max_quantizers[i] = 63;
+        svc_params_.min_quantizers[i] = 0;
+      }
+      svc_params_.scaling_factor_num[0] = 144;
+      svc_params_.scaling_factor_den[0] = 288;
+      svc_params_.scaling_factor_num[1] = 288;
+      svc_params_.scaling_factor_den[1] = 288;
+      encoder->Control(VP9E_SET_SVC, 1);
+      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
+      encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1));
+    }
+    const vpx_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+    if (last_pts_ == 0)
+      duration = 1;
+    bits_in_buffer_model_ += static_cast<int64_t>(
+        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
+    const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
+                         ? true: false;
+    if (!key_frame) {
+      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+          << pkt->data.frame.pts;
+    }
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    bits_in_buffer_model_ -= frame_size_in_bits;
+    bits_total_ += frame_size_in_bits;
+    if (!first_drop_ && duration > 1)
+      first_drop_ = last_pts_ + 1;
+    last_pts_ = pkt->data.frame.pts;
+    bits_in_last_frame_ = frame_size_in_bits;
+    ++frame_number_;
+  }
+  virtual void EndPassHook(void) {
+    if (bits_total_) {
+      const double file_size_in_kb = bits_total_ / 1000.;  // bits per kilobit
+      duration_ = (last_pts_ + 1) * timebase_;
+      effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
+          / (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
+      file_datarate_ = file_size_in_kb / duration_;
+    }
+  }
+
+  virtual void MismatchHook(const vpx_image_t *img1,
+                            const vpx_image_t *img2) {
+    double mismatch_psnr = compute_psnr(img1, img2);
+    mismatch_psnr_ += mismatch_psnr;
+    ++mismatch_nframes_;
+  }
+
+  unsigned int GetMismatchFrames() {
+    return mismatch_nframes_;
+  }
+
+  vpx_codec_pts_t last_pts_;
+  int64_t bits_in_buffer_model_;
+  double timebase_;
+  int frame_number_;
+  vpx_codec_pts_t first_drop_;
+  int64_t bits_total_;
+  double duration_;
+  double file_datarate_;
+  double effective_datarate_;
+  size_t bits_in_last_frame_;
+  vpx_svc_extra_cfg_t svc_params_;
+  int speed_setting_;
+  double mismatch_psnr_;
+  int mismatch_nframes_;
+};
+static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
+    const vpx_svc_extra_cfg_t *svc_params,
+    int spatial_layers,
+    int temporal_layers,
+    int temporal_layering_mode,
+    unsigned int total_rate) {
+  int sl, spatial_layer_target;
+  float total = 0;
+  float alloc_ratio[VPX_MAX_LAYERS] = {0};
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    if (svc_params->scaling_factor_den[sl] > 0) {
+      alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] *
+          1.0 / svc_params->scaling_factor_den[sl]);
+      total += alloc_ratio[sl];
+    }
+  }
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
+        (unsigned int)(enc_cfg->rc_target_bitrate *
+            alloc_ratio[sl] / total);
+    const int index = sl * temporal_layers;
+    if (temporal_layering_mode == 3) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target >> 1;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+      enc_cfg->layer_target_bitrate[index + 2] =
+          spatial_layer_target;
+    } else if (temporal_layering_mode == 2) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target * 2 / 3;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          spatial_layer_target;
+    }
+  }
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers. Run CIF clip with 1 thread.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 1;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 144;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 288;
+  svc_params_.scaling_factor_den[1] = 288;
+  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
+  // frame dropping for SVC.
+  cfg_.rc_dropframe_thresh = 0;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 200);
+  // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
+  // layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
+  for (int i = 400; i <= 800; i += 200) {
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+        cfg_.rc_target_bitrate);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+            << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+        << " The datarate for the file is lower than the target by too much!";
+    EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+  }
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers. Run HD clip with 4 threads.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc4threads) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 4;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 144;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 288;
+  svc_params_.scaling_factor_den[1] = 288;
+  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
+  // frame dropping for SVC.
+  cfg_.rc_dropframe_thresh = 0;
+  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720,
+                                       30, 1, 0, 300);
+  cfg_.rc_target_bitrate = 800;
+  ResetModel();
+  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+      cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+      cfg_.rc_target_bitrate);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+          << " The datarate for the file exceeds the target by too much!";
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+      << " The datarate for the file is lower than the target by too much!";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+}
+
 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
-                          ::libvpx_test::kRealTime),
+                                            ::libvpx_test::kRealTime),
                          ::testing::Range(2, 7));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 8));
 }  // namespace
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -13,15 +13,18 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -37,30 +40,6 @@ static int round(double x) {
 #endif

 const int kNumCoeffs = 256;
-const double PI = 3.1415926535898;
-void reference2_16x16_idct_2d(double *input, double *output) {
-  double x;
-  for (int l = 0; l < 16; ++l) {
-    for (int k = 0; k < 16; ++k) {
-      double s = 0;
-      for (int i = 0; i < 16; ++i) {
-        for (int j = 0; j < 16; ++j) {
-          x = cos(PI * j * (l + 0.5) / 16.0) *
-              cos(PI * i * (k + 0.5) / 16.0) *
-              input[i * 16 + j] / 256;
-          if (i != 0)
-            x *= sqrt(2.0);
-          if (j != 0)
-            x *= sqrt(2.0);
-          s += x;
-        }
-      }
-      output[k*16+l] = s;
-    }
-  }
-}
-
-
 const double C1 = 0.995184726672197;
 const double C2 = 0.98078528040323;
 const double C3 = 0.956940335732209;
@@ -269,12 +248,12 @@ typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>

 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
-  vp9_fdct16x16_c(in, out, stride);
+  vpx_fdct16x16_c(in, out, stride);
 }

 void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
-  vp9_idct16x16_256_add_c(in, dest, stride);
+  vpx_idct16x16_256_add_c(in, dest, stride);
 }

 void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
@@ -289,11 +268,11 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
@@ -315,31 +294,31 @@ void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
-#endif
-#endif
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans16x16TestBase {
 public:
@@ -356,13 +335,13 @@ class Trans16x16TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

      // Initialize a test block with input range [-mask_, mask_].
@@ -416,9 +395,9 @@ class Trans16x16TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -437,15 +416,13 @@ class Trans16x16TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -472,24 +449,19 @@ class Trans16x16TestBase {
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);

-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (bit_depth_ == VPX_BITS_8)
-          input_block[j] = rnd.Rand8() - rnd.Rand8();
-        else
-          input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0)
@@ -502,11 +474,11 @@ class Trans16x16TestBase {
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
-      vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
-      vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
+      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
+      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
 #if CONFIG_VP9_HIGHBITDEPTH
-      vpx_memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
-      vpx_memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
+      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
+      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
 #endif

      // quantization with maximum allowed step sizes
@@ -539,14 +511,14 @@ class Trans16x16TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
-#endif
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -562,7 +534,7 @@ class Trans16x16TestBase {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }

@@ -576,7 +548,7 @@ class Trans16x16TestBase {
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -585,7 +557,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        const uint32_t diff = dst[j] - src[j];
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
            << "Error: 16x16 IDCT has error " << error
@@ -593,24 +565,25 @@ class Trans16x16TestBase {
      }
    }
  }
+
  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
-#endif
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
-          coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
+          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
@@ -621,7 +594,7 @@ class Trans16x16TestBase {
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
@@ -632,7 +605,7 @@ class Trans16x16TestBase {
        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                 pitch_));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -641,7 +614,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
 #else
        const uint32_t diff = dst[j] - ref[j];
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error)
            << "Error: 16x16 IDCT Comparison has error " << error
@@ -649,6 +622,7 @@ class Trans16x16TestBase {
      }
    }
  }
+
  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
@@ -796,9 +770,9 @@ class InvTrans16x16DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_  = GET_PARAM(2);
+    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
-    pitch_    = 16;
+    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
 }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
@@ -824,15 +798,15 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -858,22 +832,22 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_neon, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_sse2,
+                   &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
@@ -885,33 +859,25 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
+        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_c,
+        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct16x16_sse2,
+        make_tuple(&vpx_highbd_fdct16x16_sse2,
                   &idct16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct16x16_c,
+        make_tuple(&vpx_highbd_fdct16x16_c,
                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct16x16_sse2,
-                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct16x16_sse2,
+                   &vpx_idct16x16_256_add_c, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
@@ -930,13 +896,21 @@ INSTANTIATE_TEST_CASE_P(
                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
        make_tuple(&idct16x16_12,
                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
-#endif
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    SSSE3, Trans16x16DCT,
+    MSA, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
+        make_tuple(&vpx_fdct16x16_msa,
+                   &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
-#endif
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -13,16 +13,18 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -80,17 +82,17 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 8);
 }

 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
 public:
@@ -119,13 +121,13 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
  uint32_t max_error = 0;
  int64_t total_error = 0;
  const int count_test_block = 10000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
@@ -184,16 +186,16 @@ TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
    for (int j = 0; j < kNumCoeffs; ++j)
      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);

    const int stride = 32;
-    vp9_fdct32x32_c(input_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride));

    if (version_ == 0) {
@@ -212,15 +214,13 @@ TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 2000;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-mask_, mask_].
    for (int j = 0; j < kNumCoeffs; ++j) {
-      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    if (i == 0) {
@@ -232,7 +232,7 @@ TEST_P(Trans32x32Test, MemCheck) {
    }

    const int stride = 32;
-    vp9_fdct32x32_c(input_extreme_block, output_ref_block, stride);
+    vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride);
    ASM_REGISTER_STATE_CHECK(
        fwd_txfm_(input_extreme_block, output_block, stride));

@@ -257,13 +257,13 @@ TEST_P(Trans32x32Test, MemCheck) {
 TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
@@ -315,71 +315,81 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_c,
+        make_tuple(&vpx_highbd_fdct32x32_c,
                   &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
+        make_tuple(&vpx_highbd_fdct32x32_rd_c,
                   &idct32x32_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_c,
+        make_tuple(&vpx_highbd_fdct32x32_c,
                   &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_c,
+        make_tuple(&vpx_highbd_fdct32x32_rd_c,
                   &idct32x32_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_c,
-                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_c,
+                   &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct32x32_sse2,
+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_sse2,
+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
                   VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
+        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
                   VPX_BITS_12),
-        make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
+        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0,
                   VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
+        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    AVX2, Trans32x32Test,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
-        make_tuple(&vp9_fdct32x32_rd_avx2,
-                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct32x32_avx2,
+                   &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_avx2,
+                   &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans32x32Test,
+    ::testing::Values(
+        make_tuple(&vpx_fdct32x32_msa,
+                   &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vpx_fdct32x32_rd_msa,
+                   &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -7,10 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/ivf_video_source.h"
 #include "./vpx_config.h"
+#include "test/ivf_video_source.h"
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

@@ -25,6 +26,9 @@ TEST(DecodeAPI, InvalidParams) {
 #endif
 #if CONFIG_VP9_DECODER
    &vpx_codec_vp9_dx_algo,
+#endif
+#if CONFIG_VP10_DECODER
+    &vpx_codec_vp10_dx_algo,
 #endif
  };
  uint8_t buf[1] = {0};
@@ -57,6 +61,21 @@ TEST(DecodeAPI, InvalidParams) {
  }
 }

+#if CONFIG_VP8_DECODER
+TEST(DecodeAPI, OptionalParams) {
+  vpx_codec_ctx_t dec;
+
+#if CONFIG_ERROR_CONCEALMENT
+  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
+                                             VPX_CODEC_USE_ERROR_CONCEALMENT));
+#else
+  EXPECT_EQ(VPX_CODEC_INCAPABLE,
+            vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
+                               VPX_CODEC_USE_ERROR_CONCEALMENT));
+#endif  // CONFIG_ERROR_CONCEALMENT
+}
+#endif  // CONFIG_VP8_DECODER
+
 #if CONFIG_VP9_DECODER
 // Test VP9 codec controls after a decode error to ensure the code doesn't
 // misbehave.
@@ -65,6 +84,7 @@ void TestVp9Controls(vpx_codec_ctx_t *dec) {
    VP8D_GET_LAST_REF_UPDATES,
    VP8D_GET_FRAME_CORRUPTED,
    VP9D_GET_DISPLAY_SIZE,
+    VP9D_GET_FRAME_SIZE
  };
  int val[2];

@@ -113,8 +133,13 @@ TEST(DecodeAPI, Vp9InvalidDecode) {
  vpx_codec_ctx_t dec;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0));
  const uint32_t frame_size = static_cast<uint32_t>(video.frame_size());
+#if CONFIG_VP9_HIGHBITDEPTH
  EXPECT_EQ(VPX_CODEC_MEM_ERROR,
            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#else
+  EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM,
+            vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0));
+#endif
  vpx_codec_iter_t iter = NULL;
  EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter));

--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -8,13 +8,17 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <string>
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
 #include "test/ivf_video_source.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/webm_video_source.h"
 #include "vpx_ports/vpx_timer.h"
+#include "./ivfenc.h"
 #include "./vpx_version.h"

 using std::tr1::make_tuple;
@@ -24,7 +28,9 @@ namespace {
 #define VIDEO_NAME 0
 #define THREADS 1

+const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
+const char kNewEncodeOutputFile[] = "new_encode.ivf";

 /*
 DecodePerfTest takes a tuple of filename + number of threads to decode with
@@ -105,4 +111,163 @@ TEST_P(DecodePerfTest, PerfTest) {
 INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
                        ::testing::ValuesIn(kVP9DecodePerfVectors));

+class VP9NewEncodeDecodePerfTest :
+    public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  VP9NewEncodeDecodePerfTest()
+      : EncoderTest(GET_PARAM(0)),
+        encoding_mode_(GET_PARAM(1)),
+        speed_(0),
+        outfile_(0),
+        out_frames_(0) {
+  }
+
+  virtual ~VP9NewEncodeDecodePerfTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 25;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_resize_allowed = 0;
+    cfg_.rc_end_usage = VPX_VBR;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, speed_);
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 2);
+    }
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    const std::string data_path = getenv("LIBVPX_TEST_DATA_PATH");
+    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
+    outfile_ = fopen(path_to_source.c_str(), "wb");
+    ASSERT_TRUE(outfile_ != NULL);
+  }
+
+  virtual void EndPassHook() {
+    if (outfile_ != NULL) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+
+    // Write frame header and data.
+    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
+    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
+              pkt->data.frame.sz);
+  }
+
+  virtual bool DoDecode() { return false; }
+
+  void set_speed(unsigned int speed) {
+    speed_ = speed;
+  }
+
+ private:
+  libvpx_test::TestMode encoding_mode_;
+  uint32_t speed_;
+  FILE *outfile_;
+  uint32_t out_frames_;
+};
+
+struct EncodePerfTestVideo {
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_),
+        width(width_),
+        height(height_),
+        bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {
+  SetUp();
+
+  // TODO(JBB): Make this work by going through the set of given files.
+  const int i = 0;
+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  const char *video_name = kVP9EncodePerfTestVectors[i].name;
+  libvpx_test::I420VideoSource video(
+      video_name,
+      kVP9EncodePerfTestVectors[i].width,
+      kVP9EncodePerfTestVectors[i].height,
+      timebase.den, timebase.num, 0,
+      kVP9EncodePerfTestVectors[i].frames);
+  set_speed(2);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  const uint32_t threads = 4;
+
+  libvpx_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
+  decode_video.Init();
+
+  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+  cfg.threads = threads;
+  libvpx_test::VP9Decoder decoder(cfg, 0);
+
+  vpx_usec_timer t;
+  vpx_usec_timer_start(&t);
+
+  for (decode_video.Begin(); decode_video.cxdata() != NULL;
+       decode_video.Next()) {
+    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
+  }
+
+  vpx_usec_timer_mark(&t);
+  const double elapsed_secs =
+      static_cast<double>(vpx_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned decode_frames = decode_video.frame_number();
+  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", decode_frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+  VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -7,9 +7,11 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
 #include "test/register_state_check.h"
 #include "test/video_source.h"

@@ -65,7 +67,7 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,

 void DecoderTest::RunLoop(CompressedVideoSource *video,
                          const vpx_codec_dec_cfg_t &dec_cfg) {
-  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
+  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, flags_, 0);
  ASSERT_TRUE(decoder != NULL);
  bool end_of_file = false;

@@ -110,4 +112,12 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {
  RunLoop(video, dec_cfg);
 }

+void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
+  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
+}
+
+void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
+  flags_ = flags;
+}
+
 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -41,7 +41,13 @@ class DxDataIterator {
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), deadline_(deadline), init_done_(false) {
+      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+          unsigned long deadline)  // NOLINT
+      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

@@ -66,9 +72,7 @@ class Decoder {
  }

  void Control(int ctrl_id, int arg) {
-    InitOnce();
-    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
+    Control(ctrl_id, arg, VPX_CODEC_OK);
  }

  void Control(int ctrl_id, const void *arg) {
@@ -77,6 +81,12 @@ class Decoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

+  void Control(int ctrl_id, int arg, vpx_codec_err_t expected_value) {
+    InitOnce();
+    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(expected_value, res) << DecodeError();
+  }
+
  const char* DecodeError() {
    const char *detail = vpx_codec_error_detail(&decoder_);
    return detail ? detail : vpx_codec_error(&decoder_);
@@ -97,6 +107,10 @@ class Decoder {

  bool IsVP8() const;

+  vpx_codec_ctx_t * GetDecoder() {
+    return &decoder_;
+  }
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const = 0;

@@ -104,7 +118,7 @@ class Decoder {
    if (!init_done_) {
      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
                                                     CodecInterface(),
-                                                     &cfg_, 0);
+                                                     &cfg_, flags_);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
@@ -112,6 +126,7 @@ class Decoder {

  vpx_codec_ctx_t     decoder_;
  vpx_codec_dec_cfg_t cfg_;
+  vpx_codec_flags_t   flags_;
  unsigned int        deadline_;
  bool                init_done_;
 };
@@ -124,6 +139,9 @@ class DecoderTest {
  virtual void RunLoop(CompressedVideoSource *video,
                       const vpx_codec_dec_cfg_t &dec_cfg);

+  virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg);
+  virtual void set_flags(const vpx_codec_flags_t flags);
+
  // Hook to be called before decompressing every frame.
  virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
                                  Decoder* /*decoder*/) {}
@@ -146,11 +164,16 @@ class DecoderTest {
                                const vpx_codec_err_t res_peek);

 protected:
-  explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}
+  explicit DecoderTest(const CodecFactory *codec)
+      : codec_(codec),
+        cfg_(),
+        flags_(0) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
+  vpx_codec_dec_cfg_t cfg_;
+  vpx_codec_flags_t   flags_;
 };

 }  // namespace libvpx_test
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -7,6 +7,7 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+#include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "./vpx_version.h"
@@ -50,7 +51,8 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };

-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 12 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestThreads[] = { 1, 2, 4 };

 #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))

@@ -63,7 +65,8 @@ class VP9EncodePerfTest
        min_psnr_(kMaxPsnr),
        nframes_(0),
        encoding_mode_(GET_PARAM(1)),
-        speed_(0) {}
+        speed_(0),
+        threads_(1) {}

  virtual ~VP9EncodePerfTest() {}

@@ -82,12 +85,18 @@ class VP9EncodePerfTest
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_resize_allowed = 0;
    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.g_error_resilient = 1;
+    cfg_.g_threads = threads_;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
+      const int log2_tile_columns = 3;
      encoder->Control(VP8E_SET_CPUUSED, speed_);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, log2_tile_columns);
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
    }
  }

@@ -113,54 +122,77 @@ class VP9EncodePerfTest
    speed_ = speed;
  }

+  void set_threads(unsigned int threads) {
+    threads_ = threads;
+  }
+
 private:
  double min_psnr_;
  unsigned int nframes_;
  libvpx_test::TestMode encoding_mode_;
  unsigned speed_;
+  unsigned int threads_;
 };

 TEST_P(VP9EncodePerfTest, PerfTest) {
  for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) {
    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
-      SetUp();
+      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
+        if (kVP9EncodePerfTestVectors[i].width < 512 &&
+            kEncodePerfTestThreads[k] > 1)
+          continue;
+        else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
+                 kEncodePerfTestThreads[k] > 2)
+          continue;

-      const vpx_rational timebase = { 33333333, 1000000000 };
-      cfg_.g_timebase = timebase;
-      cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+        set_threads(kEncodePerfTestThreads[k]);
+        SetUp();

-      init_flags_ = VPX_CODEC_USE_PSNR;
+        const vpx_rational timebase = { 33333333, 1000000000 };
+        cfg_.g_timebase = timebase;
+        cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;

-      const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
-      const char *video_name = kVP9EncodePerfTestVectors[i].name;
-      libvpx_test::I420VideoSource video(
-          video_name,
-          kVP9EncodePerfTestVectors[i].width,
-          kVP9EncodePerfTestVectors[i].height,
-          timebase.den, timebase.num, 0,
-          kVP9EncodePerfTestVectors[i].frames);
-      set_speed(kEncodePerfTestSpeeds[j]);
+        init_flags_ = VPX_CODEC_USE_PSNR;

-      vpx_usec_timer t;
-      vpx_usec_timer_start(&t);
+        const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
+        const char *video_name = kVP9EncodePerfTestVectors[i].name;
+        libvpx_test::I420VideoSource video(
+            video_name,
+            kVP9EncodePerfTestVectors[i].width,
+            kVP9EncodePerfTestVectors[i].height,
+            timebase.den, timebase.num, 0,
+            kVP9EncodePerfTestVectors[i].frames);
+        set_speed(kEncodePerfTestSpeeds[j]);

-      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+        vpx_usec_timer t;
+        vpx_usec_timer_start(&t);

-      vpx_usec_timer_mark(&t);
-      const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
-      const double fps = frames / elapsed_secs;
-      const double minimum_psnr = min_psnr();
+        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-      printf("{\n");
-      printf("\t\"type\" : \"encode_perf_test\",\n");
-      printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-      printf("\t\"videoName\" : \"%s\",\n", video_name);
-      printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
-      printf("\t\"totalFrames\" : %u,\n", frames);
-      printf("\t\"framesPerSecond\" : %f,\n", fps);
-      printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
-      printf("\t\"speed\" : %d\n", kEncodePerfTestSpeeds[j]);
-      printf("}\n");
+        vpx_usec_timer_mark(&t);
+        const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
+        const double fps = frames / elapsed_secs;
+        const double minimum_psnr = min_psnr();
+        std::string display_name(video_name);
+        if (kEncodePerfTestThreads[k] > 1) {
+          char thread_count[32];
+          snprintf(thread_count, sizeof(thread_count), "_t-%d",
+                   kEncodePerfTestThreads[k]);
+          display_name += thread_count;
+        }
+
+        printf("{\n");
+        printf("\t\"type\" : \"encode_perf_test\",\n");
+        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
+        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+        printf("\t\"totalFrames\" : %u,\n", frames);
+        printf("\t\"framesPerSecond\" : %f,\n", fps);
+        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+        printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
+        printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
+        printf("}\n");
+      }
    }
  }
 }
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -8,15 +8,59 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "./vpx_config.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/decode_test_driver.h"
-#include "test/register_state_check.h"
-#include "test/video_source.h"
+#include <string>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "./vpx_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/register_state_check.h"
+#include "test/video_source.h"
+
 namespace libvpx_test {
+void Encoder::InitEncoder(VideoSource *video) {
+  vpx_codec_err_t res;
+  const vpx_image_t *img = video->img();
+
+  if (video->img() && !encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video->timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+
+    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
+                             init_flags_);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+
+#if CONFIG_VP9_ENCODER
+    if (CodecInterface() == &vpx_codec_vp9_cx_algo) {
+      // Default to 1 tile column for VP9.
+      const int log2_tile_columns = 0;
+      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
+                               log2_tile_columns);
+      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    } else
+#endif
+#if CONFIG_VP10_ENCODER
+    if (CodecInterface() == &vpx_codec_vp10_cx_algo) {
+      // Default to 1 tile column for VP10.
+      const int log2_tile_columns = 0;
+      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
+                               log2_tile_columns);
+      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    } else
+#endif
+    {
+#if CONFIG_VP8_ENCODER
+      ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
+          << "Unknown Codec Interface";
+#endif
+    }
+  }
+}
+
 void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
  if (video->img())
    EncodeFrameInternal(*video, frame_flags);
@@ -39,17 +83,6 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
  vpx_codec_err_t res;
  const vpx_image_t *img = video.img();

-  // Handle first frame initialization
-  if (!encoder_.priv) {
-    cfg_.g_w = img->d_w;
-    cfg_.g_h = img->d_h;
-    cfg_.g_timebase = video.timebase();
-    cfg_.rc_twopass_stats_in = stats_->buf();
-    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
-                             init_flags_);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-
  // Handle frame resizing
  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
    cfg_.g_w = img->d_w;
@@ -60,8 +93,7 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,

  // Encode the frame
  API_REGISTER_STATE_CHECK(
-      res = vpx_codec_encode(&encoder_,
-                             video.img(), video.pts(), video.duration(),
+      res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(),
                             frame_flags, deadline_));
  ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
 }
@@ -77,6 +109,7 @@ void Encoder::Flush() {

 void EncoderTest::InitializeConfig() {
  const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
+  dec_cfg_ = vpx_codec_dec_cfg_t();
  ASSERT_EQ(VPX_CODEC_OK, res);
 }

@@ -110,6 +143,7 @@ void EncoderTest::SetMode(TestMode mode) {
 static bool compare_img(const vpx_image_t *img1,
                        const vpx_image_t *img2) {
  bool match = (img1->fmt == img2->fmt) &&
+               (img1->cs == img2->cs) &&
               (img1->d_w == img2->d_w) &&
               (img1->d_h == img2->d_h);

@@ -158,9 +192,19 @@ void EncoderTest::RunLoop(VideoSource *video) {
    Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
                                                   &stats_);
    ASSERT_TRUE(encoder != NULL);
-    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
+
+    video->Begin();
+    encoder->InitEncoder(video);
+    ASSERT_FALSE(::testing::Test::HasFatalFailure());
+
+    unsigned long dec_init_flags = 0;  // NOLINT
+    // Use fragment decoder if encoder outputs partitions.
+    // NOTE: fragment decoder and partition encoder are only supported by VP8.
+    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
+      dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
+    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
    bool again;
-    for (again = true, video->Begin(); again; video->Next()) {
+    for (again = true; again; video->Next()) {
      again = (video->img() != NULL);

      PreEncodeFrameHook(video);
@@ -200,6 +244,13 @@ void EncoderTest::RunLoop(VideoSource *video) {
        }
      }

+      // Flush the decoder when there are no more fragments.
+      if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
+        const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
+        if (!HandleDecodeResult(res_dec, *video, decoder))
+          break;
+      }
+
      if (has_dxdata && has_cxdata) {
        const vpx_image_t *img_enc = encoder->GetPreviewFrame();
        DxDataIterator dec_iter = decoder->GetDxData();
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -13,12 +13,13 @@
 #include <string>
 #include <vector>

-#include "./vpx_config.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_encoder.h"
-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+
+#include "./vpx_config.h"
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
 #include "vpx/vp8cx.h"
 #endif
+#include "vpx/vpx_encoder.h"

 namespace libvpx_test {

@@ -104,6 +105,8 @@ class Encoder {
    return CxDataIterator(&encoder_);
  }

+  void InitEncoder(VideoSource *video);
+
  const vpx_image_t *GetPreviewFrame() {
    return vpx_codec_get_preview_frame(&encoder_);
  }
@@ -121,6 +124,11 @@ class Encoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

+  void Control(int ctrl_id, int *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+
  void Control(int ctrl_id, struct vpx_scaling_mode *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -131,13 +139,23 @@ class Encoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

-#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
+  void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
+#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER || CONFIG_VP10_ENCODER
  void Control(int ctrl_id, vpx_active_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }
 #endif

+  void Config(const vpx_codec_enc_cfg_t *cfg) {
+    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    cfg_ = *cfg;
+  }
+
  void set_deadline(unsigned long deadline) {
    deadline_ = deadline;
  }
@@ -175,7 +193,10 @@ class EncoderTest {
 protected:
  explicit EncoderTest(const CodecFactory *codec)
      : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
-        last_pts_(0) {}
+        last_pts_(0) {
+    // Default to 1 thread.
+    cfg_.g_threads = 1;
+  }

  virtual ~EncoderTest() {}

@@ -185,6 +206,11 @@ class EncoderTest {
  // Map the TestMode enum to the deadline_ and passes_ variables.
  void SetMode(TestMode mode);

+  // Set encoder flag.
+  void set_init_flags(unsigned long flag) {  // NOLINT(runtime/int)
+    init_flags_ = flag;
+  }
+
  // Main loop
  virtual void RunLoop(VideoSource *video);

@@ -238,6 +264,7 @@ class EncoderTest {

  bool                 abort_;
  vpx_codec_enc_cfg_t  cfg_;
+  vpx_codec_dec_cfg_t  dec_cfg_;
  unsigned int         passes_;
  unsigned long        deadline_;
  TwopassStatsStore    stats_;
--- a/test/error_block_test.cc
+++ b/test/error_block_test.cc
@@ -1,146 +0,0 @@
-/*
- *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <cmath>
-#include <cstdlib>
-#include <string>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vpx/vpx_integer.h"
-
-using libvpx_test::ACMRandom;
-
-namespace {
-#if CONFIG_VP9_HIGHBITDEPTH
-const int number_of_iterations = 1000;
-
-typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
-                               const tran_low_t *dqcoeff, intptr_t block_size,
-                               int64_t *ssz, int bps);
-typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
-                        ErrorBlockParam;
-class ErrorBlockTest
-  : public ::testing::TestWithParam<ErrorBlockParam> {
- public:
-  virtual ~ErrorBlockTest() {}
-  virtual void SetUp() {
-    error_block_op_     = GET_PARAM(0);
-    ref_error_block_op_ = GET_PARAM(1);
-    bit_depth_  = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  ErrorBlockFunc error_block_op_;
-  ErrorBlockFunc ref_error_block_op_;
-};
-
-TEST_P(ErrorBlockTest, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff,   4096);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
-  int err_count_total = 0;
-  int first_failure = -1;
-  intptr_t block_size;
-  int64_t ssz;
-  int64_t ret;
-  int64_t ref_ssz;
-  int64_t ref_ret;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int err_count = 0;
-    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
-    for (int j = 0; j < block_size; j++) {
-      coeff[j]   = rnd(2<<20)-(1<<20);
-      dqcoeff[j] = rnd(2<<20)-(1<<20);
-    }
-    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
-                                  bit_depth_);
-    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
-                                                   &ssz, bit_depth_));
-    err_count += (ref_ret != ret) | (ref_ssz != ssz);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Error Block Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
-
-TEST_P(ErrorBlockTest, ExtremeValues) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff,   4096);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff, 4096);
-  int err_count_total = 0;
-  int first_failure = -1;
-  intptr_t block_size;
-  int64_t ssz;
-  int64_t ret;
-  int64_t ref_ssz;
-  int64_t ref_ret;
-  int max_val = ((1<<20)-1);
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int err_count = 0;
-    int k = (i / 9) % 5;
-
-    // Change the maximum coeff value, to test different bit boundaries
-    if ( k == 4 && (i % 9) == 0 ) {
-      max_val >>= 1;
-    }
-    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
-    for (int j = 0; j < block_size; j++) {
-      if (k < 4) {  // Test at maximum values
-        coeff[j]   = k % 2 ? max_val : -max_val;
-        dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
-      } else {
-        coeff[j]   = rnd(2 << 14) - (1 << 14);
-        dqcoeff[j] = rnd(2 << 14) - (1 << 14);
-      }
-    }
-    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
-                                  bit_depth_);
-    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
-                                                   &ssz, bit_depth_));
-    err_count += (ref_ret != ret) | (ref_ssz != ssz);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Error Block Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
-
-using std::tr1::make_tuple;
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-  SSE2_C_COMPARE, ErrorBlockTest,
-  ::testing::Values(
-    make_tuple(&vp9_highbd_block_error_sse2,
-               &vp9_highbd_block_error_c, VPX_BITS_10),
-    make_tuple(&vp9_highbd_block_error_sse2,
-               &vp9_highbd_block_error_c, VPX_BITS_12),
-    make_tuple(&vp9_highbd_block_error_sse2,
-               &vp9_highbd_block_error_c, VPX_BITS_8)));
-#endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-}  // namespace
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -20,10 +20,11 @@ const int kMaxErrorFrames = 12;
 const int kMaxDroppableFrames = 12;

 class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> {
 protected:
  ErrorResilienceTestLarge()
      : EncoderTest(GET_PARAM(0)),
+        svc_support_(GET_PARAM(2)),
        psnr_(0.0),
        nframes_(0),
        mismatch_psnr_(0.0),
@@ -37,6 +38,7 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  void Reset() {
    error_nframes_ = 0;
    droppable_nframes_ = 0;
+    pattern_switch_ = 0;
  }

  virtual void SetUp() {
@@ -56,22 +58,77 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    nframes_++;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
+  //
+  // Frame flags and layer id for temporal layers.
+  // For two layers, test pattern is:
+  //   1     3
+  // 0    2     .....
+  // LAST is updated on base/layer 0, GOLDEN  updated on layer 1.
+  // Non-zero pattern_switch parameter means pattern will switch to
+  // not using LAST for frame_num >= pattern_switch.
+  int SetFrameFlags(int frame_num,
+                    int num_temp_layers,
+                    int pattern_switch) {
+    int frame_flags = 0;
+    if (num_temp_layers == 2) {
+        if (frame_num % 2 == 0) {
+          if (frame_num < pattern_switch || pattern_switch == 0) {
+            // Layer 0: predict from LAST and ARF, update LAST.
+            frame_flags = VP8_EFLAG_NO_REF_GF |
+                          VP8_EFLAG_NO_UPD_GF |
+                          VP8_EFLAG_NO_UPD_ARF;
+          } else {
+            // Layer 0: predict from GF and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_REF_LAST |
+                          VP8_EFLAG_NO_UPD_LAST |
+                          VP8_EFLAG_NO_UPD_ARF;
+          }
+        } else {
+          if (frame_num < pattern_switch || pattern_switch == 0) {
+            // Layer 1: predict from L, GF, and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_UPD_ARF |
+                          VP8_EFLAG_NO_UPD_LAST;
+          } else {
+            // Layer 1: predict from GF and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_REF_LAST |
+                          VP8_EFLAG_NO_UPD_LAST |
+                          VP8_EFLAG_NO_UPD_ARF;
+          }
+        }
+    }
+    return frame_flags;
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
    frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
                      VP8_EFLAG_NO_UPD_GF |
                      VP8_EFLAG_NO_UPD_ARF);
-    if (droppable_nframes_ > 0 &&
-        (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+    // For temporal layer case.
+    if (cfg_.ts_number_layers > 1) {
+      frame_flags_ = SetFrameFlags(video->frame(),
+                                   cfg_.ts_number_layers,
+                                   pattern_switch_);
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
        if (droppable_frames_[i] == video->frame()) {
-          std::cout << "             Encoding droppable frame: "
+          std::cout << "Encoding droppable frame: "
                    << droppable_frames_[i] << "\n";
-          frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
-                           VP8_EFLAG_NO_UPD_GF |
-                           VP8_EFLAG_NO_UPD_ARF);
-          return;
        }
      }
+    } else {
+       if (droppable_nframes_ > 0 &&
+         (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+         for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+           if (droppable_frames_[i] == video->frame()) {
+             std::cout << "Encoding droppable frame: "
+                       << droppable_frames_[i] << "\n";
+             frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
+                              VP8_EFLAG_NO_UPD_GF |
+                              VP8_EFLAG_NO_UPD_ARF);
+             return;
+           }
+         }
+       }
    }
  }

@@ -133,11 +190,18 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    return mismatch_nframes_;
  }

+  void SetPatternSwitch(int frame_switch) {
+     pattern_switch_ = frame_switch;
+   }
+
+  bool svc_support_;
+
 private:
  double psnr_;
  unsigned int nframes_;
  unsigned int error_nframes_;
  unsigned int droppable_nframes_;
+  unsigned int pattern_switch_;
  double mismatch_psnr_;
  unsigned int mismatch_nframes_;
  unsigned int error_frames_[kMaxErrorFrames];
@@ -236,7 +300,303 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 #endif
 }

-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) the enhancement layer frames for a
+// two layer temporal pattern. The base layer does not predict from the top
+// layer, so successful decoding is expected.
+TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_)
+    return;

+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 500;
+  cfg_.g_lag_in_frames = 0;
+
+  cfg_.rc_end_usage = VPX_CBR;
+  // 2 Temporal layers, no spatial layers, CBR mode.
+  cfg_.ss_number_layers = 1;
+  cfg_.ts_number_layers = 2;
+  cfg_.ts_rate_decimator[0] = 2;
+  cfg_.ts_rate_decimator[1] = 1;
+  cfg_.ts_periodicity = 2;
+  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 40);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  SetPatternSwitch(0);
+
+  // The odd frames are the enhancement layer for 2 layer pattern, so set
+  // those frames as droppable. Drop the last 7 frames.
+  unsigned int num_droppable_frames = 7;
+  unsigned int droppable_frame_list[] = {27, 29, 31, 33, 35, 37, 39};
+  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
+  SetErrorFrames(num_droppable_frames, droppable_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Mismatch frames: "
+            << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+  // Reset previously set of error/droppable frames.
+  Reset();
+}
+
+// Check for successful decoding and no encoder/decoder mismatch
+// for a two layer temporal pattern, where at some point in the
+// sequence, the LAST ref is not used anymore.
+TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+  // This test doesn't run if SVC is not supported.
+  if (!svc_support_)
+    return;
+
+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 500;
+  cfg_.g_lag_in_frames = 0;
+
+  cfg_.rc_end_usage = VPX_CBR;
+  // 2 Temporal layers, no spatial layers, CBR mode.
+  cfg_.ss_number_layers = 1;
+  cfg_.ts_number_layers = 2;
+  cfg_.ts_rate_decimator[0] = 2;
+  cfg_.ts_rate_decimator[1] = 1;
+  cfg_.ts_periodicity = 2;
+  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 100);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  SetPatternSwitch(60);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Mismatch frames: "
+            << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+  // Reset previously set of error/droppable frames.
+  Reset();
+}
+
+class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  ErrorResilienceTestLargeCodecControls()
+      : EncoderTest(GET_PARAM(0)),
+        encoding_mode_(GET_PARAM(1)) {
+    Reset();
+  }
+
+  virtual ~ErrorResilienceTestLargeCodecControls() {}
+
+  void Reset() {
+    last_pts_ = 0;
+    tot_frame_number_ = 0;
+    // For testing up to 3 layers.
+    for (int i = 0; i < 3; ++i) {
+      bits_total_[i] = 0;
+    }
+    duration_ = 0.0;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+  }
+
+  //
+  // Frame flags and layer id for temporal layers.
+  //
+
+  // For two layers, test pattern is:
+  //   1     3
+  // 0    2     .....
+  // For three layers, test pattern is:
+  //   1      3    5      7
+  //      2           6
+  // 0          4            ....
+  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1,
+  // and ALTREF is updated on top layer for 3 layer pattern.
+  int SetFrameFlags(int frame_num, int num_temp_layers) {
+    int frame_flags = 0;
+    if (num_temp_layers == 2) {
+      if (frame_num % 2 == 0) {
+        // Layer 0: predict from L and ARF, update L.
+        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
+                      VP8_EFLAG_NO_UPD_ARF;
+      } else {
+        // Layer 1: predict from L, G and ARF, and update G.
+        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+                      VP8_EFLAG_NO_UPD_ENTROPY;
+      }
+    } else if (num_temp_layers == 3) {
+      if (frame_num % 4 == 0) {
+        // Layer 0: predict from L, update L.
+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+      } else if ((frame_num - 2) % 4 == 0) {
+        // Layer 1: predict from L, G,  update G.
+        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+                      VP8_EFLAG_NO_REF_ARF;
+      }  else if ((frame_num - 1) % 2 == 0) {
+        // Layer 2: predict from L, G, ARF; update ARG.
+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
+      }
+    }
+    return frame_flags;
+  }
+
+  int SetLayerId(int frame_num, int num_temp_layers) {
+    int layer_id = 0;
+    if (num_temp_layers == 2) {
+      if (frame_num % 2 == 0) {
+        layer_id = 0;
+      } else {
+         layer_id = 1;
+      }
+    } else if (num_temp_layers == 3) {
+      if (frame_num % 4 == 0) {
+        layer_id = 0;
+      } else if ((frame_num - 2) % 4 == 0) {
+        layer_id = 1;
+      } else if ((frame_num - 1) % 2 == 0) {
+        layer_id = 2;
+      }
+    }
+    return layer_id;
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  libvpx_test::Encoder *encoder) {
+    if (cfg_.ts_number_layers > 1) {
+        int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
+        int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+        if (video->frame() > 0) {
+          encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+          encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
+        }
+       const vpx_rational_t tb = video->timebase();
+       timebase_ = static_cast<double>(tb.num) / tb.den;
+       duration_ = 0;
+       return;
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    // Time since last timestamp = duration.
+    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+    if (duration > 1) {
+      // Update counter for total number of frames (#frames input to encoder).
+      // Needed for setting the proper layer_id below.
+      tot_frame_number_ += static_cast<int>(duration - 1);
+    }
+    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    // Update the total encoded bits. For temporal layers, update the cumulative
+    // encoded bits per layer.
+    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
+      bits_total_[i] += frame_size_in_bits;
+    }
+    // Update the most recent pts.
+    last_pts_ = pkt->data.frame.pts;
+    ++tot_frame_number_;
+  }
+
+  virtual void EndPassHook(void) {
+    duration_ = (last_pts_ + 1) * timebase_;
+    if (cfg_.ts_number_layers  > 1) {
+      for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
+          ++layer) {
+        if (bits_total_[layer]) {
+          // Effective file datarate:
+          effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
+        }
+      }
+    }
+  }
+
+  double effective_datarate_[3];
+   private:
+    libvpx_test::TestMode encoding_mode_;
+    vpx_codec_pts_t last_pts_;
+    double timebase_;
+    int64_t bits_total_[3];
+    double duration_;
+    int tot_frame_number_;
+  };
+
+// Check two codec controls used for:
+// (1) for setting temporal layer id, and (2) for settings encoder flags.
+// This test invokes those controls for each frame, and verifies encoder/decoder
+// mismatch and basic rate control response.
+// TODO(marpan): Maybe move this test to datarate_test.cc.
+TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  cfg_.g_error_resilient = 1;
+
+  // 3 Temporal layers. Framerate decimation (4, 2, 1).
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.ts_periodicity = 4;
+  cfg_.ts_layer_id[0] = 0;
+  cfg_.ts_layer_id[1] = 2;
+  cfg_.ts_layer_id[2] = 1;
+  cfg_.ts_layer_id[3] = 2;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 200);
+  for (int i = 200; i <= 800; i += 200) {
+    cfg_.rc_target_bitrate = i;
+    Reset();
+    // 40-20-40 bitrate allocation for 3 temporal layers.
+    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
+      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
+          << " The datarate for the file is lower than target by too much, "
+              "for layer: " << j;
+      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
+          << " The datarate for the file is greater than target by too much, "
+              "for layer: " << j;
+    }
+  }
+}
+
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
+                          ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                          ::testing::Values(true));
+// SVC-related tests don't run for VP10 since SVC is not supported.
+VP10_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES,
+                           ::testing::Values(false));
 }  // namespace
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -97,13 +97,19 @@ class ExternalFrameBufferList {
    return 0;
  }

-  // Marks the external frame buffer that |fb| is pointing too as free.
+  // Marks the external frame buffer that |fb| is pointing to as free.
  // Returns < 0 on an error.
  int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) {
-    EXPECT_TRUE(fb != NULL);
+    if (fb == NULL) {
+      EXPECT_TRUE(fb != NULL);
+      return -1;
+    }
    ExternalFrameBuffer *const ext_fb =
        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
-    EXPECT_TRUE(ext_fb != NULL);
+    if (ext_fb == NULL) {
+      EXPECT_TRUE(ext_fb != NULL);
+      return -1;
+    }
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
    return 0;
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -13,15 +13,17 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -39,7 +41,7 @@ typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;

 void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
                 int tx_type) {
-  vp9_fdct4x4_c(in, out, stride);
+  vpx_fdct4x4_c(in, out, stride);
 }

 void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -53,11 +55,11 @@ void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
 }

 void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
 }

 void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -69,23 +71,23 @@ void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
 }

 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
 }

 void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
 }
-#endif
-#endif
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans4x4TestBase {
 public:
@@ -102,13 +104,13 @@ class Trans4x4TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

      // Initialize a test block with input range [-255, 255].
@@ -142,6 +144,7 @@ class Trans4x4TestBase {
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
+        ASSERT_EQ(VPX_BITS_8, bit_depth_);
        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
@@ -163,9 +166,9 @@ class Trans4x4TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -184,15 +187,13 @@ class Trans4x4TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -219,13 +220,13 @@ class Trans4x4TestBase {
  void RunInvAccuracyCheck(int limit) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -418,15 +419,15 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -452,7 +453,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -460,43 +461,54 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
-#endif
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_NEON, Trans4x4HT,
+    NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    MMX, Trans4x4WHT,
    ::testing::Values(
-        make_tuple(&vp9_fwht4x4_mmx, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_fwht4x4_mmx, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+#endif
+
+#if CONFIG_USE_X86INC && HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && \
+    !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
 #endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct4x4_sse2,
-                   &vp9_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct4x4_sse2,
+                   &vpx_idct4x4_16_add_sse2, 0, VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
@@ -504,33 +516,39 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,
+        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_sse2,      &vpx_idct4x4_16_add_c, 0,
                   VPX_BITS_8)));

 INSTANTIATE_TEST_CASE_P(
    SSE2, Trans4x4HT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -13,18 +13,40 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {

 const int kNumCoeffs = 64;
 const double kPi = 3.141592653589793238462643383279502884;
+
+const int kSignBiasMaxDiff255 = 1500;
+const int kSignBiasMaxDiff15 = 10000;
+
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
+
 void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 8; k++) {
@@ -59,22 +81,9 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  }
 }

-using libvpx_test::ACMRandom;
-
-namespace {
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        int tx_type);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        int tx_type);
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
-typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;

 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
-  vp9_fdct8x8_c(in, out, stride);
+  vpx_fdct8x8_c(in, out, stride);
 }

 void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
@@ -83,11 +92,11 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_c(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
@@ -99,31 +108,31 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 }

 void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
 }

 void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
 }

 #if HAVE_SSE2
 void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
 }

 void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
-#endif
-#endif
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class FwdTrans8x8TestBase {
 public:
@@ -135,8 +144,8 @@ class FwdTrans8x8TestBase {

  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
    int count_sign_block[64][2];
    const int count_test_block = 100000;

@@ -160,7 +169,7 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 1125;
+      const int max_diff = kSignBiasMaxDiff255;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
@@ -173,7 +182,7 @@ class FwdTrans8x8TestBase {
    memset(count_sign_block, 0, sizeof(count_sign_block));

    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-mask_/16, mask_/16].
+      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
      for (int j = 0; j < 64; ++j)
        test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
                              ((rnd.Rand16() & mask_) >> 4);
@@ -190,9 +199,9 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 10000;
+      const int max_diff = kSignBiasMaxDiff15;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 4x4 FDCT/FHT has a sign bias > "
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
@@ -206,13 +215,13 @@ class FwdTrans8x8TestBase {
    int max_error = 0;
    int total_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -283,14 +292,14 @@ class FwdTrans8x8TestBase {
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -372,13 +381,13 @@ class FwdTrans8x8TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -430,9 +439,9 @@ class FwdTrans8x8TestBase {
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -460,12 +469,12 @@ void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 12;
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 #endif
    const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;

@@ -620,8 +629,8 @@ class InvTrans8x8DCT
  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
-    thresh_   = GET_PARAM(2);
-    pitch_    = 8;
+    thresh_ = GET_PARAM(2);
+    pitch_ = 8;
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }
@@ -649,20 +658,21 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
-#endif
+        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
@@ -671,7 +681,6 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
@@ -683,28 +692,31 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
+        make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_NEON, FwdTrans8x8HT,
+    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_sse2, 0,
+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0,
                   VPX_BITS_8)));
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
@@ -713,22 +725,21 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_highbd_fdct8x8_c,
+        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
+        make_tuple(&vpx_highbd_fdct8x8_sse2,
                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c,
+        make_tuple(&vpx_highbd_fdct8x8_c,
                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fdct8x8_sse2,
-                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
-        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
-
+        make_tuple(&vpx_highbd_fdct8x8_sse2,
+                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));

 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8HT,
@@ -751,15 +762,28 @@ INSTANTIATE_TEST_CASE_P(
                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
        make_tuple(&idct8x8_12,
                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
-#endif
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

-
-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
+        make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_ssse3, 0,
                   VPX_BITS_8)));
 #endif
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/frame_size_tests.cc
+++ b/test/frame_size_tests.cc
@@ -74,7 +74,7 @@ TEST_F(VP9FrameSizeTestsLarge, ValidSizes) {
  // size or almost 1 gig of memory.
  // In total the allocations will exceed 2GiB which may cause a failure with
  // mingw + wine, use a smaller size in that case.
-#if defined(_WIN32) && !defined(_WIN64)
+#if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__)
  video.SetSize(4096, 3072);
 #else
  video.SetSize(4096, 4096);
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -13,104 +13,22 @@
 #include <cstdlib>
 #include <string>

-#include "test/video_source.h"
+#include "test/yuv_video_source.h"

 namespace libvpx_test {

 // This class extends VideoSource to allow parsing of raw yv12
 // so that we can do actual file encodes.
-class I420VideoSource : public VideoSource {
+class I420VideoSource : public YUVVideoSource {
 public:
  I420VideoSource(const std::string &file_name,
                  unsigned int width, unsigned int height,
                  int rate_numerator, int rate_denominator,
                  unsigned int start, int limit)
-      : file_name_(file_name),
-        input_file_(NULL),
-        img_(NULL),
-        start_(start),
-        limit_(limit),
-        frame_(0),
-        width_(0),
-        height_(0),
-        framerate_numerator_(rate_numerator),
-        framerate_denominator_(rate_denominator) {
-    // This initializes raw_sz_, width_, height_ and allocates an img.
-    SetSize(width, height);
-  }
-
-  virtual ~I420VideoSource() {
-    vpx_img_free(img_);
-    if (input_file_)
-      fclose(input_file_);
-  }
-
-  virtual void Begin() {
-    if (input_file_)
-      fclose(input_file_);
-    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << file_name_;
-    if (start_) {
-      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
-    }
-
-    frame_ = start_;
-    FillFrame();
-  }
-
-  virtual void Next() {
-    ++frame_;
-    FillFrame();
-  }
-
-  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
-
-  // Models a stream where Timebase = 1/FPS, so pts == frame.
-  virtual vpx_codec_pts_t pts() const { return frame_; }
-
-  virtual unsigned long duration() const { return 1; }
-
-  virtual vpx_rational_t timebase() const {
-    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
-    return t;
-  }
-
-  virtual unsigned int frame() const { return frame_; }
-
-  virtual unsigned int limit() const { return limit_; }
-
-  void SetSize(unsigned int width, unsigned int height) {
-    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
-      ASSERT_TRUE(img_ != NULL);
-      width_ = width;
-      height_ = height;
-      raw_sz_ = width * height * 3 / 2;
-    }
-  }
-
-  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
-    // Read a frame from input_file.
-    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
-      limit_ = frame_;
-    }
-  }
-
- protected:
-  std::string file_name_;
-  FILE *input_file_;
-  vpx_image_t *img_;
-  size_t raw_sz_;
-  unsigned int start_;
-  unsigned int limit_;
-  unsigned int frame_;
-  unsigned int width_;
-  unsigned int height_;
-  int framerate_numerator_;
-  int framerate_denominator_;
+      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,
+                       width, height,
+                       rate_numerator, rate_denominator,
+                       start, limit) {}
 };

 }  // namespace libvpx_test
--- a/test/idct8x8_test.cc
+++ b/test/idct8x8_test.cc
@@ -14,8 +14,7 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
-
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"

@@ -68,43 +67,6 @@ void reference_dct_2d(int16_t input[64], double output[64]) {
    output[i] *= 2;
 }

-void reference_idct_1d(double input[8], double output[8]) {
-  const double kPi = 3.141592653589793238462643383279502884;
-  const double kSqrt2 = 1.414213562373095048801688724209698;
-  for (int k = 0; k < 8; k++) {
-    output[k] = 0.0;
-    for (int n = 0; n < 8; n++) {
-      output[k] += input[n]*cos(kPi*(2*k+1)*n/16.0);
-      if (n == 0)
-        output[k] = output[k]/kSqrt2;
-    }
-  }
-}
-
-void reference_idct_2d(double input[64], int16_t output[64]) {
-  double out[64], out2[64];
-  // First transform rows
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = input[j + i*8];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out[j + i*8] = temp_out[j];
-  }
-  // Then transform columns
-  for (int i = 0; i < 8; ++i) {
-    double temp_in[8], temp_out[8];
-    for (int j = 0; j < 8; ++j)
-      temp_in[j] = out[j*8 + i];
-    reference_idct_1d(temp_in, temp_out);
-    for (int j = 0; j < 8; ++j)
-      out2[j*8 + i] = temp_out[j];
-  }
-  for (int i = 0; i < 64; ++i)
-    output[i] = round(out2[i]/32);
-}
-
 TEST(VP9Idct8x8Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 10000;
@@ -125,7 +87,7 @@ TEST(VP9Idct8x8Test, AccuracyCheck) {
    reference_dct_2d(input, output_r);
    for (int j = 0; j < 64; ++j)
      coeff[j] = round(output_r[j]);
-    vp9_idct8x8_64_add_c(coeff, dst, 8);
+    vpx_idct8x8_64_add_c(coeff, dst, 8);
    for (int j = 0; j < 64; ++j) {
      const int diff = dst[j] - src[j];
      const int error = diff * diff;
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -10,10 +10,11 @@

 #include "./vpx_config.h"
 #include "./vp8_rtcd.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
 #include "vpx/vpx_integer.h"

 typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
@@ -113,4 +114,8 @@ INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
 #endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_msa));
+#endif
 }
--- a/test/intrapred_test.cc
+++ b/test/intrapred_test.cc
@@ -1,396 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <string.h>
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using libvpx_test::ACMRandom;
-
-class IntraPredBase {
- public:
-  virtual ~IntraPredBase() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void SetupMacroblock(MACROBLOCKD *mbptr,
-                       MODE_INFO *miptr,
-                       uint8_t *data,
-                       int block_size,
-                       int stride,
-                       int num_planes) {
-    mbptr_ = mbptr;
-    miptr_ = miptr;
-    mbptr_->up_available = 1;
-    mbptr_->left_available = 1;
-    mbptr_->mode_info_context = miptr_;
-    stride_ = stride;
-    block_size_ = block_size;
-    num_planes_ = num_planes;
-    for (int p = 0; p < num_planes; p++)
-      data_ptr_[p] = data + stride * (block_size + 1) * p +
-                     stride + block_size;
-  }
-
-  void FillRandom() {
-    // Fill edges with random data
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    for (int p = 0; p < num_planes_; p++) {
-      for (int x = -1 ; x <= block_size_; x++)
-        data_ptr_[p][x - stride_] = rnd.Rand8();
-      for (int y = 0; y < block_size_; y++)
-        data_ptr_[p][y * stride_ - 1] = rnd.Rand8();
-    }
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) = 0;
-
-  void SetLeftUnavailable() {
-    mbptr_->left_available = 0;
-    for (int p = 0; p < num_planes_; p++)
-      for (int i = -1; i < block_size_; ++i)
-        data_ptr_[p][stride_ * i - 1] = 129;
-  }
-
-  void SetTopUnavailable() {
-    mbptr_->up_available = 0;
-    for (int p = 0; p < num_planes_; p++)
-      memset(&data_ptr_[p][-1 - stride_], 127, block_size_ + 2);
-  }
-
-  void SetTopLeftUnavailable() {
-    SetLeftUnavailable();
-    SetTopUnavailable();
-  }
-
-  int BlockSizeLog2Min1() const {
-    switch (block_size_) {
-      case 16:
-        return 3;
-      case 8:
-        return 2;
-      default:
-        return 0;
-    }
-  }
-
-  // check DC prediction output against a reference
-  void CheckDCPrediction() const {
-    for (int p = 0; p < num_planes_; p++) {
-      // calculate expected DC
-      int expected;
-      if (mbptr_->up_available || mbptr_->left_available) {
-        int sum = 0, shift = BlockSizeLog2Min1() + mbptr_->up_available +
-                             mbptr_->left_available;
-        if (mbptr_->up_available)
-          for (int x = 0; x < block_size_; x++)
-            sum += data_ptr_[p][x - stride_];
-        if (mbptr_->left_available)
-          for (int y = 0; y < block_size_; y++)
-            sum += data_ptr_[p][y * stride_ - 1];
-        expected = (sum + (1 << (shift - 1))) >> shift;
-      } else {
-        expected = 0x80;
-      }
-      // check that all subsequent lines are equal to the first
-      for (int y = 1; y < block_size_; ++y)
-        ASSERT_EQ(0, memcmp(data_ptr_[p], &data_ptr_[p][y * stride_],
-                            block_size_));
-      // within the first line, ensure that each pixel has the same value
-      for (int x = 1; x < block_size_; ++x)
-        ASSERT_EQ(data_ptr_[p][0], data_ptr_[p][x]);
-      // now ensure that that pixel has the expected (DC) value
-      ASSERT_EQ(expected, data_ptr_[p][0]);
-    }
-  }
-
-  // check V prediction output against a reference
-  void CheckVPrediction() const {
-    // check that all lines equal the top border
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        ASSERT_EQ(0, memcmp(&data_ptr_[p][-stride_],
-                            &data_ptr_[p][y * stride_], block_size_));
-  }
-
-  // check H prediction output against a reference
-  void CheckHPrediction() const {
-    // for each line, ensure that each pixel is equal to the left border
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        for (int x = 0; x < block_size_; x++)
-          ASSERT_EQ(data_ptr_[p][-1 + y * stride_],
-                    data_ptr_[p][x + y * stride_]);
-  }
-
-  static int ClipByte(int value) {
-    if (value > 255)
-      return 255;
-    else if (value < 0)
-      return 0;
-    return value;
-  }
-
-  // check TM prediction output against a reference
-  void CheckTMPrediction() const {
-    for (int p = 0; p < num_planes_; p++)
-      for (int y = 0; y < block_size_; y++)
-        for (int x = 0; x < block_size_; x++) {
-          const int expected = ClipByte(data_ptr_[p][x - stride_]
-                                      + data_ptr_[p][stride_ * y - 1]
-                                      - data_ptr_[p][-1 - stride_]);
-          ASSERT_EQ(expected, data_ptr_[p][y * stride_ + x]);
-       }
-  }
-
-  // Actual test
-  void RunTest() {
-    {
-      SCOPED_TRACE("DC_PRED");
-      FillRandom();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED LEFT");
-      FillRandom();
-      SetLeftUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED TOP");
-      FillRandom();
-      SetTopUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("DC_PRED TOP_LEFT");
-      FillRandom();
-      SetTopLeftUnavailable();
-      Predict(DC_PRED);
-      CheckDCPrediction();
-    }
-    {
-      SCOPED_TRACE("H_PRED");
-      FillRandom();
-      Predict(H_PRED);
-      CheckHPrediction();
-    }
-    {
-      SCOPED_TRACE("V_PRED");
-      FillRandom();
-      Predict(V_PRED);
-      CheckVPrediction();
-    }
-    {
-      SCOPED_TRACE("TM_PRED");
-      FillRandom();
-      Predict(TM_PRED);
-      CheckTMPrediction();
-    }
-  }
-
-  MACROBLOCKD *mbptr_;
-  MODE_INFO *miptr_;
-  uint8_t *data_ptr_[2];  // in the case of Y, only [0] is used
-  int stride_;
-  int block_size_;
-  int num_planes_;
-};
-
-typedef void (*IntraPredYFunc)(MACROBLOCKD *x,
-                               uint8_t *yabove_row,
-                               uint8_t *yleft,
-                               int left_stride,
-                               uint8_t *ypred_ptr,
-                               int y_stride);
-
-class IntraPredYTest
-    : public IntraPredBase,
-      public ::testing::TestWithParam<IntraPredYFunc> {
- public:
-  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
-    data_array_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
-    data_array_ = NULL;
-  }
-
- protected:
-  static const int kBlockSize = 16;
-  static const int kDataAlignment = 16;
-  static const int kStride = kBlockSize * 3;
-  // We use 48 so that the data pointer of the first pixel in each row of
-  // each macroblock is 16-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 17 lines so we have one line above us for top-prediction.
-  static const int kDataBufferSize = kStride * (kBlockSize + 1);
-
-  virtual void SetUp() {
-    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 1);
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.mode = mode;
-    ASM_REGISTER_STATE_CHECK(pred_fn_(mbptr_,
-                                      data_ptr_[0] - kStride,
-                                      data_ptr_[0] - 1, kStride,
-                                      data_ptr_[0], kStride));
-  }
-
-  IntraPredYFunc pred_fn_;
-  static uint8_t* data_array_;
-  static MACROBLOCKD * mb_;
-  static MODE_INFO *mi_;
-};
-
-MACROBLOCKD* IntraPredYTest::mb_ = NULL;
-MODE_INFO* IntraPredYTest::mi_ = NULL;
-uint8_t* IntraPredYTest::data_array_ = NULL;
-
-TEST_P(IntraPredYTest, IntraPredTests) {
-  RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredYTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mby_s_neon));
-#endif
-
-typedef void (*IntraPredUvFunc)(MACROBLOCKD *x,
-                                uint8_t *uabove_row,
-                                uint8_t *vabove_row,
-                                uint8_t *uleft,
-                                uint8_t *vleft,
-                                int left_stride,
-                                uint8_t *upred_ptr,
-                                uint8_t *vpred_ptr,
-                                int pred_stride);
-
-class IntraPredUVTest
-    : public IntraPredBase,
-      public ::testing::TestWithParam<IntraPredUvFunc> {
- public:
-  static void SetUpTestCase() {
-    mb_ = reinterpret_cast<MACROBLOCKD*>(
-        vpx_memalign(32, sizeof(MACROBLOCKD)));
-    mi_ = reinterpret_cast<MODE_INFO*>(
-        vpx_memalign(32, sizeof(MODE_INFO)));
-    data_array_ = reinterpret_cast<uint8_t*>(
-        vpx_memalign(kDataAlignment, kDataBufferSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(data_array_);
-    vpx_free(mi_);
-    vpx_free(mb_);
-    data_array_ = NULL;
-  }
-
- protected:
-  static const int kBlockSize = 8;
-  static const int kDataAlignment = 8;
-  static const int kStride = kBlockSize * 3;
-  // We use 24 so that the data pointer of the first pixel in each row of
-  // each macroblock is 8-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 9 lines so we have one line above us for top-prediction.
-  // [0] = U, [1] = V
-  static const int kDataBufferSize = 2 * kStride * (kBlockSize + 1);
-
-  virtual void SetUp() {
-    pred_fn_ = GetParam();
-    SetupMacroblock(mb_, mi_, data_array_, kBlockSize, kStride, 2);
-  }
-
-  virtual void Predict(MB_PREDICTION_MODE mode) {
-    mbptr_->mode_info_context->mbmi.uv_mode = mode;
-    pred_fn_(mbptr_, data_ptr_[0] - kStride, data_ptr_[1] - kStride,
-             data_ptr_[0] - 1, data_ptr_[1] - 1, kStride,
-             data_ptr_[0], data_ptr_[1], kStride);
-  }
-
-  IntraPredUvFunc pred_fn_;
-  // We use 24 so that the data pointer of the first pixel in each row of
-  // each macroblock is 8-byte aligned, and this gives us access to the
-  // top-left and top-right corner pixels belonging to the top-left/right
-  // macroblocks.
-  // We use 9 lines so we have one line above us for top-prediction.
-  // [0] = U, [1] = V
-  static uint8_t* data_array_;
-  static MACROBLOCKD* mb_;
-  static MODE_INFO* mi_;
-};
-
-MACROBLOCKD* IntraPredUVTest::mb_ = NULL;
-MODE_INFO* IntraPredUVTest::mi_ = NULL;
-uint8_t* IntraPredUVTest::data_array_ = NULL;
-
-TEST_P(IntraPredUVTest, IntraPredTests) {
-  RunTest();
-}
-
-INSTANTIATE_TEST_CASE_P(C, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_c));
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_sse2));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_ssse3));
-#endif
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IntraPredUVTest,
-                        ::testing::Values(
-                            vp8_build_intra_predictors_mbuv_s_neon));
-#endif
-
-}  // namespace
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -63,9 +63,22 @@ class InvalidFileTest
    EXPECT_NE(res, EOF) << "Read result data failed";

    // Check results match.
-    EXPECT_EQ(expected_res_dec, res_dec)
-        << "Results don't match: frame number = " << video.frame_number()
-        << ". (" << decoder->DecodeError() << ")";
+    const DecodeParam input = GET_PARAM(1);
+    if (input.threads > 1) {
+      // The serial decode check is too strict for tile-threaded decoding as
+      // there is no guarantee on the decode order nor which specific error
+      // will take precedence. Currently a tile-level error is not forwarded so
+      // the frame will simply be marked corrupt.
+      EXPECT_TRUE(res_dec == expected_res_dec ||
+                  res_dec == VPX_CODEC_CORRUPT_FRAME)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError() << "). Expected: "
+          << expected_res_dec << " or " << VPX_CODEC_CORRUPT_FRAME;
+    } else {
+      EXPECT_EQ(expected_res_dec, res_dec)
+          << "Results don't match: frame number = " << video.frame_number()
+          << ". (" << decoder->DecodeError() << ")";
+    }

    return !HasFailure();
  }
@@ -112,7 +125,9 @@ TEST_P(InvalidFileTest, ReturnCode) {

 const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-02-v2.webm"},
+#if CONFIG_VP9_HIGHBITDEPTH
  {1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"},
+#endif
  {1, "invalid-vp90-03-v3.webm"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf"},
  {1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf"},
@@ -121,6 +136,8 @@ const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
@@ -141,7 +158,7 @@ TEST_P(InvalidFileInvalidPeekTest, ReturnCode) {
 }

 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
-  {1, "invalid-vp90-01-v2.webm"},
+  {1, "invalid-vp90-01-v3.webm"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
@@ -151,6 +168,7 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
  {4, "invalid-"
      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
+  {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
 };
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -13,18 +13,17 @@
 #include <string>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_loopfilter.h"
 #include "vpx/vpx_integer.h"

-#define MAX_LOOP_FILTER 63
-
 using libvpx_test::ACMRandom;

 namespace {
@@ -53,62 +52,105 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *thresh1);
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-typedef std::tr1::tuple<loop_op_t, loop_op_t, vpx_bit_depth_t> loop8_param_t;
-typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t,
-                        vpx_bit_depth_t> dualloop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
+typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;

 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 void wrapper_vertical_16_sse2(uint16_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count, int bd) {
-  vp9_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_sse2(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_c(uint16_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count, int bd) {
-  vp9_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_c(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_dual_sse2(uint16_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh, bd);
 }

 void wrapper_vertical_16_dual_c(uint16_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count, int bd) {
-  vp9_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
+  vpx_highbd_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh, bd);
 }
 #else
 void wrapper_vertical_16_sse2(uint8_t *s, int p, const uint8_t *blimit,
                              const uint8_t *limit, const uint8_t *thresh,
                              int count) {
-  vp9_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_sse2(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
                           const uint8_t *limit, const uint8_t *thresh,
                           int count) {
-  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_sse2(uint8_t *s, int p, const uint8_t *blimit,
                                   const uint8_t *limit, const uint8_t *thresh,
                                   int count) {
-  vp9_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_sse2(s, p, blimit, limit, thresh);
 }

 void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
                                const uint8_t *limit, const uint8_t *thresh,
                                int count) {
-  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+  vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2

+#if HAVE_NEON_ASM
+#if CONFIG_VP9_HIGHBITDEPTH
+// No neon high bitdepth functions.
+#else
+void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
+                              const uint8_t *limit, const uint8_t *thresh,
+                              int count) {
+  vpx_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count) {
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
+                                   const uint8_t *limit, const uint8_t *thresh,
+                                   int count) {
+  vpx_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh,
+                                int count) {
+  vpx_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON_ASM
+
+#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
+                             const uint8_t *limit, const uint8_t *thresh,
+                             int count) {
+  vpx_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count) {
+  vpx_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+}
+#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@@ -116,13 +158,15 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
    loopfilter_op_ = GET_PARAM(0);
    ref_loopfilter_op_ = GET_PARAM(1);
    bit_depth_ = GET_PARAM(2);
+    count_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  vpx_bit_depth_t bit_depth_;
+  int bit_depth_;
+  int count_;
  int mask_;
  loop_op_t loopfilter_op_;
  loop_op_t ref_loopfilter_op_;
@@ -141,7 +185,7 @@ class Loop8Test9Param : public ::testing::TestWithParam<dualloop8_param_t> {
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  vpx_bit_depth_t bit_depth_;
+  int bit_depth_;
  int mask_;
  dual_loop_op_t loopfilter_op_;
  dual_loop_op_t ref_loopfilter_op_;
@@ -151,30 +195,23 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_bit_depth_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
-    // mblim  <= 3 * MAX_LOOP_FILTER + 4
-    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
-      tmp = rnd.Rand8();
-    }
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -185,7 +222,6 @@ TEST_P(Loop8Test6Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs/32;
-    int count = 1;

    uint16_t tmp_s[kNumCoeffs];
    int j = 0;
@@ -217,13 +253,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
 #else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int j = 0; j < kNumCoeffs; ++j) {
@@ -244,29 +280,36 @@ TEST_P(Loop8Test6Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_bit_depth_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  const int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
+
+  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
+  // function of sharpness_lvl and the loopfilter lvl as:
+  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+  // ...
+  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+  //        SIMD_WIDTH);
+  // This means that the largest value for mblim will occur when sharpness_lvl
+  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
+  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
+  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
+  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
+
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
-    while (tmp > 3*MAX_LOOP_FILTER + 4) {  // mblim  <= 3*MAX_LOOP_FILTER + 4
-      tmp = rnd.Rand8();
-    }
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -277,19 +320,18 @@ TEST_P(Loop8Test6Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs / 32;
-    int count = 1;
    for (int j = 0; j < kNumCoeffs; ++j) {
      s[j] = rnd.Rand16() & mask_;
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
 #else
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int j = 0; j < kNumCoeffs; ++j) {
      err_count += ref_s[j] != s[j];
@@ -309,31 +351,23 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  vpx_bit_depth_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  const int32_t bd = bit_depth_;
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8,  uint8_t,  s[kNumCoeffs]);
+  DECLARE_ALIGNED(8,  uint8_t,  ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
-    // mblim  <= 3 * MAX_LOOP_FILTER + 4
-    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
-      tmp = rnd.Rand8();
-    }
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    // lim  <= MAX_LOOP_FILTER
-    while (tmp > MAX_LOOP_FILTER) {
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -343,19 +377,12 @@ TEST_P(Loop8Test9Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    // mblim  <= 3 * MAX_LOOP_FILTER + 4
-    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -427,29 +454,22 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8,  uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8,  uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
-    // mblim  <= 3 * MAX_LOOP_FILTER + 4
-    while (tmp > 3 * MAX_LOOP_FILTER + 4) {
-      tmp = rnd.Rand8();
-    }
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -459,18 +479,12 @@ TEST_P(Loop8Test9Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > 3 * MAX_LOOP_FILTER + 4) {  // mblim  <= 3*MAX_LOOP_FILTER + 4
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
-    while (tmp > MAX_LOOP_FILTER) {  // lim  <= MAX_LOOP_FILTER
-      tmp = rnd.Rand8();
-    }
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -486,7 +500,7 @@ TEST_P(Loop8Test9Param, ValueCheck) {
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    vpx_bit_depth_t bd = bit_depth_;
+    const int32_t bd = bit_depth_;
    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0,
                       blimit1, limit1, thresh1, bd);
    ASM_REGISTER_STATE_CHECK(
@@ -518,118 +532,187 @@ using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
+    SSE2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_8),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 8, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_10),
+                   &wrapper_vertical_16_c, 8, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 10, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 10, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, VPX_BITS_12),
+                   &wrapper_vertical_16_c, 10, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_sse2,
+                   &vpx_highbd_lpf_horizontal_4_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_vertical_4_sse2,
+                   &vpx_highbd_lpf_vertical_4_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
+                   &vpx_highbd_lpf_horizontal_8_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 12, 1),
+        make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                   &vpx_highbd_lpf_horizontal_16_c, 12, 2),
+        make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
+                   &vpx_highbd_lpf_vertical_8_c, 12, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, VPX_BITS_12)));
+                   &wrapper_vertical_16_c, 12, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 10, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 12, 1)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
+    SSE2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c,
-                   VPX_BITS_8),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c,
-                   VPX_BITS_8),
-        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c,
-                   VPX_BITS_8)));
+        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 8, 1)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

+#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8,
+                   2)));
+#endif
+
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
+    SSE2, Loop8Test9Param,
    ::testing::Values(
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, VPX_BITS_8),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, VPX_BITS_10),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, VPX_BITS_12)));
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 8),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 10),
+        make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2,
+                   &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2,
+                   &vpx_highbd_lpf_vertical_4_dual_c, 12),
+        make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2,
+                   &vpx_highbd_lpf_vertical_8_dual_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
+    SSE2, Loop8Test9Param,
    ::testing::Values(
-        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c,
-                   VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_SSE2
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_10),
-        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_4_dual_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_horizontal_8_dual_sse2,
-                   &vp9_highbd_lpf_horizontal_8_dual_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_vertical_4_dual_sse2,
-                   &vp9_highbd_lpf_vertical_4_dual_c, VPX_BITS_12),
-        make_tuple(&vp9_highbd_lpf_vertical_8_dual_sse2,
-                   &vp9_highbd_lpf_vertical_8_dual_c, VPX_BITS_12)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test9Param,
-    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
-                   &vp9_lpf_horizontal_4_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_lpf_horizontal_8_dual_sse2,
-                   &vp9_lpf_horizontal_8_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_lpf_vertical_4_dual_sse2,
-                   &vp9_lpf_vertical_4_dual_c, VPX_BITS_8),
-        make_tuple(&vp9_lpf_vertical_8_dual_sse2,
-                   &vp9_lpf_vertical_8_dual_c, VPX_BITS_8)));
+        make_tuple(&vpx_lpf_horizontal_4_dual_sse2,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dual_sse2,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_sse2,
+                   &vpx_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_sse2,
+                   &vpx_lpf_vertical_8_dual_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif
+
+#if HAVE_NEON
+#if CONFIG_VP9_HIGHBITDEPTH
+// No neon high bitdepth functions.
+#else
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,
+    ::testing::Values(
+#if HAVE_NEON_ASM
+// Using #if inside the macro is unsupported on MSVS but the tests are not
+// currently built for MSVS with ARM and NEON.
+        make_tuple(&vpx_lpf_horizontal_16_neon,
+                   &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_neon,
+                   &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_vertical_16_neon,
+                   &wrapper_vertical_16_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_neon,
+                   &wrapper_vertical_16_dual_c, 8, 1),
+#endif  // HAVE_NEON_ASM
+        make_tuple(&vpx_lpf_horizontal_8_neon,
+                   &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_vertical_8_neon,
+                   &vpx_lpf_vertical_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_4_neon,
+                   &vpx_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&vpx_lpf_vertical_4_neon,
+                   &vpx_lpf_vertical_4_c, 8, 1)));
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test9Param,
+    ::testing::Values(
+#if HAVE_NEON_ASM
+        make_tuple(&vpx_lpf_horizontal_8_dual_neon,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_neon,
+                   &vpx_lpf_vertical_8_dual_c, 8),
+#endif  // HAVE_NEON_ASM
+        make_tuple(&vpx_lpf_horizontal_4_dual_neon,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_neon,
+                   &vpx_lpf_vertical_4_dual_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test9Param,
+    ::testing::Values(
+        make_tuple(&vpx_lpf_horizontal_4_dual_msa,
+                   &vpx_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_8_dual_msa,
+                   &vpx_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_4_dual_msa,
+                   &vpx_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vpx_lpf_vertical_8_dual_msa,
+                   &vpx_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+
 }  // namespace
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -42,6 +42,10 @@ class MD5 {
    }
  }

+  void Add(const uint8_t *data, size_t size) {
+    MD5Update(&md5_, data, static_cast<uint32_t>(size));
+  }
+
  const char *Get(void) {
    static const char hex[16] = {
      '0', '1', '2', '3', '4', '5', '6', '7',
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -13,12 +13,13 @@
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vp9_rtcd.h"
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_integer.h"
@@ -74,16 +75,16 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);

  const int count_test_block = 1000;
  const int block_size = size * size;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
@@ -153,10 +154,10 @@ TEST_P(PartialIDctTest, ResultsMatch) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
  const int count_test_block = 1000;
  const int max_coeff = 32766 / 4;
  const int block_size = size * size;
@@ -201,117 +202,142 @@ using std::tr1::make_tuple;
 INSTANTIATE_TEST_CASE_P(
    C, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_c,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_c,
                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_c,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_c,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_c,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_c,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_c,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_c,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_c,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_c,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_c,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_c,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_c,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_c,
                   TX_4X4, 1)));

-#if HAVE_NEON_ASM
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_neon,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_neon,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_neon,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_neon,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_neon,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_neon,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_neon,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_neon,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_neon,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_neon,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_neon,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_neon,
                   TX_4X4, 1)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSE2, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_34_add_sse2,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_sse2,
                   TX_32X32, 34),
-        make_tuple(&vp9_fdct32x32_c,
-                   &vp9_idct32x32_1024_add_c,
-                   &vp9_idct32x32_1_add_sse2,
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_sse2,
                   TX_32X32, 1),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_sse2,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_sse2,
                   TX_16X16, 10),
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_1_add_sse2,
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_sse2,
                   TX_16X16, 1),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_sse2,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_sse2,
                   TX_8X8, 12),
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_1_add_sse2,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_sse2,
                   TX_8X8, 1),
-        make_tuple(&vp9_fdct4x4_c,
-                   &vp9_idct4x4_16_add_c,
-                   &vp9_idct4x4_1_add_sse2,
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_sse2,
                   TX_4X4, 1)));
 #endif

-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSSE3 && CONFIG_USE_X86INC && ARCH_X86_64 && \
+    !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    SSSE3_64, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct8x8_c,
-                   &vp9_idct8x8_64_add_c,
-                   &vp9_idct8x8_12_add_ssse3,
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_ssse3,
                   TX_8X8, 12)));
 #endif

-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    SSSE3, PartialIDctTest,
+    MSA, PartialIDctTest,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c,
-                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_ssse3,
-                   TX_16X16, 10)));
-#endif
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_34_add_msa,
+                   TX_32X32, 34),
+        make_tuple(&vpx_fdct32x32_c,
+                   &vpx_idct32x32_1024_add_c,
+                   &vpx_idct32x32_1_add_msa,
+                   TX_32X32, 1),
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_10_add_msa,
+                   TX_16X16, 10),
+        make_tuple(&vpx_fdct16x16_c,
+                   &vpx_idct16x16_256_add_c,
+                   &vpx_idct16x16_1_add_msa,
+                   TX_16X16, 1),
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_12_add_msa,
+                   TX_8X8, 10),
+        make_tuple(&vpx_fdct8x8_c,
+                   &vpx_idct8x8_64_add_c,
+                   &vpx_idct8x8_1_add_msa,
+                   TX_8X8, 1),
+        make_tuple(&vpx_fdct4x4_c,
+                   &vpx_idct4x4_16_add_c,
+                   &vpx_idct4x4_1_add_msa,
+                   TX_4X4, 1)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
 }  // namespace
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -63,12 +63,12 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
-  (void)vpx_memset(flimits, 255, block_width);
+  (void)memset(flimits, 255, block_width);

  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
-  (void)vpx_memset(src_image, 10, input_size);
+  (void)memset(src_image, 10, input_size);
  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
@@ -78,7 +78,7 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  }

  // Initialize pixels in the output to 99.
-  (void)vpx_memset(dst_image, 99, output_size);
+  (void)memset(dst_image, 99, output_size);

  ASM_REGISTER_STATE_CHECK(
      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
@@ -110,4 +110,9 @@ INSTANTIATE_TEST_CASE_P(SSE2, VP8PostProcessingFilterTest,
    ::testing::Values(vp8_post_proc_down_and_across_mb_row_sse2));
 #endif

+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(MSA, VP8PostProcessingFilterTest,
+    ::testing::Values(vp8_post_proc_down_and_across_mb_row_msa));
+#endif
+
 }  // namespace
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -8,346 +8,196 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include <math.h>
-#include <stdlib.h>
 #include <string.h>

 #include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-
-#include "./vpx_config.h"
-#include "./vp9_rtcd.h"
-#include "vp9/common/vp9_entropy.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/onyx.h"
+#include "vp8/encoder/block.h"
+#include "vp8/encoder/onyx_int.h"
+#include "vp8/encoder/quantize.h"
 #include "vpx/vpx_integer.h"
-
-using libvpx_test::ACMRandom;
+#include "vpx_mem/vpx_mem.h"

 namespace {
-#if CONFIG_VP9_HIGHBITDEPTH
-const int number_of_iterations = 100;

-typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
-                           int skip_block, const int16_t *zbin,
-                           const int16_t *round, const int16_t *quant,
-                           const int16_t *quant_shift,
-                           tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                           const int16_t *dequant, int zbin_oq_value,
-                           uint16_t *eob, const int16_t *scan,
-                           const int16_t *iscan);
-typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
-    QuantizeParam;
-class QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
- public:
-  virtual ~QuantizeTest() {}
-  virtual void SetUp() {
-    quantize_op_   = GET_PARAM(0);
-    ref_quantize_op_ = GET_PARAM(1);
-    bit_depth_  = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
+const int kNumBlocks = 25;
+const int kNumBlockEntries = 16;

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);

- protected:
-  vpx_bit_depth_t bit_depth_;
-  int mask_;
-  QuantizeFunc quantize_op_;
-  QuantizeFunc ref_quantize_op_;
-};
-class Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
- public:
-  virtual ~Quantize32Test() {}
-  virtual void SetUp() {
-    quantize_op_   = GET_PARAM(0);
-    ref_quantize_op_ = GET_PARAM(1);
-    bit_depth_  = GET_PARAM(2);
-    mask_ = (1 << bit_depth_) - 1;
-  }
+typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  vpx_bit_depth_t bit_depth_;
-  int mask_;
-  QuantizeFunc quantize_op_;
-  QuantizeFunc ref_quantize_op_;
-};
-
-TEST_P(QuantizeTest, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int zbin_oq_value = 0;
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,           1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = rnd.Rand16()&mask_;
-    }
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr,
-                                          zbin_oq_value, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Quantization Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
-TEST_P(Quantize32Test, OperationCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int zbin_oq_value = 0;
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,           1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = TX_32X32;
-    TX_TYPE tx_type = (TX_TYPE)(i % 4);
-
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 1024
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-      coeff_ptr[j] = rnd.Rand16()&mask_;
-    }
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr,
-                                          zbin_oq_value, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Quantization Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
-TEST_P(QuantizeTest, EOBCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int zbin_oq_value = 0;
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  256);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 256);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    // Two random entries
-    for (int j = 0; j < count; j++) {
-        coeff_ptr[j] = 0;
-    }
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr,
-                                          zbin_oq_value, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Quantization Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
-TEST_P(Quantize32Test, EOBCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int zbin_oq_value = 0;
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_ptr,       1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  zbin_ptr,          2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  round_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_ptr,         2);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  quant_shift_ptr,   2);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, qcoeff_ptr,      1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, dqcoeff_ptr,     1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_qcoeff_ptr,  1024);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_dqcoeff_ptr, 1024);
-  DECLARE_ALIGNED_ARRAY(16, int16_t,  dequant_ptr,       2);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, eob_ptr,           1);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_eob_ptr,       1);
-  int err_count_total = 0;
-  int first_failure = -1;
-  for (int i = 0; i < number_of_iterations; ++i) {
-    int skip_block = i == 0;
-    TX_SIZE sz = TX_32X32;
-    TX_TYPE tx_type = (TX_TYPE)(i % 4);
-    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);  // 1024
-    int err_count = 0;
-    *eob_ptr = rnd.Rand16();
-    *ref_eob_ptr = *eob_ptr;
-    for (int j = 0; j < count; j++) {
-        coeff_ptr[j] = 0;
-    }
-    // Two random entries
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
-    for (int j = 0; j < 2; j++) {
-      zbin_ptr[j] = rnd.Rand16()&mask_;
-      round_ptr[j] = rnd.Rand16();
-      quant_ptr[j] = rnd.Rand16();
-      quant_shift_ptr[j] = rnd.Rand16();
-      dequant_ptr[j] = rnd.Rand16();
-    }
-
-    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
-                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
-                     ref_dqcoeff_ptr, dequant_ptr, zbin_oq_value,
-                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
-    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
-                                          zbin_ptr, round_ptr, quant_ptr,
-                                          quant_shift_ptr, qcoeff_ptr,
-                                          dqcoeff_ptr, dequant_ptr,
-                                          zbin_oq_value, eob_ptr,
-                                          scan_order->scan, scan_order->iscan));
-
-    for (int j = 0; j < sz; ++j) {
-      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
-                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
-    }
-    err_count += (*ref_eob_ptr != *eob_ptr);
-    if (err_count && !err_count_total) {
-      first_failure = i;
-    }
-    err_count_total += err_count;
-  }
-  EXPECT_EQ(0, err_count_total)
-    << "Error: Quantization Test, C output doesn't match SSE2 output. "
-    << "First failed at test case " << first_failure;
-}
+using libvpx_test::ACMRandom;
 using std::tr1::make_tuple;

+// Create and populate a VP8_COMP instance which has a complete set of
+// quantization inputs as well as a second MACROBLOCKD for output.
+class QuantizeTestBase {
+ public:
+  virtual ~QuantizeTestBase() {
+    vp8_remove_compressor(&vp8_comp_);
+    vp8_comp_ = NULL;
+    vpx_free(macroblockd_dst_);
+    macroblockd_dst_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  void SetupCompressor() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+
+    // The full configuration is necessary to generate the quantization tables.
+    VP8_CONFIG vp8_config;
+    memset(&vp8_config, 0, sizeof(vp8_config));
+
+    vp8_comp_ = vp8_create_compressor(&vp8_config);
+
+    // Set the tables based on a quantizer of 0.
+    vp8_set_quantizer(vp8_comp_, 0);
+
+    // Set up all the block/blockd pointers for the mb in vp8_comp_.
+    vp8cx_frame_init_quantizer(vp8_comp_);
+
+    // Copy macroblockd from the reference to get pre-set-up dequant values.
+    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
+        vpx_memalign(32, sizeof(*macroblockd_dst_)));
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
+    // Fix block pointers - currently they point to the blocks in the reference
+    // structure.
+    vp8_setup_block_dptrs(macroblockd_dst_);
+  }
+
+  void UpdateQuantizer(int q) {
+    vp8_set_quantizer(vp8_comp_, q);
+
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
+    vp8_setup_block_dptrs(macroblockd_dst_);
+  }
+
+  void FillCoeffConstant(int16_t c) {
+    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
+      vp8_comp_->mb.coeff[i] = c;
+    }
+  }
+
+  void FillCoeffRandom() {
+    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
+      vp8_comp_->mb.coeff[i] = rnd_.Rand8();
+    }
+  }
+
+  void CheckOutput() {
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
+                        sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks *
+                            kNumBlockEntries))
+        << "qcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
+                        sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks *
+                            kNumBlockEntries))
+        << "dqcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
+                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
+        << "eobs mismatch";
+  }
+
+  VP8_COMP *vp8_comp_;
+  MACROBLOCKD *macroblockd_dst_;
+
+ private:
+  ACMRandom rnd_;
+};
+
+class QuantizeTest : public QuantizeTestBase,
+                     public ::testing::TestWithParam<VP8QuantizeParam> {
+ protected:
+  virtual void SetUp() {
+    SetupCompressor();
+    asm_quant_ = GET_PARAM(0);
+    c_quant_ = GET_PARAM(1);
+  }
+
+  void RunComparison() {
+    for (int i = 0; i < kNumBlocks; ++i) {
+      ASM_REGISTER_STATE_CHECK(
+          c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i]));
+      ASM_REGISTER_STATE_CHECK(
+          asm_quant_(&vp8_comp_->mb.block[i], &macroblockd_dst_->block[i]));
+    }
+
+    CheckOutput();
+  }
+
+ private:
+  VP8Quantize asm_quant_;
+  VP8Quantize c_quant_;
+};
+
+TEST_P(QuantizeTest, TestZeroInput) {
+  FillCoeffConstant(0);
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestLargeNegativeInput) {
+  FillCoeffConstant(0);
+  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
+  // like BUG=883 where the constant being compared was incorrectly initialized.
+  vp8_comp_->mb.coeff[0] = -8191;
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestRandomInput) {
+  FillCoeffRandom();
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestMultipleQ) {
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    UpdateQuantizer(q);
+    FillCoeffRandom();
+    RunComparison();
+  }
+}
+
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-  SSE2_C_COMPARE, QuantizeTest,
-  ::testing::Values(
-    make_tuple(&vp9_highbd_quantize_b_sse2,
-               &vp9_highbd_quantize_b_c, VPX_BITS_8),
-    make_tuple(&vp9_highbd_quantize_b_sse2,
-               &vp9_highbd_quantize_b_c, VPX_BITS_10),
-    make_tuple(&vp9_highbd_quantize_b_sse2,
-               &vp9_highbd_quantize_b_c, VPX_BITS_12)));
-INSTANTIATE_TEST_CASE_P(
-  SSE2_C_COMPARE, Quantize32Test,
-  ::testing::Values(
-    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
-    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
-    make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
-               &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
+    SSE2, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
 #endif  // HAVE_SSE2
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, QuantizeTest,
+    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
+                                 &vp8_regular_quantize_b_c)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_MSA
 }  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -30,7 +30,9 @@

 #if defined(_WIN64)

-#define _WIN32_LEAN_AND_MEAN
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #include <winnt.h>

@@ -96,7 +98,7 @@ class RegisterStateCheck {

 extern "C" {
 // Save the d8-d15 registers into store.
-void vp9_push_neon(int64_t *store);
+void vpx_push_neon(int64_t *store);
 }

 namespace libvpx_test {
@@ -111,7 +113,7 @@ class RegisterStateCheck {

 private:
  static bool StoreRegisters(int64_t store[8]) {
-    vp9_push_neon(store);
+    vpx_push_neon(store);
    return true;
  }

@@ -119,7 +121,7 @@ class RegisterStateCheck {
  bool Check() const {
    if (!initialized_) return false;
    int64_t post_store[8];
-    vp9_push_neon(post_store);
+    vpx_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
      EXPECT_EQ(pre_store_[i], post_store[i]) << "d"
          << i + 8 << " has been modified";
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -81,6 +81,15 @@ static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
 const unsigned int kInitialWidth = 320;
 const unsigned int kInitialHeight = 240;

+struct FrameInfo {
+  FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
+      : pts(_pts), w(_w), h(_h) {}
+
+  vpx_codec_pts_t pts;
+  unsigned int w;
+  unsigned int h;
+};
+
 unsigned int ScaleForFrameNumber(unsigned int frame, unsigned int val) {
  if (frame < 10)
    return val;
@@ -120,15 +129,6 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

  virtual ~ResizeTest() {}

-  struct FrameInfo {
-    FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
-        : pts(_pts), w(_w), h(_h) {}
-
-    vpx_codec_pts_t pts;
-    unsigned int w;
-    unsigned int h;
-  };
-
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
@@ -144,6 +144,7 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

 TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
+  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
@@ -153,9 +154,9 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);

    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << "had unexpected width";
+        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << "had unexpected height";
+        << "Frame " << frame << " had unexpected height";
  }
 }

@@ -195,13 +196,274 @@ class ResizeInternalTest : public ResizeTest {

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
-    if (video->frame() == kStepDownFrame) {
-      struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
-      encoder->Control(VP8E_SET_SCALEMODE, &mode);
+    if (change_config_) {
+      int new_q = 60;
+      if (video->frame() == 0) {
+        struct vpx_scaling_mode mode = {VP8E_ONETWO, VP8E_ONETWO};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == 1) {
+        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+        cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q;
+        encoder->Config(&cfg_);
+      }
+    } else {
+      if (video->frame() == kStepDownFrame) {
+        struct vpx_scaling_mode mode = {VP8E_FOURFIVE, VP8E_THREEFIVE};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
+      if (video->frame() == kStepUpFrame) {
+        struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
+        encoder->Control(VP8E_SET_SCALEMODE, &mode);
+      }
    }
-    if (video->frame() == kStepUpFrame) {
-      struct vpx_scaling_mode mode = {VP8E_NORMAL, VP8E_NORMAL};
-      encoder->Control(VP8E_SET_SCALEMODE, &mode);
+  }
+
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (!frame0_psnr_)
+      frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;
+  bool change_config_;
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  change_config_ = false;
+
+  // q picked such that initial keyframe on this clip is ~30dB PSNR
+  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+
+  // If the number of frames being encoded is smaller than g_lag_in_frames
+  // the encoded frame is unavailable using the current API. Comparing
+  // frames to detect mismatch would then not be possible. Set
+  // g_lag_in_frames = 0 to get around this.
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    const vpx_codec_pts_t pts = info->pts;
+    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
+      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
+      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
+    } else {
+      EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
+      EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
+    }
+  }
+}
+
+TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 10);
+  cfg_.g_w = 352;
+  cfg_.g_h = 288;
+  change_config_ = true;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+class ResizeInternalRealtimeTest : public ::libvpx_test::EncoderTest,
+  public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  ResizeInternalRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~ResizeInternalRealtimeTest() {}
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP9E_SET_AQ_MODE, 3);
+      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+    }
+
+    if (change_bitrate_ && video->frame() == 120) {
+      change_bitrate_ = false;
+      cfg_.rc_target_bitrate = 500;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+  }
+
+  virtual void DecompressedFrameHook(const vpx_image_t &img,
+                                     vpx_codec_pts_t pts) {
+    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
+  }
+
+  void DefaultConfig() {
+    cfg_.g_w = 352;
+    cfg_.g_h = 288;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.kf_mode = VPX_KF_AUTO;
+    cfg_.g_lag_in_frames = 0;
+    cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
+    // Enable dropped frames.
+    cfg_.rc_dropframe_thresh = 1;
+    // Enable error_resilience mode.
+    cfg_.g_error_resilient  = 1;
+    // Enable dynamic resizing.
+    cfg_.rc_resize_allowed = 1;
+    // Run at low bitrate.
+    cfg_.rc_target_bitrate = 200;
+  }
+
+  std::vector< FrameInfo > frame_info_list_;
+  int set_cpu_used_;
+  bool change_bitrate_;
+};
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Run at low bitrate, with resize_allowed = 1, and verify that we get
+// one resize down event.
+TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDown) {
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  DefaultConfig();
+  change_bitrate_ = false;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  int resize_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      // Verify that resize down occurs.
+      ASSERT_LT(info->w, last_w);
+      ASSERT_LT(info->h, last_h);
+      last_w = info->w;
+      last_h = info->h;
+      resize_count++;
+    }
+  }
+
+  // Verify that we get 1 resize down event in this test.
+  ASSERT_EQ(1, resize_count) << "Resizing should occur.";
+}
+
+// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
+// Start at low target bitrate, raise the bitrate in the middle of the clip,
+// scaling-up should occur after bitrate changed.
+TEST_P(ResizeInternalRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  DefaultConfig();
+  change_bitrate_ = true;
+  // Disable dropped frames.
+  cfg_.rc_dropframe_thresh = 0;
+  // Starting bitrate low.
+  cfg_.rc_target_bitrate = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  unsigned int last_w = cfg_.g_w;
+  unsigned int last_h = cfg_.g_h;
+  int resize_count = 0;
+  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
+       info != frame_info_list_.end(); ++info) {
+    if (info->w != last_w || info->h != last_h) {
+      resize_count++;
+      if (resize_count == 1) {
+        // Verify that resize down occurs.
+        ASSERT_LT(info->w, last_w);
+        ASSERT_LT(info->h, last_h);
+      } else if (resize_count == 2) {
+        // Verify that resize up occurs.
+        ASSERT_GT(info->w, last_w);
+        ASSERT_GT(info->h, last_h);
+      }
+      last_w = info->w;
+      last_h = info->h;
+    }
+  }
+
+  // Verify that we get 2 resize events in this test.
+  ASSERT_EQ(2, resize_count) << "Resizing should occur twice.";
+}
+
+vpx_img_fmt_t CspForFrameNumber(int frame) {
+  if (frame < 10)
+    return VPX_IMG_FMT_I420;
+  if (frame < 20)
+    return VPX_IMG_FMT_I444;
+  return VPX_IMG_FMT_I420;
+}
+
+class ResizeCspTest : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeCspTest()
+      : ResizeTest(),
+        frame0_psnr_(0.0),
+        outfile_(NULL),
+        out_frames_(0) {}
+#else
+  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
+#endif
+
+  virtual ~ResizeCspTest() {}
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("vp91-2-05-cspchape.ivf", "wb");
+#endif
+  }
+
+  virtual void EndPassHook() {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+#endif
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  libvpx_test::Encoder *encoder) {
+    if (CspForFrameNumber(video->frame()) != VPX_IMG_FMT_I420 &&
+        cfg_.g_profile != 1) {
+      cfg_.g_profile = 1;
+      encoder->Config(&cfg_);
+    }
+    if (CspForFrameNumber(video->frame()) == VPX_IMG_FMT_I420 &&
+        cfg_.g_profile != 0) {
+      cfg_.g_profile = 0;
+      encoder->Config(&cfg_);
    }
  }

@@ -232,35 +494,39 @@ class ResizeInternalTest : public ResizeTest {
 #endif
 };

-TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 10);
+class ResizingCspVideoSource : public ::libvpx_test::DummyVideoSource {
+ public:
+  ResizingCspVideoSource() {
+    SetSize(kInitialWidth, kInitialHeight);
+    limit_ = 30;
+  }
+
+  virtual ~ResizingCspVideoSource() {}
+
+ protected:
+  virtual void Next() {
+    ++frame_;
+    SetImageFormat(CspForFrameNumber(frame_));
+    FillFrame();
+  }
+};
+
+TEST_P(ResizeCspTest, TestResizeCspWorks) {
+  ResizingCspVideoSource video;
  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  // q picked such that initial keyframe on this clip is ~30dB PSNR
  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
-
-  // If the number of frames being encoded is smaller than g_lag_in_frames
-  // the encoded frame is unavailable using the current API. Comparing
-  // frames to detect mismatch would then not be possible. Set
-  // g_lag_in_frames = 0 to get around this.
  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
-       info != frame_info_list_.end(); ++info) {
-    const vpx_codec_pts_t pts = info->pts;
-    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
-      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
-      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
-    } else {
-      EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
-      EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
-    }
-  }
 }

 VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ResizeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
 VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
                          ::testing::Values(::libvpx_test::kOnePassBest));
+VP9_INSTANTIATE_TEST_CASE(ResizeInternalRealtimeTest,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 9));
+VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
 }  // namespace
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -53,7 +53,7 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  cpi.common.mb_rows = 240 >> 4;
  cpi.common.mb_cols = 320 >> 4;
  const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
-  vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));
+  memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));

  // Segment map
  cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
@@ -61,9 +61,9 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  // Allocate memory for the source memory map.
  unsigned char *roi_map =
    reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
-  vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
-  vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
-  vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));
+  memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
+  memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
+  memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));

  // Do a test call with valid parameters.
  int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -11,13 +11,15 @@
 #include <math.h>
 #include <stdlib.h>
 #include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

@@ -199,7 +201,7 @@ const SixtapPredictFunc sixtap_16x16_neon = vp8_sixtap_predict16x16_neon;
 const SixtapPredictFunc sixtap_8x8_neon = vp8_sixtap_predict8x8_neon;
 const SixtapPredictFunc sixtap_8x4_neon = vp8_sixtap_predict8x4_neon;
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_NEON, SixtapPredictTest, ::testing::Values(
+    NEON, SixtapPredictTest, ::testing::Values(
        make_tuple(16, 16, sixtap_16x16_neon),
        make_tuple(8, 8, sixtap_8x8_neon),
        make_tuple(8, 4, sixtap_8x4_neon)));
@@ -238,4 +240,16 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(8, 4, sixtap_8x4_ssse3),
        make_tuple(4, 4, sixtap_4x4_ssse3)));
 #endif
+#if HAVE_MSA
+const SixtapPredictFunc sixtap_16x16_msa = vp8_sixtap_predict16x16_msa;
+const SixtapPredictFunc sixtap_8x8_msa = vp8_sixtap_predict8x8_msa;
+const SixtapPredictFunc sixtap_8x4_msa = vp8_sixtap_predict8x4_msa;
+const SixtapPredictFunc sixtap_4x4_msa = vp8_sixtap_predict4x4_msa;
+INSTANTIATE_TEST_CASE_P(
+    MSA, SixtapPredictTest, ::testing::Values(
+        make_tuple(16, 16, sixtap_16x16_msa),
+        make_tuple(8, 8, sixtap_8x8_msa),
+        make_tuple(8, 4, sixtap_8x4_msa),
+        make_tuple(4, 4, sixtap_4x4_msa)));
+#endif
 }  // namespace
--- a/test/subtract_test.cc
+++ b/test/subtract_test.cc
@@ -1,123 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "vp8/common/blockd.h"
-#include "vp8/encoder/block.h"
-#include "vpx_mem/vpx_mem.h"
-
-typedef void (*SubtractBlockFunc)(BLOCK *be, BLOCKD *bd, int pitch);
-
-namespace {
-
-class SubtractBlockTest : public ::testing::TestWithParam<SubtractBlockFunc> {
- public:
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-  }
-};
-
-using libvpx_test::ACMRandom;
-
-TEST_P(SubtractBlockTest, SimpleSubtract) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  BLOCK be;
-  BLOCKD bd;
-  // in libvpx, this stride is always 16
-  const int kDiffPredStride = 16;
-  const int kSrcStride[] = {32, 16, 8, 4, 0};
-  const int kBlockWidth = 4;
-  const int kBlockHeight = 4;
-
-  // Allocate... align to 16 for mmx/sse tests
-  uint8_t *source = reinterpret_cast<uint8_t*>(
-      vpx_memalign(16, kBlockHeight * kSrcStride[0] * sizeof(*source)));
-  be.src_diff = reinterpret_cast<int16_t*>(
-      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*be.src_diff)));
-  bd.predictor = reinterpret_cast<unsigned char*>(
-      vpx_memalign(16, kBlockHeight * kDiffPredStride * sizeof(*bd.predictor)));
-
-  for (int i = 0; kSrcStride[i] > 0; ++i) {
-    // start at block0
-    be.src = 0;
-    be.base_src = &source;
-    be.src_stride = kSrcStride[i];
-
-    // set difference
-    int16_t *src_diff = be.src_diff;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        src_diff[c] = static_cast<int16_t>(0xa5a5u);
-      }
-      src_diff += kDiffPredStride;
-    }
-
-    // set destination
-    uint8_t *base_src = *be.base_src;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        base_src[c] = rnd.Rand8();
-      }
-      base_src += be.src_stride;
-    }
-
-    // set predictor
-    uint8_t *predictor = bd.predictor;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        predictor[c] = rnd.Rand8();
-      }
-      predictor += kDiffPredStride;
-    }
-
-    ASM_REGISTER_STATE_CHECK(GetParam()(&be, &bd, kDiffPredStride));
-
-    base_src = *be.base_src;
-    src_diff = be.src_diff;
-    predictor = bd.predictor;
-    for (int r = 0; r < kBlockHeight; ++r) {
-      for (int c = 0; c < kBlockWidth; ++c) {
-        EXPECT_EQ(base_src[c], (src_diff[c] + predictor[c])) << "r = " << r
-                                                             << ", c = " << c;
-      }
-      src_diff += kDiffPredStride;
-      predictor += kDiffPredStride;
-      base_src += be.src_stride;
-    }
-  }
-  vpx_free(be.src_diff);
-  vpx_free(source);
-  vpx_free(bd.predictor);
-}
-
-INSTANTIATE_TEST_CASE_P(C, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_c));
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_neon));
-#endif
-
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(MMX, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_mmx));
-#endif
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, SubtractBlockTest,
-                        ::testing::Values(vp8_subtract_b_sse2));
-#endif
-
-}  // namespace
--- a/test/superframe_test.cc
+++ b/test/superframe_test.cc
@@ -16,8 +16,13 @@

 namespace {

+const int kTestMode = 0;
+const int kSuperframeSyntax = 1;
+
+typedef std::tr1::tuple<libvpx_test::TestMode,int> SuperframeTestParam;
+
 class SuperframeTest : public ::libvpx_test::EncoderTest,
-    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+    public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> {
 protected:
  SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL),
      last_sf_pts_(0) {}
@@ -25,9 +30,13 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,

  virtual void SetUp() {
    InitializeConfig();
-    SetMode(GET_PARAM(1));
+    const SuperframeTestParam input = GET_PARAM(1);
+    const libvpx_test::TestMode mode = std::tr1::get<kTestMode>(input);
+    const int syntax = std::tr1::get<kSuperframeSyntax>(input);
+    SetMode(mode);
    sf_count_ = 0;
    sf_count_max_ = INT_MAX;
+    is_vp10_style_superframe_ = syntax;
  }

  virtual void TearDown() {
@@ -50,7 +59,8 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
    const uint8_t marker = buffer[pkt->data.frame.sz - 1];
    const int frames = (marker & 0x7) + 1;
    const int mag = ((marker >> 3) & 3) + 1;
-    const unsigned int index_sz = 2 + mag  * frames;
+    const unsigned int index_sz =
+        2 + mag * (frames - is_vp10_style_superframe_);
    if ((marker & 0xe0) == 0xc0 &&
        pkt->data.frame.sz >= index_sz &&
        buffer[pkt->data.frame.sz - index_sz] == marker) {
@@ -75,6 +85,7 @@ class SuperframeTest : public ::libvpx_test::EncoderTest,
    return pkt;
  }

+  int is_vp10_style_superframe_;
  int sf_count_;
  int sf_count_max_;
  vpx_codec_cx_pkt_t modified_pkt_;
@@ -92,6 +103,11 @@ TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) {
  EXPECT_EQ(sf_count_, 1);
 }

-VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Values(
-    ::libvpx_test::kTwoPassGood));
+VP9_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+    ::testing::Values(::libvpx_test::kTwoPassGood),
+    ::testing::Values(0)));
+
+VP10_INSTANTIATE_TEST_CASE(SuperframeTest, ::testing::Combine(
+    ::testing::Values(::libvpx_test::kTwoPassGood),
+    ::testing::Values(CONFIG_MISC_FIXES)));
 }  // namespace
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -63,6 +63,9 @@ class SvcTest : public ::testing::Test {
    vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
    VP9CodecFactory codec_factory;
    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
+
+    tile_columns_ = 0;
+    tile_rows_ = 0;
  }

  virtual void TearDown() {
@@ -75,6 +78,8 @@ class SvcTest : public ::testing::Test {
        vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
    EXPECT_EQ(VPX_CODEC_OK, res);
    vpx_codec_control(&codec_, VP8E_SET_CPUUSED, 4);  // Make the test faster
+    vpx_codec_control(&codec_, VP9E_SET_TILE_COLUMNS, tile_columns_);
+    vpx_codec_control(&codec_, VP9E_SET_TILE_ROWS, tile_rows_);
    codec_initialized_ = true;
  }

@@ -108,7 +113,8 @@ class SvcTest : public ::testing::Test {
    codec_enc_.g_pass = VPX_RC_FIRST_PASS;
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -176,7 +182,8 @@ class SvcTest : public ::testing::Test {
    }
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -225,10 +232,9 @@ class SvcTest : public ::testing::Test {
    EXPECT_EQ(received_frames, n);
  }

-  void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs,
-                                        const int num_super_frames,
-                                        const int remained_spatial_layers,
-                                        const bool is_multiple_frame_contexts) {
+  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
+                             const int num_super_frames,
+                             const int remained_spatial_layers) {
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(num_super_frames, 0);
    ASSERT_GT(remained_spatial_layers, 0);
@@ -250,45 +256,6 @@ class SvcTest : public ::testing::Test {
      if (frame_count == 0) {
        // There's no super frame but only a single frame.
        ASSERT_EQ(1, remained_spatial_layers);
-        if (is_multiple_frame_contexts) {
-          // Make a new super frame.
-          uint8_t marker = 0xc1;
-          unsigned int mask;
-          int mag;
-
-          // Choose the magnitude.
-          for (mag = 0, mask = 0xff; mag < 4; ++mag) {
-            if (inputs[i].sz < mask)
-              break;
-            mask <<= 8;
-            mask |= 0xff;
-          }
-          marker |= mag << 3;
-          int index_sz = 2 + (mag + 1) * 2;
-
-          inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16);
-          ASSERT_TRUE(inputs[i].buf != NULL);
-          uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
-          frame_data[0] &= ~2;      // Set the show_frame flag to 0.
-          frame_data += inputs[i].sz;
-          // Add an one byte frame with show_existing_frame.
-          *frame_data++ = 0x88;
-
-          // Write the super frame index.
-          *frame_data++ = marker;
-
-          frame_sizes[0] = inputs[i].sz;
-          frame_sizes[1] = 1;
-          for (int j = 0; j < 2; ++j) {
-            unsigned int this_sz = frame_sizes[j];
-            for (int k = 0; k <= mag; k++) {
-              *frame_data++ = this_sz & 0xff;
-              this_sz >>= 8;
-            }
-          }
-          *frame_data++ = marker;
-          inputs[i].sz += index_sz + 1;
-        }
      } else {
        // Found a super frame.
        uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
@@ -304,16 +271,13 @@ class SvcTest : public ::testing::Test {
        }
        ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
            << "remained_spatial_layers: " << remained_spatial_layers
-            << "    super_frame: " << i
-            << "    is_multiple_frame_context: " << is_multiple_frame_contexts;
-        if (frame == frame_count - 1 && !is_multiple_frame_contexts)
+            << "    super_frame: " << i;
+        if (frame == frame_count - 1)
          continue;

        frame_data += frame_sizes[frame];

        // We need to add one more frame for multiple frame contexts.
-        if (is_multiple_frame_contexts)
-          ++frame;
        uint8_t marker =
            static_cast<const uint8_t*>(inputs[i].buf)[inputs[i].sz - 1];
        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -323,35 +287,14 @@ class SvcTest : public ::testing::Test {
        marker |= frame;

        // Copy existing frame sizes.
-        memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1),
-                frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
-        if (is_multiple_frame_contexts) {
-          // Add a one byte frame with flag show_existing_frame.
-          *frame_data++ = 0x88 | (remained_spatial_layers - 1);
-        }
+        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
+                new_index_sz - 2);
        // New marker.
        frame_data[0] = marker;
        frame_data += (mag * (frame + 1) + 1);

-        if (is_multiple_frame_contexts) {
-          // Write the frame size for the one byte frame.
-          frame_data -= mag;
-          *frame_data++ = 1;
-          for (uint32_t j = 1; j < mag; ++j) {
-            *frame_data++ = 0;
-          }
-        }
-
        *frame_data++ = marker;
        inputs[i].sz = frame_data - frame_start;
-
-        if (is_multiple_frame_contexts) {
-          // Change the show frame flag to 0 for all frames.
-          for (int j = 0; j < frame; ++j) {
-            frame_start[0] &= ~2;
-            frame_start += frame_sizes[j];
-          }
-        }
      }
    }
  }
@@ -374,6 +317,8 @@ class SvcTest : public ::testing::Test {
  std::string test_file_name_;
  bool codec_initialized_;
  Decoder *decoder_;
+  int tile_columns_;
+  int tile_rows_;
 };

 TEST_F(SvcTest, SvcInit) {
@@ -508,6 +453,7 @@ TEST_F(SvcTest, OnePassEncodeOneFrame) {

 TEST_F(SvcTest, OnePassEncodeThreeFrames) {
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
+  codec_enc_.g_lag_in_frames = 0;
  vpx_fixed_buf outputs[3];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);
@@ -555,7 +501,7 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -573,13 +519,13 @@ TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) {
  Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);

  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false);
+  DropEnhancementLayers(&outputs[0], 10, 4);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false);
+  DropEnhancementLayers(&outputs[0], 10, 3);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false);
+  DropEnhancementLayers(&outputs[0], 10, 2);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
@@ -616,9 +562,9 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
  DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false);
+  DropEnhancementLayers(&outputs[0], 20, 2);
  DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false);
+  DropEnhancementLayers(&outputs[0], 20, 1);
  DecodeNFrames(&outputs[0], 20);

  FreeBitstreamBuffers(&outputs[0], 20);
@@ -649,7 +595,6 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -667,7 +612,7 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -686,7 +631,6 @@ TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -707,32 +651,13 @@ TEST_F(SvcTest,
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);

-  vpx_fixed_buf outputs_new[10];
-  for (int i = 0; i < 10; ++i) {
-    outputs_new[i].buf = malloc(outputs[i].sz + 16);
-    ASSERT_TRUE(outputs_new[i].buf != NULL);
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true);
-  DecodeNFrames(&outputs_new[0], 10);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 2);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 1);
+  DecodeNFrames(&outputs[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
-  FreeBitstreamBuffers(&outputs_new[0], 10);
 }

 TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {
@@ -769,7 +694,6 @@ TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -814,7 +738,6 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

  vpx_fixed_buf base_layer[5];
  for (int i = 0; i < 5; ++i)
@@ -824,4 +747,51 @@ TEST_F(SvcTest,
  FreeBitstreamBuffers(&outputs[0], 10);
 }

+TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest,
+       TwoPassEncode2TemporalLayersWithMultipleFrameContextsAndTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  codec_enc_.g_error_resilient = 0;
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 "
+                      "multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
 }  // namespace
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -7,13 +7,19 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv

+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

@@ -550,6 +556,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
@@ -650,12 +658,38 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
+endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm
@@ -666,10 +700,16 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.iv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
@@ -684,8 +724,13 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s738
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
+# Encode / Decode test
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
 # BBB VP9 streams
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_426x240_tile_1x1_180kbps.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_640x360_tile_1x2_337kbps.webm
@@ -721,3 +766,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
 endif  # CONFIG_ENCODE_PERF_TESTS
+
+# sort and remove duplicates
+LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
--- a/test/test.mk
+++ b/test/test.mk
@@ -22,28 +22,37 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc

 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
-LIBVPX_TEST_SRCS-yes                   += encode_test_driver.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += encode_test_driver.h

+## IVF writing.
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../ivfenc.c ../ivfenc.h
+
 ## Y4m parsing.
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h

@@ -58,6 +67,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../tools_common.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.cc
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += decode_api_test.cc
@@ -82,6 +92,7 @@ endif
 ## shared library builds don't make these functions accessible.
 ##
 ifeq ($(CONFIG_SHARED),)
+LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += lpf_8_test.cc

 ## VP8
 ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
@@ -89,17 +100,17 @@ ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
 # These tests require both the encoder and decoder to be built.
 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
 LIBVPX_TEST_SRCS-yes                   += vp8_boolcoder_test.cc
+LIBVPX_TEST_SRCS-yes                   += vp8_fragments_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC)    += pp_filter_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc

 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
-LIBVPX_TEST_SRCS-yes                   += intrapred_test.cc
 LIBVPX_TEST_SRCS-yes                   += sixtap_predict_test.cc
 LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.cc

@@ -120,7 +131,7 @@ LIBVPX_TEST_SRCS-yes                   += partial_idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += superframe_test.cc
 LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 LIBVPX_TEST_SRCS-yes                   += vp9_boolcoder_test.cc
-
+LIBVPX_TEST_SRCS-yes                   += vp9_encoder_parms_get_to_decoder.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += convolve_test.cc
@@ -132,24 +143,34 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += quantize_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += error_block_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += vp9_intrapred_test.cc

 ifeq ($(CONFIG_VP9_ENCODER),yes)
 LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
+
 endif

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
 LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
 endif
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc

 endif # VP9

 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += sad_test.cc

+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) := test_intra_pred_speed.cc
+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9) += ../md5_utils.h ../md5_utils.c
+
+## VP10
+LIBVPX_TEST_SRCS-$(CONFIG_VP10_ENCODER) += vp10_dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP10) += vp10_inv_txfm_test.cc
+
 endif # CONFIG_SHARED

 include $(SRC_PATH_BARE)/test/test-data.mk
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -0,0 +1,384 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+//  Test and time VPX intra-predictor functions
+
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/md5_helper.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+#include "vpx_ports/vpx_timer.h"
+
+// -----------------------------------------------------------------------------
+
+namespace {
+
+typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
+                            const uint8_t *above, const uint8_t *left);
+
+const int kNumVp9IntraPredFuncs = 13;
+const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
+  "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", "H_PRED",
+  "D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", "D207_PRED", "D63_PRED",
+  "TM_PRED"
+};
+
+void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
+                   const char *const pred_func_names[], int num_funcs,
+                   const char *const signatures[], int block_size,
+                   int num_pixels_per_test) {
+  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+  const int kBPS = 32;
+  const int kTotalPixels = 32 * kBPS;
+  DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
+  DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
+  uint8_t *const above = above_mem + 16;
+  for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
+  for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
+  for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+
+  // some code assumes the top row has been extended:
+  // d45/d63 C-code, for instance, but not the assembly.
+  // TODO(jzern): this style of extension isn't strictly necessary.
+  ASSERT_LE(block_size, kBPS);
+  memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);
+
+  for (int k = 0; k < num_funcs; ++k) {
+    if (pred_funcs[k] == NULL) continue;
+    memcpy(src, ref_src, sizeof(src));
+    vpx_usec_timer timer;
+    vpx_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](src, kBPS, above, left);
+    }
+    libvpx_test::ClearSystemState();
+    vpx_usec_timer_mark(&timer);
+    const int elapsed_time =
+        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
+    libvpx_test::MD5 md5;
+    md5.Add(src, sizeof(src));
+    printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, pred_func_names[k],
+           elapsed_time, md5.Get());
+    EXPECT_STREQ(signatures[k], md5.Get());
+  }
+}
+
+void TestIntraPred4(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "4334156168b34ab599d9b5b30f522fe9",
+    "bc4649d5ba47c7ff178d92e475960fb0",
+    "8d316e5933326dcac24e1064794b5d12",
+    "a27270fed024eafd762c95de85f4da51",
+    "c33dff000d4256c2b8f3bf9e9bab14d2",
+    "44d8cddc2ad8f79b8ed3306051722b4f",
+    "eb54839b2bad6699d8946f01ec041cd0",
+    "ecb0d56ae5f677ea45127ce9d5c058e4",
+    "0b7936841f6813da818275944895b574",
+    "9117972ef64f91a58ff73e1731c81db2",
+    "c56d5e8c729e46825f46dd5d3b5d508a",
+    "c0889e2039bcf7bcb5d2f33cdca69adc",
+    "309a618577b27c648f9c5ee45252bc8f",
+  };
+  TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred8(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "7694ddeeefed887faf9d339d18850928",
+    "7d726b1213591b99f736be6dec65065b",
+    "19c5711281357a485591aaf9c96c0a67",
+    "ba6b66877a089e71cd938e3b8c40caac",
+    "802440c93317e0f8ba93fab02ef74265",
+    "9e09a47a15deb0b9d8372824f9805080",
+    "b7c2d8c662268c0c427da412d7b0311d",
+    "78339c1c60bb1d67d248ab8c4da08b7f",
+    "5c97d70f7d47de1882a6cd86c165c8a9",
+    "8182bf60688b42205acd95e59e967157",
+    "08323400005a297f16d7e57e7fe1eaac",
+    "95f7bfc262329a5849eda66d8f7c68ce",
+    "815b75c8e0d91cc1ae766dc5d3e445a3",
+  };
+  TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred16(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "b40dbb555d5d16a043dc361e6694fe53",
+    "fb08118cee3b6405d64c1fd68be878c6",
+    "6c190f341475c837cc38c2e566b64875",
+    "db5c34ccbe2c7f595d9b08b0dc2c698c",
+    "a62cbfd153a1f0b9fed13e62b8408a7a",
+    "143df5b4c89335e281103f610f5052e4",
+    "d87feb124107cdf2cfb147655aa0bb3c",
+    "7841fae7d4d47b519322e6a03eeed9dc",
+    "f6ebed3f71cbcf8d6d0516ce87e11093",
+    "3cc480297dbfeed01a1c2d78dd03d0c5",
+    "b9f69fa6532b372c545397dcb78ef311",
+    "a8fe1c70432f09d0c20c67bdb6432c4d",
+    "b8a41aa968ec108af447af4217cba91b",
+  };
+  TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred32(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "558541656d84f9ae7896db655826febe",
+    "b3587a1f9a01495fa38c8cd3c8e2a1bf",
+    "4c6501e64f25aacc55a2a16c7e8f0255",
+    "b3b01379ba08916ef6b1b35f7d9ad51c",
+    "0f1eb38b6cbddb3d496199ef9f329071",
+    "911c06efb9ed1c3b4c104b232b55812f",
+    "9225beb0ddfa7a1d24eaa1be430a6654",
+    "0a6d584a44f8db9aa7ade2e2fdb9fc9e",
+    "b01c9076525216925f3456f034fb6eee",
+    "d267e20ad9e5cd2915d1a47254d3d149",
+    "ed012a4a5da71f36c2393023184a0e59",
+    "f162b51ed618d28b936974cff4391da5",
+    "9e1370c6d42e08d357d9612c93a71cfc",
+  };
+  TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
+}
+
+}  // namespace
+
+// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
+// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
+#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
+                        d45, d135, d117, d153, d207, d63, tm)               \
+  TEST(arch, test_func) {                                                   \
+    static const VpxPredFunc vpx_intra_pred[] = {                           \
+        dc,   dc_left, dc_top, dc_128, v,   h, d45,                         \
+        d135, d117,    d153,   d207,   d63, tm};                            \
+    test_func(vpx_intra_pred);                                              \
+  }
+
+// -----------------------------------------------------------------------------
+// 4x4
+
+INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
+                vpx_dc_left_predictor_4x4_c, vpx_dc_top_predictor_4x4_c,
+                vpx_dc_128_predictor_4x4_c, vpx_v_predictor_4x4_c,
+                vpx_h_predictor_4x4_c, vpx_d45_predictor_4x4_c,
+                vpx_d135_predictor_4x4_c, vpx_d117_predictor_4x4_c,
+                vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
+                vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)
+
+#if HAVE_SSE && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE, TestIntraPred4, vpx_dc_predictor_4x4_sse,
+                vpx_dc_left_predictor_4x4_sse, vpx_dc_top_predictor_4x4_sse,
+                vpx_dc_128_predictor_4x4_sse, vpx_v_predictor_4x4_sse, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_sse)
+#endif  // HAVE_SSE && CONFIG_USE_X86INC
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_4x4_ssse3, vpx_d45_predictor_4x4_ssse3, NULL,
+                NULL, vpx_d153_predictor_4x4_ssse3,
+                vpx_d207_predictor_4x4_ssse3, vpx_d63_predictor_4x4_ssse3, NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
+                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
+                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
+                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
+                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
+                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
+                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
+                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_4x4_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 8x8
+
+INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
+                vpx_dc_left_predictor_8x8_c, vpx_dc_top_predictor_8x8_c,
+                vpx_dc_128_predictor_8x8_c, vpx_v_predictor_8x8_c,
+                vpx_h_predictor_8x8_c, vpx_d45_predictor_8x8_c,
+                vpx_d135_predictor_8x8_c, vpx_d117_predictor_8x8_c,
+                vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
+                vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)
+
+#if HAVE_SSE && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE, TestIntraPred8, vpx_dc_predictor_8x8_sse,
+                vpx_dc_left_predictor_8x8_sse, vpx_dc_top_predictor_8x8_sse,
+                vpx_dc_128_predictor_8x8_sse, vpx_v_predictor_8x8_sse, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE && CONFIG_USE_X86INC
+
+#if HAVE_SSE2 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_8x8_ssse3, vpx_d45_predictor_8x8_ssse3, NULL,
+                NULL, vpx_d153_predictor_8x8_ssse3,
+                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_8x8_c)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
+                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
+                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
+                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon)
+
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
+                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
+                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
+                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_8x8_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 16x16
+
+INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
+                vpx_dc_left_predictor_16x16_c, vpx_dc_top_predictor_16x16_c,
+                vpx_dc_128_predictor_16x16_c, vpx_v_predictor_16x16_c,
+                vpx_h_predictor_16x16_c, vpx_d45_predictor_16x16_c,
+                vpx_d135_predictor_16x16_c, vpx_d117_predictor_16x16_c,
+                vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c,
+                vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c)
+
+#if HAVE_SSE2 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
+                vpx_dc_left_predictor_16x16_sse2,
+                vpx_dc_top_predictor_16x16_sse2,
+                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_sse2)
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_16x16_ssse3, vpx_d45_predictor_16x16_ssse3,
+                NULL, NULL, vpx_d153_predictor_16x16_ssse3,
+                vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3,
+                NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
+                vpx_dc_left_predictor_16x16_neon,
+                vpx_dc_top_predictor_16x16_neon,
+                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
+                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
+                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
+                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
+                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_16x16_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 32x32
+
+INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
+                vpx_dc_left_predictor_32x32_c, vpx_dc_top_predictor_32x32_c,
+                vpx_dc_128_predictor_32x32_c, vpx_v_predictor_32x32_c,
+                vpx_h_predictor_32x32_c, vpx_d45_predictor_32x32_c,
+                vpx_d135_predictor_32x32_c, vpx_d117_predictor_32x32_c,
+                vpx_d153_predictor_32x32_c, vpx_d207_predictor_32x32_c,
+                vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)
+
+#if HAVE_SSE2 && CONFIG_USE_X86INC
+#if ARCH_X86_64
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
+                vpx_dc_left_predictor_32x32_sse2,
+                vpx_dc_top_predictor_32x32_sse2,
+                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_32x32_sse2)
+#else
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
+                vpx_dc_left_predictor_32x32_sse2,
+                vpx_dc_top_predictor_32x32_sse2,
+                vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // ARCH_X86_64
+#endif  // HAVE_SSE2 && CONFIG_USE_X86INC
+
+#if HAVE_SSSE3 && CONFIG_USE_X86INC
+INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_32x32_ssse3, vpx_d45_predictor_32x32_ssse3,
+                NULL, NULL, vpx_d153_predictor_32x32_ssse3,
+                vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3,
+                NULL)
+#endif  // HAVE_SSSE3 && CONFIG_USE_X86INC
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
+                vpx_dc_left_predictor_32x32_neon,
+                vpx_dc_top_predictor_32x32_neon,
+                vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon,
+                vpx_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_32x32_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
+                vpx_dc_left_predictor_32x32_msa, vpx_dc_top_predictor_32x32_msa,
+                vpx_dc_128_predictor_32x32_msa, vpx_v_predictor_32x32_msa,
+                vpx_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vpx_tm_predictor_32x32_msa)
+#endif  // HAVE_MSA
+
+#include "test/test_libvpx.cc"
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -8,6 +8,9 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */
 #include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
 #include "./vpx_config.h"
 #if ARCH_X86 || ARCH_X86_64
 #include "vpx_ports/x86.h"
@@ -15,13 +18,15 @@
 extern "C" {
 #if CONFIG_VP8
 extern void vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
 extern void vp9_rtcd();
-#endif
+#endif  // CONFIG_VP9
+extern void vpx_dsp_rtcd();
+extern void vpx_scale_rtcd();
 }
-#include "third_party/googletest/src/include/gtest/gtest.h"

+#if ARCH_X86 || ARCH_X86_64
 static void append_negative_gtest_filter(const char *str) {
  std::string filter = ::testing::FLAGS_gtest_filter;
  // Negative patterns begin with one '-' followed by a ':' separated list.
@@ -29,6 +34,7 @@ static void append_negative_gtest_filter(const char *str) {
  filter += str;
  ::testing::FLAGS_gtest_filter = filter;
 }
+#endif  // ARCH_X86 || ARCH_X86_64

 int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
@@ -36,22 +42,22 @@ int main(int argc, char **argv) {
 #if ARCH_X86 || ARCH_X86_64
  const int simd_caps = x86_simd_caps();
  if (!(simd_caps & HAS_MMX))
-    append_negative_gtest_filter(":MMX/*");
+    append_negative_gtest_filter(":MMX.*:MMX/*");
  if (!(simd_caps & HAS_SSE))
-    append_negative_gtest_filter(":SSE/*");
+    append_negative_gtest_filter(":SSE.*:SSE/*");
  if (!(simd_caps & HAS_SSE2))
-    append_negative_gtest_filter(":SSE2/*");
+    append_negative_gtest_filter(":SSE2.*:SSE2/*");
  if (!(simd_caps & HAS_SSE3))
-    append_negative_gtest_filter(":SSE3/*");
+    append_negative_gtest_filter(":SSE3.*:SSE3/*");
  if (!(simd_caps & HAS_SSSE3))
-    append_negative_gtest_filter(":SSSE3/*");
+    append_negative_gtest_filter(":SSSE3.*:SSSE3/*");
  if (!(simd_caps & HAS_SSE4_1))
-    append_negative_gtest_filter(":SSE4_1/*");
+    append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*");
  if (!(simd_caps & HAS_AVX))
-    append_negative_gtest_filter(":AVX/*");
+    append_negative_gtest_filter(":AVX.*:AVX/*");
  if (!(simd_caps & HAS_AVX2))
-    append_negative_gtest_filter(":AVX2/*");
-#endif
+    append_negative_gtest_filter(":AVX2.*:AVX2/*");
+#endif  // ARCH_X86 || ARCH_X86_64

 #if !CONFIG_SHARED
 // Shared library builds don't support whitebox tests
@@ -59,11 +65,13 @@ int main(int argc, char **argv) {

 #if CONFIG_VP8
  vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
  vp9_rtcd();
-#endif
-#endif
+#endif  // CONFIG_VP9
+  vpx_dsp_rtcd();
+  vpx_scale_rtcd();
+#endif  // !CONFIG_SHARED

  return RUN_ALL_TESTS();
 }
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -12,6 +12,7 @@
 #include <cstdlib>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
+#include "../tools_common.h"
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
@@ -26,10 +27,24 @@

 namespace {

+enum DecodeMode {
+  kSerialMode,
+  kFrameParallelMode
+};
+
+const int kDecodeMode = 0;
+const int kThreads = 1;
+const int kFileName = 2;
+
+typedef std::tr1::tuple<int, int, const char*> DecodeParam;
+
 class TestVectorTest : public ::libvpx_test::DecoderTest,
-    public ::libvpx_test::CodecTestWithParam<const char*> {
+    public ::libvpx_test::CodecTestWithParam<DecodeParam> {
 protected:
-  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}
+  TestVectorTest()
+      : DecoderTest(GET_PARAM(0)),
+        md5_file_(NULL) {
+  }

  virtual ~TestVectorTest() {
    if (md5_file_)
@@ -71,8 +86,25 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // checksums match the correct md5 data, then the test is passed. Otherwise,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
-  const std::string filename = GET_PARAM(1);
+  const DecodeParam input = GET_PARAM(1);
+  const std::string filename = std::tr1::get<kFileName>(input);
+  const int threads = std::tr1::get<kThreads>(input);
+  const int mode = std::tr1::get<kDecodeMode>(input);
  libvpx_test::CompressedVideoSource *video = NULL;
+  vpx_codec_flags_t flags = 0;
+  vpx_codec_dec_cfg_t cfg = {0};
+  char str[256];
+
+  if (mode == kFrameParallelMode) {
+    flags |= VPX_CODEC_USE_FRAME_THREADING;
+  }
+
+  cfg.threads = threads;
+
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
+           "file: %s  mode: %s threads: %d",
+           filename.c_str(), mode == 0 ? "Serial" : "Parallel", threads);
+  SCOPED_TRACE(str);

  // Open compressed video file.
  if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -92,18 +124,50 @@ TEST_P(TestVectorTest, MD5Match) {
  const std::string md5_filename = filename + ".md5";
  OpenMD5File(md5_filename);

+  // Set decode config and flags.
+  set_cfg(cfg);
+  set_flags(flags);
+
  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video, cfg));
  delete video;
 }

-VP8_INSTANTIATE_TEST_CASE(TestVectorTest,
-                          ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
-                                              libvpx_test::kVP8TestVectors +
-                                              libvpx_test::kNumVP8TestVectors));
-VP9_INSTANTIATE_TEST_CASE(TestVectorTest,
-                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-                                              libvpx_test::kVP9TestVectors +
-                                              libvpx_test::kNumVP9TestVectors));
+// Test VP8 decode in serial mode with single thread.
+// NOTE: VP8 only support serial mode.
+#if CONFIG_VP8_DECODER
+VP8_INSTANTIATE_TEST_CASE(
+    TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
+        ::testing::Values(1),  // Single thread.
+        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
+                            libvpx_test::kVP8TestVectors +
+                                libvpx_test::kNumVP8TestVectors)));
+#endif  // CONFIG_VP8_DECODER

+// Test VP9 decode in serial mode with single thread.
+#if CONFIG_VP9_DECODER
+VP9_INSTANTIATE_TEST_CASE(
+    TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
+        ::testing::Values(1),  // Single thread.
+        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                            libvpx_test::kVP9TestVectors +
+                                libvpx_test::kNumVP9TestVectors)));
+
+// Test VP9 decode in frame parallel mode with different number of threads.
+INSTANTIATE_TEST_CASE_P(
+    VP9MultiThreadedFrameParallel, TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
+        ::testing::Combine(
+            ::testing::Values(1),        // Frame Parallel mode.
+            ::testing::Range(2, 9),      // With 2 ~ 8 threads.
+            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                                libvpx_test::kVP9TestVectors +
+                                    libvpx_test::kNumVP9TestVectors))));
+#endif
 }  // namespace
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -165,7 +165,10 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
  "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
  "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
+#if !CONFIG_SIZE_LIMIT || \
+    (DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120)
  "vp90-2-13-largescaling.webm",
+#endif
  "vp90-2-14-resize-fp-tiles-1-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
@@ -184,6 +187,14 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
  "vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm",
  "vp91-2-04-yuv444.webm",
+  "vp91-2-04-yuv422.webm", "vp91-2-04-yuv440.webm",
+#if CONFIG_VP9_HIGHBITDEPTH
+  "vp92-2-20-10bit-yuv420.webm", "vp92-2-20-12bit-yuv420.webm",
+  "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
+  "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
+  "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
+#endif  // CONFIG_VP9_HIGHBITDEPTH`
+  "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
--- a/test/tile_independence_test.cc
+++ b/test/tile_independence_test.cc
@@ -104,4 +104,5 @@ TEST_P(TileIndependenceTest, MD5Match) {

 VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));

+VP10_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1));
 }  // namespace
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -106,22 +106,24 @@ check_git_hashes() {
  fi
 }

+# $1 is the name of an environment variable containing a directory name to
+# test.
+test_env_var_dir() {
+  local dir=$(eval echo "\${$1}")
+  if [ ! -d "${dir}" ]; then
+    elog "'${dir}': No such directory"
+    elog "The $1 environment variable must be set to a valid directory."
+    return 1
+  fi
+}
+
 # This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and
 # LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that
 # the variables are set and that they all evaluate to directory paths.
 verify_vpx_test_environment() {
-  if [ ! -d "${LIBVPX_BIN_PATH}" ]; then
-    echo "The LIBVPX_BIN_PATH environment variable must be set."
-    return 1
-  fi
-  if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then
-    echo "The LIBVPX_CONFIG_PATH environment variable must be set."
-    return 1
-  fi
-  if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then
-    echo "The LIBVPX_TEST_DATA_PATH environment variable must be set."
-    return 1
-  fi
+  test_env_var_dir "LIBVPX_BIN_PATH" \
+    && test_env_var_dir "LIBVPX_CONFIG_PATH" \
+    && test_env_var_dir "LIBVPX_TEST_DATA_PATH"
 }

 # Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which
@@ -261,6 +263,9 @@ run_tests() {
    return
  fi

+  # Don't bother with the environment tests if everything else was disabled.
+  [ -z "${tests_to_filter}" ] && return
+
  # Combine environment and actual tests.
  local tests_to_run="${env_tests} ${tests_to_filter}"

@@ -378,8 +383,7 @@ else
  VPX_TEST_TEMP_ROOT=/tmp
 fi

-VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
-VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}"
+VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_$$"

 if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \
   [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then
@@ -397,11 +401,16 @@ VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
 VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"

 VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
+VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
+VP9_LT_50_FRAMES_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"

 YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
 YUV_RAW_INPUT_WIDTH=352
 YUV_RAW_INPUT_HEIGHT=288

+Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+
 # Setup a trap function to clean up after tests complete.
 trap cleanup EXIT

@@ -417,13 +426,13 @@ vlog "$(basename "${0%.*}") test configuration:
  VPX_TEST_LIST_TESTS=${VPX_TEST_LIST_TESTS}
  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
  VPX_TEST_PREFIX=${VPX_TEST_PREFIX}
-  VPX_TEST_RAND=${VPX_TEST_RAND}
  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
  VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
  VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
  YUV_RAW_INPUT=${YUV_RAW_INPUT}
  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
-  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
+  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
+  Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"

 fi  # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
--- a/test/util.h
+++ b/test/util.h
@@ -19,8 +19,7 @@
 // Macros
 #define GET_PARAM(k) std::tr1::get< k >(GetParam())

-static double compute_psnr(const vpx_image_t *img1,
-                           const vpx_image_t *img2) {
+inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) {
  assert((img1->fmt == img2->fmt) &&
         (img1->d_w == img2->d_w) &&
         (img1->d_h == img2->d_h));
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -11,6 +11,9 @@
 #define TEST_VIDEO_SOURCE_H_

 #if defined(_WIN32)
+#undef NOMINMAX
+#define NOMINMAX
+#define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 #endif
 #include <cstdio>
@@ -48,7 +51,7 @@ static std::string GetDataPath() {
 #undef TO_STRING
 #undef STRINGIFY

-static FILE *OpenTestDataFile(const std::string& file_name) {
+inline FILE *OpenTestDataFile(const std::string& file_name) {
  const std::string path_to_source = GetDataPath() + "/" + file_name;
  return fopen(path_to_source.c_str(), "rb");
 }
@@ -134,8 +137,13 @@ class VideoSource {

 class DummyVideoSource : public VideoSource {
 public:
-  DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) {
-    SetSize(80, 64);
+  DummyVideoSource()
+      : img_(NULL),
+        limit_(100),
+        width_(80),
+        height_(64),
+        format_(VPX_IMG_FMT_I420) {
+    ReallocImage();
  }

  virtual ~DummyVideoSource() { vpx_img_free(img_); }
@@ -174,23 +182,35 @@ class DummyVideoSource : public VideoSource {

  void SetSize(unsigned int width, unsigned int height) {
    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 32);
      width_ = width;
      height_ = height;
+      ReallocImage();
+    }
+  }
+
+  void SetImageFormat(vpx_img_fmt_t format) {
+    if (format_ != format) {
+      format_ = format;
+      ReallocImage();
    }
  }

 protected:
  virtual void FillFrame() { if (img_) memset(img_->img_data, 0, raw_sz_); }

+  void ReallocImage() {
+    vpx_img_free(img_);
+    img_ = vpx_img_alloc(NULL, format_, width_, height_, 32);
+    raw_sz_ = ((img_->w + 31) & ~31) * img_->h * img_->bps / 8;
+  }
+
  vpx_image_t *img_;
  size_t       raw_sz_;
  unsigned int limit_;
  unsigned int frame_;
  unsigned int width_;
  unsigned int height_;
+  vpx_img_fmt_t format_;
 };


--- a/test/vp10_dct_test.cc
+++ b/test/vp10_dct_test.cc
@@ -0,0 +1,111 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <new>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/util.h"
+#include "./vpx_config.h"
+#include "vpx_ports/msvc.h"
+
+#undef CONFIG_COEFFICIENT_RANGE_CHECKING
+#define CONFIG_COEFFICIENT_RANGE_CHECKING 1
+#include "vp10/encoder/dct.c"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+void reference_dct_1d(const double *in, double *out, int size) {
+  const double PI = 3.141592653589793238462643383279502884;
+  const double kInvSqrt2 = 0.707106781186547524400844362104;
+  for (int k = 0; k < size; ++k) {
+    out[k] = 0;
+    for (int n = 0; n < size; ++n) {
+      out[k] += in[n] * cos(PI * (2 * n + 1) * k / (2 * size));
+    }
+    if (k == 0)
+      out[k] = out[k] * kInvSqrt2;
+  }
+}
+
+typedef void (*FdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*FdctFunc)(const tran_low_t *in, tran_low_t *out);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  void RunFwdAccuracyCheck() {
+    tran_low_t *input  = new tran_low_t[txfm_size_];
+    tran_low_t *output = new tran_low_t[txfm_size_];
+    double *ref_input  = new double[txfm_size_];
+    double *ref_output = new double[txfm_size_];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    for (int ti =  0; ti < count_test_block; ++ti) {
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        input[ni] = rnd.Rand8() - rnd.Rand8();
+        ref_input[ni] = static_cast<double>(input[ni]);
+      }
+
+      fwd_txfm_(input, output);
+      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
+
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        EXPECT_LE(
+            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
+            max_error_);
+      }
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] ref_input;
+    delete[] ref_output;
+  }
+
+  double max_error_;
+  int txfm_size_;
+  FdctFunc fwd_txfm_;
+  FdctFuncRef fwd_txfm_ref_;
+};
+
+typedef std::tr1::tuple<FdctFunc, FdctFuncRef, int, int> FdctParam;
+class Vp10FwdTxfm
+    : public TransTestBase,
+      public ::testing::TestWithParam<FdctParam> {
+ public:
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = GET_PARAM(1);
+    txfm_size_ = GET_PARAM(2);
+    max_error_ = GET_PARAM(3);
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(Vp10FwdTxfm, RunFwdAccuracyCheck) {
+  RunFwdAccuracyCheck();
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, Vp10FwdTxfm,
+    ::testing::Values(
+        FdctParam(&fdct4, &reference_dct_1d, 4, 1),
+        FdctParam(&fdct8, &reference_dct_1d, 8, 1),
+        FdctParam(&fdct16, &reference_dct_1d, 16, 2)));
+}  // namespace
--- a/test/vp10_inv_txfm_test.cc
+++ b/test/vp10_inv_txfm_test.cc
@@ -0,0 +1,321 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp10_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp10/common/blockd.h"
+#include "vp10/common/scan.h"
+#include "vpx/vpx_integer.h"
+#include "vp10/common/vp10_inv_txfm.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const double PI = 3.141592653589793238462643383279502884;
+const double kInvSqrt2 = 0.707106781186547524400844362104;
+
+void reference_idct_1d(const double *in, double *out, int size) {
+  for (int n = 0; n < size; ++n) {
+    out[n] = 0;
+    for (int k = 0; k < size; ++k) {
+      if (k == 0)
+        out[n] += kInvSqrt2 * in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
+      else
+        out[n] += in[k] * cos(PI * (2 * n + 1) * k / (2 * size));
+    }
+  }
+}
+
+typedef void (*IdctFuncRef)(const double *in, double *out, int size);
+typedef void (*IdctFunc)(const tran_low_t *in, tran_low_t *out);
+
+class TransTestBase {
+ public:
+  virtual ~TransTestBase() {}
+
+ protected:
+  void RunInvAccuracyCheck() {
+    tran_low_t *input  = new tran_low_t[txfm_size_];
+    tran_low_t *output = new tran_low_t[txfm_size_];
+    double *ref_input  = new double[txfm_size_];
+    double *ref_output = new double[txfm_size_];
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    for (int ti =  0; ti < count_test_block; ++ti) {
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        input[ni] = rnd.Rand8() - rnd.Rand8();
+        ref_input[ni] = static_cast<double>(input[ni]);
+      }
+
+      fwd_txfm_(input, output);
+      fwd_txfm_ref_(ref_input, ref_output, txfm_size_);
+
+      for (int ni = 0; ni < txfm_size_; ++ni) {
+        EXPECT_LE(
+            abs(output[ni] - static_cast<tran_low_t>(round(ref_output[ni]))),
+            max_error_);
+      }
+    }
+
+    delete[] input;
+    delete[] output;
+    delete[] ref_input;
+    delete[] ref_output;
+  }
+
+  double max_error_;
+  int txfm_size_;
+  IdctFunc fwd_txfm_;
+  IdctFuncRef fwd_txfm_ref_;
+};
+
+typedef std::tr1::tuple<IdctFunc, IdctFuncRef, int, int> IdctParam;
+class Vp10InvTxfm
+    : public TransTestBase,
+      public ::testing::TestWithParam<IdctParam> {
+ public:
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    fwd_txfm_ref_ = GET_PARAM(1);
+    txfm_size_ = GET_PARAM(2);
+    max_error_ = GET_PARAM(3);
+  }
+  virtual void TearDown() {}
+};
+
+TEST_P(Vp10InvTxfm, RunInvAccuracyCheck) {
+  RunInvAccuracyCheck();
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, Vp10InvTxfm,
+    ::testing::Values(
+        IdctParam(&vp10_idct4_c, &reference_idct_1d, 4, 1),
+        IdctParam(&vp10_idct8_c, &reference_idct_1d, 8, 2),
+        IdctParam(&vp10_idct16_c, &reference_idct_1d, 16, 4),
+        IdctParam(&vp10_idct32_c, &reference_idct_1d, 32, 6))
+);
+
+typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef std::tr1::tuple<FwdTxfmFunc,
+                        InvTxfmFunc,
+                        InvTxfmFunc,
+                        TX_SIZE, int> PartialInvTxfmParam;
+const int kMaxNumCoeffs = 1024;
+class Vp10PartialIDctTest
+    : public ::testing::TestWithParam<PartialInvTxfmParam> {
+ public:
+  virtual ~Vp10PartialIDctTest() {}
+  virtual void SetUp() {
+    ftxfm_ = GET_PARAM(0);
+    full_itxfm_ = GET_PARAM(1);
+    partial_itxfm_ = GET_PARAM(2);
+    tx_size_  = GET_PARAM(3);
+    last_nonzero_ = GET_PARAM(4);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  int last_nonzero_;
+  TX_SIZE tx_size_;
+  FwdTxfmFunc ftxfm_;
+  InvTxfmFunc full_itxfm_;
+  InvTxfmFunc partial_itxfm_;
+};
+
+TEST_P(Vp10PartialIDctTest, RunQuantCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int size;
+  switch (tx_size_) {
+    case TX_4X4:
+      size = 4;
+      break;
+    case TX_8X8:
+      size = 8;
+      break;
+    case TX_16X16:
+      size = 16;
+      break;
+    case TX_32X32:
+      size = 32;
+      break;
+    default:
+      FAIL() << "Wrong Size!";
+      break;
+  }
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+
+  const int count_test_block = 1000;
+  const int block_size = size * size;
+
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);
+
+  int max_error = 0;
+  for (int i = 0; i < count_test_block; ++i) {
+    // clear out destination buffer
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-255, 255].
+      if (i == 0) {
+        for (int j = 0; j < block_size; ++j)
+          input_extreme_block[j] = 255;
+      } else if (i == 1) {
+        for (int j = 0; j < block_size; ++j)
+          input_extreme_block[j] = -255;
+      } else {
+        for (int j = 0; j < block_size; ++j) {
+          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
+        }
+      }
+
+      ftxfm_(input_extreme_block, output_ref_block, size);
+
+      // quantization with maximum allowed step sizes
+      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
+      for (int j = 1; j < last_nonzero_; ++j)
+        test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]]
+                         = (output_ref_block[j] / 1828) * 1828;
+    }
+
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+
+TEST_P(Vp10PartialIDctTest, ResultsMatch) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  int size;
+  switch (tx_size_) {
+    case TX_4X4:
+      size = 4;
+      break;
+    case TX_8X8:
+      size = 8;
+      break;
+    case TX_16X16:
+      size = 16;
+      break;
+    case TX_32X32:
+      size = 32;
+      break;
+    default:
+      FAIL() << "Wrong Size!";
+      break;
+  }
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
+  const int count_test_block = 1000;
+  const int max_coeff = 32766 / 4;
+  const int block_size = size * size;
+  int max_error = 0;
+  for (int i = 0; i < count_test_block; ++i) {
+    // clear out destination buffer
+    memset(dst1, 0, sizeof(*dst1) * block_size);
+    memset(dst2, 0, sizeof(*dst2) * block_size);
+    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
+    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
+    int max_energy_leftover = max_coeff * max_coeff;
+    for (int j = 0; j < last_nonzero_; ++j) {
+      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                          (rnd.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coef * coef;
+      if (max_energy_leftover < 0) {
+        max_energy_leftover = 0;
+        coef = 0;
+      }
+      test_coef_block1[vp10_default_scan_orders[tx_size_].scan[j]] = coef;
+    }
+
+    memcpy(test_coef_block2, test_coef_block1,
+           sizeof(*test_coef_block2) * block_size);
+
+    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
+    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+
+    for (int j = 0; j < block_size; ++j) {
+      const int diff = dst1[j] - dst2[j];
+      const int error = diff * diff;
+      if (max_error < error)
+        max_error = error;
+    }
+  }
+
+  EXPECT_EQ(0, max_error)
+      << "Error: partial inverse transform produces different results";
+}
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, Vp10PartialIDctTest,
+    ::testing::Values(
+        make_tuple(&vpx_fdct32x32_c,
+                   &vp10_idct32x32_1024_add_c,
+                   &vp10_idct32x32_34_add_c,
+                   TX_32X32, 34),
+        make_tuple(&vpx_fdct32x32_c,
+                   &vp10_idct32x32_1024_add_c,
+                   &vp10_idct32x32_1_add_c,
+                   TX_32X32, 1),
+        make_tuple(&vpx_fdct16x16_c,
+                   &vp10_idct16x16_256_add_c,
+                   &vp10_idct16x16_10_add_c,
+                   TX_16X16, 10),
+        make_tuple(&vpx_fdct16x16_c,
+                   &vp10_idct16x16_256_add_c,
+                   &vp10_idct16x16_1_add_c,
+                   TX_16X16, 1),
+        make_tuple(&vpx_fdct8x8_c,
+                   &vp10_idct8x8_64_add_c,
+                   &vp10_idct8x8_12_add_c,
+                   TX_8X8, 12),
+        make_tuple(&vpx_fdct8x8_c,
+                   &vp10_idct8x8_64_add_c,
+                   &vp10_idct8x8_1_add_c,
+                   TX_8X8, 1),
+        make_tuple(&vpx_fdct4x4_c,
+                   &vp10_idct4x4_16_add_c,
+                   &vp10_idct4x4_1_add_c,
+                   TX_4X4, 1)));
+}  // namespace
--- a/test/vp8_boolcoder_test.cc
+++ b/test/vp8_boolcoder_test.cc
@@ -16,12 +16,12 @@
 #include <string.h>
 #include <sys/types.h>

-#include "test/acm_random.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "vpx/vpx_integer.h"

-#include "vp8/encoder/boolhuff.h"
+#include "test/acm_random.h"
 #include "vp8/decoder/dboolhuff.h"
+#include "vp8/encoder/boolhuff.h"
+#include "vpx/vpx_integer.h"

 namespace {
 const int num_tests = 10;
--- a/test/vp8_denoiser_sse2_test.cc
+++ b/test/vp8_denoiser_sse2_test.cc
@@ -28,19 +28,18 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 16 * 16;
-class VP8DenoiserTest
-    : public ::testing::TestWithParam<int> {
+class VP8DenoiserTest : public ::testing::TestWithParam<int> {
 public:
  virtual ~VP8DenoiserTest() {}

  virtual void SetUp() {
-    increase_denoising = GetParam();
+    increase_denoising_ = GetParam();
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int increase_denoising;
+  int increase_denoising_;
 };

 TEST_P(VP8DenoiserTest, BitexactCheck) {
@@ -53,18 +52,18 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
  // mc_avg_block is the denoised reference block,
  // avg_block_c is the denoised result from C code,
  // avg_block_sse2 is the denoised result from SSE2 code.
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_c, kNumPixels);
+  DECLARE_ALIGNED(16, uint8_t, sig_block_c[kNumPixels]);
  // Since in VP8 denoiser, the source signal will be changed,
  // we need another copy of the source signal as the input of sse2 code.
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_sse2, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
+  DECLARE_ALIGNED(16, uint8_t, sig_block_sse2[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
-    uint8_t motion_magnitude_random
-              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    const int motion_magnitude_ran =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
@@ -72,20 +71,20 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      sig_block_sse2[j] = sig_block_c[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
-             (rnd.Rand8()%20);
+      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0 ? -1 : 1) *
+             (rnd.Rand8() % 20);
      // Clip.
-      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
    }

    // Test denosiser on Y component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -94,14 +93,14 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      }
    }

-    // Test denosiser on UV component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    // Test denoiser on UV component.
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -113,7 +112,5 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
 }

 // Test for all block size.
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP8DenoiserTest,
-    ::testing::Values(0, 1));
+INSTANTIATE_TEST_CASE_P(SSE2, VP8DenoiserTest, ::testing::Values(0, 1));
 }  // namespace
--- a/Show More
+++ b/Show More