Merge "Fix sub8x8 motion search on scaled reference frame" into sandbox/jingning@google.com/decoder_test_suite

Merge "Fix high bit depth with scaled reference frame" into sandbox/jingning@google.com/decoder_test_suite
Fix sub8x8 motion search on scaled reference frame
2015-12-11 20:28:21 +00:00 · 2015-12-11 20:27:56 +00:00 · 2015-12-10 22:26:41 -08:00 · 2015-12-10 15:47:33 -08:00 · 2015-11-17 17:58:42 -08:00 · 2015-09-08 11:20:39 -07:00
603 changed files with 75345 additions and 49968 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -44,7 +44,6 @@
 /ivfenc.dox
 /libvpx.so*
 /libvpx.ver
-/obj_int_extract
 /samples.dox
 /test_libvpx
 /vp8_api1_migration.dox
--- a/.mailmap
+++ b/.mailmap
@@ -1,18 +1,26 @@
 Adrian Grange <agrange@google.com>
+Alex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
+Alpha Lam <hclam@google.com> <hclam@chromium.org>
+Deb Mukherjee <debargha@google.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
+Guillaume Martres <gmartres@google.com> <smarter3@gmail.com>
 Hangyu Kuang <hkuang@google.com>
 Jim Bankoski <jimbankoski@google.com>
-John Koleszar <jkoleszar@google.com>
 Johann Koenig <johannkoenig@google.com>
 Johann Koenig <johannkoenig@google.com> <johann.koenig@duck.com>
-Johann Koenig <johannkoenig@google.com> <johannkoenig@dhcp-172-19-7-52.mtv.corp.google.com>
+John Koleszar <jkoleszar@google.com>
+Joshua Litt <joshualitt@google.com> <joshualitt@chromium.org>
+Marco Paniconi <marpan@google.com>
+Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
+Paul Wilkins <paulwilkins@google.com>
+Ralph Giles <giles@xiph.org> <giles@entropywave.com>
+Ralph Giles <giles@xiph.org> <giles@mozilla.com>
 Sami Pietilä <samipietila@google.com>
+Tamar Levy <tamar.levy@intel.com>
+Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
 Timothy B. Terriberry <tterribe@xiph.org> Tim Terriberry <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
-Ralph Giles <giles@xiph.org> <giles@entropywave.com>
-Ralph Giles <giles@xiph.org> <giles@mozilla.com>
-Alpha Lam <hclam@google.com> <hclam@chromium.org>
-Deb Mukherjee <debargha@google.com>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
--- a/29
+++ b/29
@@ -3,10 +3,11 @@

 Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
 Ahmad Sharif <asharif@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alex Converse <alex.converse@gmail.com>
+Alex Converse <aconverse@google.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -14,44 +15,58 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
 Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
+Andrew Russell <anrussell@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
 Attila Nagy <attilanagy@google.com>
 changjun.yang <changjun.yang@intel.com>
+Charles 'Buck' Krasic <ckrasic@google.com>
 chm <chm@rock-chips.com>
 Christian Duvivier <cduvivier@google.com>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
+Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
-Erik Niemeyer <erik.a.niemeyer@gmail.com>
+Ehsan Akhgari <ehsan.akhgari@gmail.com>
+Erik Niemeyer <erik.a.niemeyer@intel.com>
 Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
+Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
 Guillaume Martres <gmartres@google.com>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 Hangyu Kuang <hkuang@google.com>
+Hanno Böck <hanno@hboeck.de>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
 Ivan Maltz <ivanmaltz@google.com>
+Jacek Caban <cjacek@gmail.com>
+JackyChen <jackychen@google.com>
 James Berry <jamesberry@google.com>
+James Yu <james.yu@linaro.org>
 James Zern <jzern@google.com>
+Jan Gerber <j@mailb.org>
 Jan Kratochvil <jan.kratochvil@redhat.com>
 Janne Salonen <jsalonen@google.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
+Jia Jia <jia.jia@linaro.org>
 Jim Bankoski <jimbankoski@google.com>
 Jingning Han <jingning@google.com>
+Joey Parrish <joeyparrish@google.com>
 Johann Koenig <johannkoenig@google.com>
 John Koleszar <jkoleszar@google.com>
+John Stark <jhnstrk@gmail.com>
 Joshua Bleecher Snyder <josh@treelinelabs.com>
 Joshua Litt <joshualitt@google.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
 KO Myung-Hun <komh@chollian.net>
+Lawrence Velázquez <larryv@macports.org>
 Lou Quillio <louquillio@google.com>
 Luca Barbato <lu_zero@gentoo.org>
 Makoto Kato <makoto.kt@gmail.com>
@@ -65,6 +80,7 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
+Minghai Shang <minghai@google.com>
 Morton Jonuschat <yabawock@gmail.com>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
@@ -72,6 +88,8 @@ Patrik Westin <patrik.westin@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
+Pengchong Jin <pengchong@google.com>
+Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
@@ -79,22 +97,29 @@ Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
 Rob Bradford <rob@linux.intel.com>
 Ronald S. Bultje <rbultje@google.com>
+Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
+Sean McGovern <gseanmcg@gmail.com>
+Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
 Tamar Levy <tamar.levy@intel.com>
+Tao Bai <michaelbai@chromium.org>
 Tero Rintaluoma <teror@google.com>
 Thijs Vermeir <thijsvermeir@gmail.com>
+Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
+Yongzhe Wang <yongzhe@google.com>
 Yunqing Wang <yunqingwang@google.com>
+Zoe Liu <zoeliu@google.com>
 Google Inc.
 The Mozilla Foundation
 The Xiph.Org Foundation
--- a/28
+++ b/28
@@ -1,3 +1,31 @@
+xxxx-yy-zz v1.4.0 "Changes for next release"
+  vpxenc is changed to use VP9 by default.
+  Encoder controls added for 1 pass SVC.
+  Decoder control to toggle on/off loopfilter.
+
+2015-04-03 v1.4.0 "Indian Runner Duck"
+  This release includes significant improvements to the VP9 codec.
+
+  - Upgrading:
+    This release is ABI incompatible with 1.3.0. It drops the compatibility
+    layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several codec
+    controls for VP9.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding
+    Multithreaded VP9 decoding (tile and frame-based)
+    Multithreaded VP9 encoding - on by default
+    YUV 4:2:2 and 4:4:4 support in VP9
+    10 and 12bit support in VP9
+    64bit ARM support by replacing ARM assembly with intrinsics
+
+  - Bug Fixes:
+    Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
+    files.
+
+  - Known Issues:
+    Frame Parallel decoding fails for segmented and non-420 files.
+
 2013-11-15 v1.3.0 "Forest"
  This release introduces the VP9 codec in a backward-compatible way.
  All existing users of VP8 can continue to use the library without
--- a/2
+++ b/2
@@ -17,7 +17,7 @@ or agree to the institution of patent litigation or any other patent
 enforcement activity against any entity (including a cross-claim or
 counterclaim in a lawsuit) alleging that any of these implementations of WebM
 or any code incorporated within any of these implementations of WebM
-constitutes direct or contributory patent infringement, or inducement of
+constitute direct or contributory patent infringement, or inducement of
 patent infringement, then any patent rights granted to you under this License
 for these implementations of WebM shall terminate as of the date such
 litigation is filed.
--- a/20
+++ b/20
@@ -1,4 +1,4 @@
-README - 30 May 2014
+README - 23 March 2015

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,10 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

-    armv5te-android-gcc
-    armv5te-linux-rvct
-    armv5te-linux-gcc
-    armv5te-none-rvct
    armv6-darwin-gcc
    armv6-linux-rvct
    armv6-linux-gcc
@@ -66,12 +62,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7s-darwin-gcc
    mips32-linux-gcc
    mips64-linux-gcc
-    ppc32-darwin8-gcc
-    ppc32-darwin9-gcc
-    ppc32-linux-gcc
-    ppc64-darwin8-gcc
-    ppc64-darwin9-gcc
-    ppc64-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
@@ -82,6 +72,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin11-gcc
    x86-darwin12-gcc
    x86-darwin13-gcc
+    x86-darwin14-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
@@ -99,6 +90,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-darwin11-gcc
    x86_64-darwin12-gcc
    x86_64-darwin13-gcc
+    x86_64-darwin14-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
@@ -109,12 +101,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs10
    x86_64-win64-vs11
    x86_64-win64-vs12
-    universal-darwin8-gcc
-    universal-darwin9-gcc
-    universal-darwin10-gcc
-    universal-darwin11-gcc
-    universal-darwin12-gcc
-    universal-darwin13-gcc
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
--- a/args.c
+++ b/args.c
@@ -14,9 +14,7 @@
 #include <limits.h>
 #include "args.h"

-#ifdef _MSC_VER
-#define snprintf _snprintf
-#endif
+#include "vpx_ports/msvc.h"

 #if defined(__GNUC__) && __GNUC__
 extern void die(const char *fmt, ...) __attribute__((noreturn));
--- a/build/arm-msvs/obj_int_extract.bat
+++ b/build/arm-msvs/obj_int_extract.bat
@@ -1,18 +0,0 @@
-REM   Copyright (c) 2013 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 and vpx_scale
-REM        source directories.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
-cl /I. /I%1 /nologo /c /DWINAPI_FAMILY=WINAPI_FAMILY_PHONE_APP "%~1/vpx_scale/vpx_scale_asm_offsets.c"
-%2\obj_int_extract.exe rvds "vpx_scale_asm_offsets.obj" > "vpx_scale_asm_offsets.asm"
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -43,7 +43,7 @@
 # will remove any NEON dependency.

 # To change to building armeabi, run ./libvpx/configure again, but with
-# --target=arm5te-android-gcc and modify the Application.mk file to
+# --target=armv6-android-gcc and modify the Application.mk file to
 # set APP_ABI := armeabi
 #
 # Running ndk-build will build libvpx and include it in your project.
@@ -60,7 +60,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),armeabi)
-  include $(CONFIG_DIR)libs-armv5te-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv6-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
@@ -91,51 +91,8 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

-# -----------------------------------------------------------------------------
-# Template  : asm_offsets_template
-# Arguments : 1: assembly offsets file to be created
-#             2: c file to base assembly offsets on
-# Returns   : None
-# Usage     : $(eval $(call asm_offsets_template,<asmfile>, <srcfile>
-# Rationale : Create offsets at compile time using for structures that are
-#             defined in c, but used in assembly functions.
-# -----------------------------------------------------------------------------
-define asm_offsets_template
-
-_SRC:=$(2)
-_OBJ:=$(ASM_CNV_PATH)/$$(notdir $(2)).S
-
-_FLAGS = $$($$(my)CFLAGS) \
-          $$(call get-src-file-target-cflags,$(2)) \
-          $$(call host-c-includes,$$(LOCAL_C_INCLUDES) $$(CONFIG_DIR)) \
-          $$(LOCAL_CFLAGS) \
-          $$(NDK_APP_CFLAGS) \
-          $$(call host-c-includes,$$($(my)C_INCLUDES)) \
-          -DINLINE_ASM \
-          -S \
-
-_TEXT = "Compile $$(call get-src-file-text,$(2))"
-_CC   = $$(TARGET_CC)
-
-$$(eval $$(call ev-build-file))
-
-$(1) : $$(_OBJ) $(2)
-	@mkdir -p $$(dir $$@)
-	@grep $(OFFSET_PATTERN) $$< | tr -d '\#' | $(CONFIG_DIR)$(ASM_CONVERSION) > $$@
-endef
-
-# Use ads2gas script to convert from RVCT format to GAS format.  This
-#  puts the processed file under $(ASM_CNV_PATH).  Local clean rule
-#  to handle removing these
-ifeq ($(CONFIG_VP8_ENCODER), yes)
-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm
-endif
-ifeq ($(HAVE_NEON_ASM), yes)
-  ASM_CNV_OFFSETS_DEPEND += $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm
-endif
-
 .PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm $(ASM_CNV_OFFSETS_DEPEND)
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
 	@mkdir -p $(dir $@)
 	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

@@ -201,13 +158,12 @@ LOCAL_CFLAGS += \

 LOCAL_MODULE := libvpx

-LOCAL_LDLIBS := -llog
-
 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
  LOCAL_STATIC_LIBRARIES := cpufeatures
 endif

 # Add a dependency to force generation of the RTCD files.
+define rtcd_dep_template
 ifeq ($(CONFIG_VP8), yes)
 $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp8_rtcd.h
 endif
@@ -215,31 +171,26 @@ ifeq ($(CONFIG_VP9), yes)
 $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vp9_rtcd.h
 endif
 $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_scale_rtcd.h
+$(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_dsp_rtcd.h

 ifeq ($(TARGET_ARCH_ABI),x86)
 $(foreach file, $(LOCAL_SRC_FILES), $(LOCAL_PATH)/$(file)): vpx_config.asm
 endif
+endef
+
+$(eval $(call rtcd_dep_template))

 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
 	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	@$(RM) $(patsubst %.asm, %.*, $(ASM_CNV_OFFSETS_DEPEND))
 	@$(RM) -r $(ASM_CNV_PATH)
 	@$(RM) $(CLEAN-OBJS)

-include $(BUILD_SHARED_LIBRARY)
-
-ifeq ($(HAVE_NEON), yes)
-  $(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vpx_scale_asm_offsets.asm, \
-    $(LIBVPX_PATH)/vpx_scale/vpx_scale_asm_offsets.c))
-endif
-
-ifeq ($(CONFIG_VP8_ENCODER), yes)
-  $(eval $(call asm_offsets_template,\
-    $(ASM_CNV_PATH)/vp8_asm_enc_offsets.asm, \
-    $(LIBVPX_PATH)/vp8/encoder/vp8_asm_enc_offsets.c))
+ifeq ($(ENABLE_SHARED),1)
+  include $(BUILD_SHARED_LIBRARY)
+else
+  include $(BUILD_STATIC_LIBRARY)
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -22,8 +22,10 @@ clean:: .DEFAULT
 exampletest: .DEFAULT
 install:: .DEFAULT
 test:: .DEFAULT
+test-no-data-check:: .DEFAULT
 testdata:: .DEFAULT
 utiltest: .DEFAULT
+exampletest-no-data-check utiltest-no-data-check: .DEFAULT


 # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be
@@ -56,13 +58,10 @@ dist:
        fi
 endif

+# Since we invoke make recursively for multiple targets we need to include the
+# .mk file for the correct target, but only when $(target) is non-empty.
 ifneq ($(target),)
-# Normally, we want to build the filename from the target and the toolchain.
-# This disambiguates from the $(target).mk file that exists in the source tree.
-# However, the toolchain is part of the target in universal builds, so we
-# don't want to include TOOLCHAIN in that case. FAT_ARCHS is used to test
-# if we're in the universal case.
-include $(target)$(if $(FAT_ARCHS),,-$(TOOLCHAIN)).mk
+include $(target)-$(TOOLCHAIN).mk
 endif
 BUILD_ROOT?=.
 VPATH=$(SRC_PATH_BARE)
@@ -116,6 +115,9 @@ test::
 testdata::
 .PHONY: utiltest
 utiltest:
+.PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check
+test-no-data-check::
+exampletest-no-data-check utiltest-no-data-check:

 # Add compiler flags for intrinsic files
 ifeq ($(TOOLCHAIN), x86-os2-gcc)
@@ -146,6 +148,7 @@ $(BUILD_PFX)%.c.d: %.c

 $(BUILD_PFX)%.c.o: %.c
 	$(if $(quiet),@echo "    [CC] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cc.d: %.cc
@@ -155,6 +158,7 @@ $(BUILD_PFX)%.cc.d: %.cc

 $(BUILD_PFX)%.cc.o: %.cc
 	$(if $(quiet),@echo "    [CXX] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.cpp.d: %.cpp
@@ -164,6 +168,7 @@ $(BUILD_PFX)%.cpp.d: %.cpp

 $(BUILD_PFX)%.cpp.o: %.cpp
 	$(if $(quiet),@echo "    [CXX] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $<

 $(BUILD_PFX)%.asm.d: %.asm
@@ -174,6 +179,7 @@ $(BUILD_PFX)%.asm.d: %.asm

 $(BUILD_PFX)%.asm.o: %.asm
 	$(if $(quiet),@echo "    [AS] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 $(BUILD_PFX)%.s.d: %.s
@@ -184,12 +190,14 @@ $(BUILD_PFX)%.s.d: %.s

 $(BUILD_PFX)%.s.o: %.s
 	$(if $(quiet),@echo "    [AS] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

 .PRECIOUS: %.c.S
 %.c.S: CFLAGS += -DINLINE_ASM
 $(BUILD_PFX)%.c.S: %.c
 	$(if $(quiet),@echo "    [GEN] $@")
+	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

 .PRECIOUS: %.asm.s
@@ -216,14 +224,6 @@ else
 	$(qexec)cp $< $@
 endif

-#
-# Rule to extract assembly constants from C sources
-#
-obj_int_extract: build/make/obj_int_extract.c
-	$(if $(quiet),@echo "    [HOSTCC] $@")
-	$(qexec)$(HOSTCC) -I. -I$(SRC_PATH_BARE) -o $@ $<
-CLEAN-OBJS += obj_int_extract
-
 #
 # Utility functions
 #
@@ -315,18 +315,15 @@ $(1):
        $$(filter %.o,$$^) $$(extralibs)
 endef

-
-
-define lipo_lib_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
-	$(if $(quiet),@echo "    [LIPO] $$@")
-	$(qexec)libtool -static -o $$@ $$?
-endef
-
-define lipo_bin_template
-$(1): $(addsuffix /$(1),$(FAT_ARCHS))
-	$(if $(quiet),@echo "    [LIPO] $$@")
-	$(qexec)lipo -output $$@ -create $$?
+define dll_template
+# Not using a pattern rule here because we don't want to generate empty
+# archives when they are listed as a dependency in files not responsible
+# for creating them.
+$(1):
+	$(if $(quiet),@echo "    [LD] $$@")
+	$(qexec)$$(LD) -Zdll $$(LDFLAGS) \
+        -o $$@ \
+        $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE)
 endef


@@ -340,9 +337,11 @@ endif
 skip_deps := $(filter %clean,$(MAKECMDGOALS))
 skip_deps += $(findstring testdata,$(MAKECMDGOALS))
 ifeq ($(strip $(skip_deps)),)
-  # Older versions of make don't like -include directives with no arguments
-  ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
-    -include $(filter %.d,$(OBJS-yes:.o=.d))
+  ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
+    # Older versions of make don't like -include directives with no arguments
+    ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),)
+      -include $(filter %.d,$(OBJS-yes:.o=.d))
+    endif
  endif
 endif

@@ -383,8 +382,9 @@ LIBS=$(call enabled,LIBS)
 .libs: $(LIBS)
 	@touch $@
 $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib))))
-$(foreach lib,$(filter %so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
-$(foreach lib,$(filter %$(VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
+$(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib))))
+$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib))))
+$(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib))))

 INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS)
 ifeq ($(MAKECMDGOALS),dist)
@@ -424,11 +424,7 @@ ifneq ($(call enabled,DIST-SRCS),)
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_sln.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/gen_msvs_vcxproj.sh
    DIST-SRCS-$(CONFIG_MSVS)  += build/make/msvs_common.sh
-    DIST-SRCS-$(CONFIG_MSVS)  += build/x86-msvs/obj_int_extract.bat
-    DIST-SRCS-$(CONFIG_MSVS)  += build/arm-msvs/obj_int_extract.bat
    DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh
-    # Include obj_int_extract if we use offsets from *_asm_*_offsets
-    DIST-SRCS-$(ARCH_ARM)$(ARCH_X86)$(ARCH_X86_64)    += build/make/obj_int_extract.c
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2gas_apple.pl
    DIST-SRCS-$(ARCH_ARM)    += build/make/ads2armasm_ms.pl
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
--- a/build/make/gen_msvs_proj.sh
+++ b/build/make/gen_msvs_proj.sh
@@ -295,22 +295,7 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
-                    obj_int_extract)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="0" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
-                            RuntimeLibrary="$debug_runtime" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="1" \
-                            $warn_64bit \
-                    ;;
                    vpx)
-                        tag Tool \
-                            Name="VCPreBuildEventTool" \
-                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
-
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="0" \
@@ -347,11 +332,6 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
-                            obj_int_extract)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    GenerateDebugInformation="true" \
-                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
@@ -400,24 +380,7 @@ generate_vcproj() {
        case "$target" in
            x86*)
                case "$name" in
-                    obj_int_extract)
-                        tag Tool \
-                            Name="VCCLCompilerTool" \
-                            Optimization="2" \
-                            FavorSizeorSpeed="1" \
-                            AdditionalIncludeDirectories="$incs" \
-                            PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE" \
-                            RuntimeLibrary="$release_runtime" \
-                            UsePrecompiledHeader="0" \
-                            WarningLevel="3" \
-                            DebugInformationFormat="0" \
-                            $warn_64bit \
-                    ;;
                    vpx)
-                        tag Tool \
-                            Name="VCPreBuildEventTool" \
-                            CommandLine="call obj_int_extract.bat &quot;$src_path_bare&quot; $plat_no_ws\\\$(ConfigurationName)" \
-
                        tag Tool \
                            Name="VCCLCompilerTool" \
                            Optimization="2" \
@@ -456,11 +419,6 @@ generate_vcproj() {
                case "$target" in
                    x86*)
                        case "$name" in
-                            obj_int_extract)
-                                tag Tool \
-                                    Name="VCLinkerTool" \
-                                    GenerateDebugInformation="true" \
-                            ;;
                            *)
                                tag Tool \
                                    Name="VCLinkerTool" \
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -262,15 +262,9 @@ case "$target" in
        asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} &quot;%(FullPath)&quot;"
    ;;
    arm*)
-        asm_Debug_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
-        asm_Release_cmdline="armasm -nologo &quot;%(FullPath)&quot;"
-        if [ "$name" = "obj_int_extract" ]; then
-            # We don't want to build this tool for the target architecture,
-            # but for an architecture we can run locally during the build.
-            platforms[0]="Win32"
-        else
-            platforms[0]="ARM"
-        fi
+        platforms[0]="ARM"
+        asm_Debug_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
+        asm_Release_cmdline="armasm -nologo -oldit &quot;%(FullPath)&quot;"
    ;;
    *) die "Unsupported target $target!"
    ;;
@@ -400,23 +394,13 @@ generate_vcxproj() {
                if [ "$hostplat" == "ARM" ]; then
                    hostplat=Win32
                fi
-                open_tag PreBuildEvent
-                tag_content Command "call obj_int_extract.bat &quot;$src_path_bare&quot; $hostplat\\\$(Configuration)"
-                close_tag PreBuildEvent
            fi
            open_tag ClCompile
            if [ "$config" = "Debug" ]; then
                opt=Disabled
                runtime=$debug_runtime
                curlibs=$debug_libs
-                case "$name" in
-                obj_int_extract)
-                    debug=DEBUG
-                    ;;
-                *)
-                    debug=_DEBUG
-                    ;;
-                esac
+                debug=_DEBUG
            else
                opt=MaxSpeed
                runtime=$release_runtime
@@ -424,14 +408,7 @@ generate_vcxproj() {
                tag_content FavorSizeOrSpeed Speed
                debug=NDEBUG
            fi
-            case "$name" in
-            obj_int_extract)
-                extradefines=";_CONSOLE"
-                ;;
-            *)
-                extradefines=";$defines"
-                ;;
-            esac
+            extradefines=";$defines"
            tag_content Optimization $opt
            tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)"
            tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)"
@@ -451,10 +428,6 @@ generate_vcxproj() {
            case "$proj_kind" in
            exe)
                open_tag Link
-                if [ "$name" != "obj_int_extract" ]; then
-                    tag_content AdditionalDependencies "$curlibs;%(AdditionalDependencies)"
-                    tag_content AdditionalLibraryDirectories "$libdirs;%(AdditionalLibraryDirectories)"
-                fi
                tag_content GenerateDebugInformation true
                # Console is the default normally, but if
                # AppContainerApplication is set, we need to override it.
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -18,15 +18,19 @@ set -e
 devnull='> /dev/null 2>&1'

 BUILD_ROOT="_iosbuild"
+CONFIGURE_ARGS="--disable-docs
+                --disable-examples
+                --disable-libyuv
+                --disable-unit-tests"
 DIST_DIR="_dist"
 FRAMEWORK_DIR="VPX.framework"
 HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx"
 MAKE_JOBS=1
-LIBVPX_SOURCE_DIR=$(dirname "$0" | sed -e s,/build/make,,)
+SCRIPT_DIR=$(dirname "$0")
+LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd)
 LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo)
 ORIG_PWD="$(pwd)"
 TARGETS="arm64-darwin-gcc
-         armv6-darwin-gcc
         armv7-darwin-gcc
         armv7s-darwin-gcc
         x86-iphonesimulator-gcc
@@ -42,8 +46,8 @@ build_target() {

  mkdir "${target}"
  cd "${target}"
-  eval "../../${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
-      --disable-docs ${EXTRA_CONFIGURE_ARGS} ${devnull}
+  eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \
+    ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${devnull}
  export DIST_DIR
  eval make -j ${MAKE_JOBS} dist ${devnull}
  cd "${old_pwd}"
@@ -58,9 +62,6 @@ target_to_preproc_symbol() {
    arm64-*)
      echo "__aarch64__"
      ;;
-    armv6-*)
-      echo "__ARM_ARCH_6__"
-      ;;
    armv7-*)
      echo "__ARM_ARCH_7A__"
      ;;
@@ -176,8 +177,13 @@ build_framework() {
 # Trap function. Cleans up the subtree used to build all targets contained in
 # $TARGETS.
 cleanup() {
+  local readonly res=$?
  cd "${ORIG_PWD}"

+  if [ $res -ne 0 ]; then
+    elog "build exited with error ($res)"
+  fi
+
  if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then
    rm -rf "${BUILD_ROOT}"
  fi
@@ -187,14 +193,21 @@ iosbuild_usage() {
 cat << EOF
  Usage: ${0##*/} [arguments]
    --help: Display this message and exit.
+    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
    --jobs: Number of make jobs.
    --preserve-build-output: Do not delete the build directory.
    --show-build-output: Show output from each library build.
+    --targets <targets>: Override default target list. Defaults:
+         ${TARGETS}
    --verbose: Output information about the environment and each stage of the
               build.
 EOF
 }

+elog() {
+  echo "${0##*/} failed because: $@" 1>&2
+}
+
 vlog() {
  if [ "${VERBOSE}" = "yes" ]; then
    echo "$@"
@@ -224,6 +237,10 @@ while [ -n "$1" ]; do
    --show-build-output)
      devnull=
      ;;
+    --targets)
+      TARGETS="$2"
+      shift
+      ;;
    --verbose)
      VERBOSE=yes
      ;;
@@ -239,6 +256,7 @@ if [ "${VERBOSE}" = "yes" ]; then
 cat << EOF
  BUILD_ROOT=${BUILD_ROOT}
  DIST_DIR=${DIST_DIR}
+  CONFIGURE_ARGS=${CONFIGURE_ARGS}
  EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS}
  FRAMEWORK_DIR=${FRAMEWORK_DIR}
  HEADER_DIR=${HEADER_DIR}
@@ -252,3 +270,5 @@ EOF
 fi

 build_framework "${TARGETS}"
+echo "Successfully built '${FRAMEWORK_DIR}' for:"
+echo "         ${TARGETS}"
--- a/build/make/obj_int_extract.c
+++ b/build/make/obj_int_extract.c
@@ -1,857 +0,0 @@
-/*
- *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-
-#include <stdarg.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "vpx_config.h"
-#include "vpx/vpx_integer.h"
-
-typedef enum {
-  OUTPUT_FMT_PLAIN,
-  OUTPUT_FMT_RVDS,
-  OUTPUT_FMT_GAS,
-  OUTPUT_FMT_C_HEADER,
-} output_fmt_t;
-
-int log_msg(const char *fmt, ...) {
-  int res;
-  va_list ap;
-  va_start(ap, fmt);
-  res = vfprintf(stderr, fmt, ap);
-  va_end(ap);
-  return res;
-}
-
-#if defined(__GNUC__) && __GNUC__
-
-#if defined(FORCE_PARSE_ELF)
-
-#if defined(__MACH__)
-#undef __MACH__
-#endif
-
-#if !defined(__ELF__)
-#define __ELF__
-#endif
-#endif
-
-#if defined(__MACH__)
-
-#include <mach-o/loader.h>
-#include <mach-o/nlist.h>
-
-int print_macho_equ(output_fmt_t mode, uint8_t* name, int val) {
-  switch (mode) {
-    case OUTPUT_FMT_RVDS:
-      printf("%-40s EQU %5d\n", name, val);
-      return 0;
-    case OUTPUT_FMT_GAS:
-      printf(".set %-40s, %5d\n", name, val);
-      return 0;
-    case OUTPUT_FMT_C_HEADER:
-      printf("#define %-40s %5d\n", name, val);
-      return 0;
-    default:
-      log_msg("Unsupported mode: %d", mode);
-      return 1;
-  }
-}
-
-int parse_macho(uint8_t *base_buf, size_t sz, output_fmt_t mode) {
-  int i, j;
-  struct mach_header header;
-  uint8_t *buf = base_buf;
-  int base_data_section = 0;
-  int bits = 0;
-
-  /* We can read in mach_header for 32 and 64 bit architectures
-   * because it's identical to mach_header_64 except for the last
-   * element (uint32_t reserved), which we don't use. Then, when
-   * we know which architecture we're looking at, increment buf
-   * appropriately.
-   */
-  memcpy(&header, buf, sizeof(struct mach_header));
-
-  if (header.magic == MH_MAGIC) {
-    if (header.cputype == CPU_TYPE_ARM
-        || header.cputype == CPU_TYPE_X86) {
-      bits = 32;
-      buf += sizeof(struct mach_header);
-    } else {
-      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_[ARM|X86].\n");
-      goto bail;
-    }
-  } else if (header.magic == MH_MAGIC_64) {
-    if (header.cputype == CPU_TYPE_X86_64) {
-      bits = 64;
-      buf += sizeof(struct mach_header_64);
-    } else {
-      log_msg("Bad cputype for object file. Currently only tested for CPU_TYPE_X86_64.\n");
-      goto bail;
-    }
-  } else {
-    log_msg("Bad magic number for object file. 0x%x or 0x%x expected, 0x%x found.\n",
-            MH_MAGIC, MH_MAGIC_64, header.magic);
-    goto bail;
-  }
-
-  if (header.filetype != MH_OBJECT) {
-    log_msg("Bad filetype for object file. Currently only tested for MH_OBJECT.\n");
-    goto bail;
-  }
-
-  for (i = 0; i < header.ncmds; i++) {
-    struct load_command lc;
-
-    memcpy(&lc, buf, sizeof(struct load_command));
-
-    if (lc.cmd == LC_SEGMENT) {
-      uint8_t *seg_buf = buf;
-      struct section s;
-      struct segment_command seg_c;
-
-      memcpy(&seg_c, seg_buf, sizeof(struct segment_command));
-      seg_buf += sizeof(struct segment_command);
-
-      /* Although each section is given it's own offset, nlist.n_value
-       * references the offset of the first section. This isn't
-       * apparent without debug information because the offset of the
-       * data section is the same as the first section. However, with
-       * debug sections mixed in, the offset of the debug section
-       * increases but n_value still references the first section.
-       */
-      if (seg_c.nsects < 1) {
-        log_msg("Not enough sections\n");
-        goto bail;
-      }
-
-      memcpy(&s, seg_buf, sizeof(struct section));
-      base_data_section = s.offset;
-    } else if (lc.cmd == LC_SEGMENT_64) {
-      uint8_t *seg_buf = buf;
-      struct section_64 s;
-      struct segment_command_64 seg_c;
-
-      memcpy(&seg_c, seg_buf, sizeof(struct segment_command_64));
-      seg_buf += sizeof(struct segment_command_64);
-
-      /* Explanation in LG_SEGMENT */
-      if (seg_c.nsects < 1) {
-        log_msg("Not enough sections\n");
-        goto bail;
-      }
-
-      memcpy(&s, seg_buf, sizeof(struct section_64));
-      base_data_section = s.offset;
-    } else if (lc.cmd == LC_SYMTAB) {
-      if (base_data_section != 0) {
-        struct symtab_command sc;
-        uint8_t *sym_buf = base_buf;
-        uint8_t *str_buf = base_buf;
-
-        memcpy(&sc, buf, sizeof(struct symtab_command));
-
-        if (sc.cmdsize != sizeof(struct symtab_command)) {
-          log_msg("Can't find symbol table!\n");
-          goto bail;
-        }
-
-        sym_buf += sc.symoff;
-        str_buf += sc.stroff;
-
-        for (j = 0; j < sc.nsyms; j++) {
-          /* Location of string is cacluated each time from the
-           * start of the string buffer.  On darwin the symbols
-           * are prefixed by "_", so we bump the pointer by 1.
-           * The target value is defined as an int in *_asm_*_offsets.c,
-           * which is 4 bytes on all targets we currently use.
-           */
-          if (bits == 32) {
-            struct nlist nl;
-            int val;
-
-            memcpy(&nl, sym_buf, sizeof(struct nlist));
-            sym_buf += sizeof(struct nlist);
-
-            memcpy(&val, base_buf + base_data_section + nl.n_value,
-                   sizeof(val));
-            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
-          } else { /* if (bits == 64) */
-            struct nlist_64 nl;
-            int val;
-
-            memcpy(&nl, sym_buf, sizeof(struct nlist_64));
-            sym_buf += sizeof(struct nlist_64);
-
-            memcpy(&val, base_buf + base_data_section + nl.n_value,
-                   sizeof(val));
-            print_macho_equ(mode, str_buf + nl.n_un.n_strx + 1, val);
-          }
-        }
-      }
-    }
-
-    buf += lc.cmdsize;
-  }
-
-  return 0;
-bail:
-  return 1;
-
-}
-
-#elif defined(__ELF__)
-#include "elf.h"
-
-#define COPY_STRUCT(dst, buf, ofst, sz) do {\
-    if(ofst + sizeof((*(dst))) > sz) goto bail;\
-    memcpy(dst, buf+ofst, sizeof((*(dst))));\
-  } while(0)
-
-#define ENDIAN_ASSIGN(val, memb) do {\
-    if(!elf->le_data) {log_msg("Big Endian data not supported yet!\n");goto bail;}\
-    (val) = (memb);\
-  } while(0)
-
-#define ENDIAN_ASSIGN_IN_PLACE(memb) do {\
-    ENDIAN_ASSIGN(memb, memb);\
-  } while(0)
-
-typedef struct {
-  uint8_t      *buf; /* Buffer containing ELF data */
-  size_t        sz;  /* Buffer size */
-  int           le_data; /* Data is little-endian */
-  unsigned char e_ident[EI_NIDENT]; /* Magic number and other info */
-  int           bits; /* 32 or 64 */
-  Elf32_Ehdr    hdr32;
-  Elf64_Ehdr    hdr64;
-} elf_obj_t;
-
-int parse_elf_header(elf_obj_t *elf) {
-  int res;
-  /* Verify ELF Magic numbers */
-  COPY_STRUCT(&elf->e_ident, elf->buf, 0, elf->sz);
-  res = elf->e_ident[EI_MAG0] == ELFMAG0;
-  res &= elf->e_ident[EI_MAG1] == ELFMAG1;
-  res &= elf->e_ident[EI_MAG2] == ELFMAG2;
-  res &= elf->e_ident[EI_MAG3] == ELFMAG3;
-  res &= elf->e_ident[EI_CLASS] == ELFCLASS32
-         || elf->e_ident[EI_CLASS] == ELFCLASS64;
-  res &= elf->e_ident[EI_DATA] == ELFDATA2LSB;
-
-  if (!res) goto bail;
-
-  elf->le_data = elf->e_ident[EI_DATA] == ELFDATA2LSB;
-
-  /* Read in relevant values */
-  if (elf->e_ident[EI_CLASS] == ELFCLASS32) {
-    elf->bits = 32;
-    COPY_STRUCT(&elf->hdr32, elf->buf, 0, elf->sz);
-
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_type);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_machine);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_version);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_entry);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_flags);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_ehsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_phnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr32.e_shstrndx);
-  } else { /* if (elf->e_ident[EI_CLASS] == ELFCLASS64) */
-    elf->bits = 64;
-    COPY_STRUCT(&elf->hdr64, elf->buf, 0, elf->sz);
-
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_type);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_machine);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_version);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_entry);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shoff);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_flags);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_ehsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_phnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shentsize);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shnum);
-    ENDIAN_ASSIGN_IN_PLACE(elf->hdr64.e_shstrndx);
-  }
-
-  return 0;
-bail:
-  log_msg("Failed to parse ELF file header");
-  return 1;
-}
-
-int parse_elf_section(elf_obj_t *elf, int idx, Elf32_Shdr *hdr32, Elf64_Shdr *hdr64) {
-  if (hdr32) {
-    if (idx >= elf->hdr32.e_shnum)
-      goto bail;
-
-    COPY_STRUCT(hdr32, elf->buf, elf->hdr32.e_shoff + idx * elf->hdr32.e_shentsize,
-                elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_name);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_type);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_flags);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addr);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_offset);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_size);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_link);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_info);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_addralign);
-    ENDIAN_ASSIGN_IN_PLACE(hdr32->sh_entsize);
-  } else { /* if (hdr64) */
-    if (idx >= elf->hdr64.e_shnum)
-      goto bail;
-
-    COPY_STRUCT(hdr64, elf->buf, elf->hdr64.e_shoff + idx * elf->hdr64.e_shentsize,
-                elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_name);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_type);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_flags);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addr);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_offset);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_size);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_link);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_info);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_addralign);
-    ENDIAN_ASSIGN_IN_PLACE(hdr64->sh_entsize);
-  }
-
-  return 0;
-bail:
-  return 1;
-}
-
-const char *parse_elf_string_table(elf_obj_t *elf, int s_idx, int idx) {
-  if (elf->bits == 32) {
-    Elf32_Shdr shdr;
-
-    if (parse_elf_section(elf, s_idx, &shdr, NULL)) {
-      log_msg("Failed to parse ELF string table: section %d, index %d\n",
-              s_idx, idx);
-      return "";
-    }
-
-    return (char *)(elf->buf + shdr.sh_offset + idx);
-  } else { /* if (elf->bits == 64) */
-    Elf64_Shdr shdr;
-
-    if (parse_elf_section(elf, s_idx, NULL, &shdr)) {
-      log_msg("Failed to parse ELF string table: section %d, index %d\n",
-              s_idx, idx);
-      return "";
-    }
-
-    return (char *)(elf->buf + shdr.sh_offset + idx);
-  }
-}
-
-int parse_elf_symbol(elf_obj_t *elf, unsigned int ofst, Elf32_Sym *sym32, Elf64_Sym *sym64) {
-  if (sym32) {
-    COPY_STRUCT(sym32, elf->buf, ofst, elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_name);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_value);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_size);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_info);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_other);
-    ENDIAN_ASSIGN_IN_PLACE(sym32->st_shndx);
-  } else { /* if (sym64) */
-    COPY_STRUCT(sym64, elf->buf, ofst, elf->sz);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_name);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_value);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_size);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_info);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_other);
-    ENDIAN_ASSIGN_IN_PLACE(sym64->st_shndx);
-  }
-  return 0;
-bail:
-  return 1;
-}
-
-int parse_elf(uint8_t *buf, size_t sz, output_fmt_t mode) {
-  elf_obj_t    elf;
-  unsigned int ofst;
-  int          i;
-  Elf32_Off    strtab_off32;
-  Elf64_Off    strtab_off64; /* save String Table offset for later use */
-
-  memset(&elf, 0, sizeof(elf));
-  elf.buf = buf;
-  elf.sz = sz;
-
-  /* Parse Header */
-  if (parse_elf_header(&elf))
-    goto bail;
-
-  if (elf.bits == 32) {
-    Elf32_Shdr shdr;
-    for (i = 0; i < elf.hdr32.e_shnum; i++) {
-      parse_elf_section(&elf, i, &shdr, NULL);
-
-      if (shdr.sh_type == SHT_STRTAB) {
-        char strtsb_name[128];
-
-        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
-        if (!(strcmp(strtsb_name, ".shstrtab"))) {
-          /* log_msg("found section: %s\n", strtsb_name); */
-          strtab_off32 = shdr.sh_offset;
-          break;
-        }
-      }
-    }
-  } else { /* if (elf.bits == 64) */
-    Elf64_Shdr shdr;
-    for (i = 0; i < elf.hdr64.e_shnum; i++) {
-      parse_elf_section(&elf, i, NULL, &shdr);
-
-      if (shdr.sh_type == SHT_STRTAB) {
-        char strtsb_name[128];
-
-        strcpy(strtsb_name, (char *)(elf.buf + shdr.sh_offset + shdr.sh_name));
-
-        if (!(strcmp(strtsb_name, ".shstrtab"))) {
-          /* log_msg("found section: %s\n", strtsb_name); */
-          strtab_off64 = shdr.sh_offset;
-          break;
-        }
-      }
-    }
-  }
-
-  /* Parse all Symbol Tables */
-  if (elf.bits == 32) {
-    Elf32_Shdr shdr;
-    for (i = 0; i < elf.hdr32.e_shnum; i++) {
-      parse_elf_section(&elf, i, &shdr, NULL);
-
-      if (shdr.sh_type == SHT_SYMTAB) {
-        for (ofst = shdr.sh_offset;
-             ofst < shdr.sh_offset + shdr.sh_size;
-             ofst += shdr.sh_entsize) {
-          Elf32_Sym sym;
-
-          parse_elf_symbol(&elf, ofst, &sym, NULL);
-
-          /* For all OBJECTS (data objects), extract the value from the
-           * proper data segment.
-           */
-          /* if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
-              log_msg("found data object %s\n",
-                      parse_elf_string_table(&elf,
-                                             shdr.sh_link,
-                                             sym.st_name));
-           */
-
-          if (ELF32_ST_TYPE(sym.st_info) == STT_OBJECT
-              && sym.st_size == 4) {
-            Elf32_Shdr dhdr;
-            int val = 0;
-            char section_name[128];
-
-            parse_elf_section(&elf, sym.st_shndx, &dhdr, NULL);
-
-            /* For explanition - refer to _MSC_VER version of code */
-            strcpy(section_name, (char *)(elf.buf + strtab_off32 + dhdr.sh_name));
-            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
-            if (strcmp(section_name, ".bss")) {
-              if (sizeof(val) != sym.st_size) {
-                /* The target value is declared as an int in
-                 * *_asm_*_offsets.c, which is 4 bytes on all
-                 * targets we currently use. Complain loudly if
-                 * this is not true.
-                 */
-                log_msg("Symbol size is wrong\n");
-                goto bail;
-              }
-
-              memcpy(&val,
-                     elf.buf + dhdr.sh_offset + sym.st_value,
-                     sym.st_size);
-            }
-
-            if (!elf.le_data) {
-              log_msg("Big Endian data not supported yet!\n");
-              goto bail;
-            }
-
-            switch (mode) {
-              case OUTPUT_FMT_RVDS:
-                printf("%-40s EQU %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_GAS:
-                printf(".equ %-40s, %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_C_HEADER:
-                printf("#define %-40s %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              default:
-                printf("%s = %d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-            }
-          }
-        }
-      }
-    }
-  } else { /* if (elf.bits == 64) */
-    Elf64_Shdr shdr;
-    for (i = 0; i < elf.hdr64.e_shnum; i++) {
-      parse_elf_section(&elf, i, NULL, &shdr);
-
-      if (shdr.sh_type == SHT_SYMTAB) {
-        for (ofst = shdr.sh_offset;
-             ofst < shdr.sh_offset + shdr.sh_size;
-             ofst += shdr.sh_entsize) {
-          Elf64_Sym sym;
-
-          parse_elf_symbol(&elf, ofst, NULL, &sym);
-
-          /* For all OBJECTS (data objects), extract the value from the
-           * proper data segment.
-           */
-          /* if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT && sym.st_name)
-              log_msg("found data object %s\n",
-                      parse_elf_string_table(&elf,
-                                             shdr.sh_link,
-                                             sym.st_name));
-           */
-
-          if (ELF64_ST_TYPE(sym.st_info) == STT_OBJECT
-              && sym.st_size == 4) {
-            Elf64_Shdr dhdr;
-            int val = 0;
-            char section_name[128];
-
-            parse_elf_section(&elf, sym.st_shndx, NULL, &dhdr);
-
-            /* For explanition - refer to _MSC_VER version of code */
-            strcpy(section_name, (char *)(elf.buf + strtab_off64 + dhdr.sh_name));
-            /* log_msg("Section_name: %s, Section_type: %d\n", section_name, dhdr.sh_type); */
-
-            if ((strcmp(section_name, ".bss"))) {
-              if (sizeof(val) != sym.st_size) {
-                /* The target value is declared as an int in
-                 * *_asm_*_offsets.c, which is 4 bytes on all
-                 * targets we currently use. Complain loudly if
-                 * this is not true.
-                 */
-                log_msg("Symbol size is wrong\n");
-                goto bail;
-              }
-
-              memcpy(&val,
-                     elf.buf + dhdr.sh_offset + sym.st_value,
-                     sym.st_size);
-            }
-
-            if (!elf.le_data) {
-              log_msg("Big Endian data not supported yet!\n");
-              goto bail;
-            }
-
-            switch (mode) {
-              case OUTPUT_FMT_RVDS:
-                printf("%-40s EQU %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              case OUTPUT_FMT_GAS:
-                printf(".equ %-40s, %5d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-                break;
-              default:
-                printf("%s = %d\n",
-                       parse_elf_string_table(&elf,
-                                              shdr.sh_link,
-                                              sym.st_name),
-                       val);
-            }
-          }
-        }
-      }
-    }
-  }
-
-  if (mode == OUTPUT_FMT_RVDS)
-    printf("    END\n");
-
-  return 0;
-bail:
-  log_msg("Parse error: File does not appear to be valid ELF32 or ELF64\n");
-  return 1;
-}
-
-#endif
-#endif /* defined(__GNUC__) && __GNUC__ */
-
-
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
-/*  See "Microsoft Portable Executable and Common Object File Format Specification"
-    for reference.
-*/
-#define get_le32(x) ((*(x)) | (*(x+1)) << 8 |(*(x+2)) << 16 | (*(x+3)) << 24 )
-#define get_le16(x) ((*(x)) | (*(x+1)) << 8)
-
-int parse_coff(uint8_t *buf, size_t sz) {
-  unsigned int nsections, symtab_ptr, symtab_sz, strtab_ptr;
-  unsigned int sectionrawdata_ptr;
-  unsigned int i;
-  uint8_t *ptr;
-  uint32_t symoffset;
-
-  char **sectionlist;  // this array holds all section names in their correct order.
-  // it is used to check if the symbol is in .bss or .rdata section.
-
-  nsections = get_le16(buf + 2);
-  symtab_ptr = get_le32(buf + 8);
-  symtab_sz = get_le32(buf + 12);
-  strtab_ptr = symtab_ptr + symtab_sz * 18;
-
-  if (nsections > 96) {
-    log_msg("Too many sections\n");
-    return 1;
-  }
-
-  sectionlist = malloc(nsections * sizeof(sectionlist));
-
-  if (sectionlist == NULL) {
-    log_msg("Allocating first level of section list failed\n");
-    return 1;
-  }
-
-  // log_msg("COFF: Found %u symbols in %u sections.\n", symtab_sz, nsections);
-
-  /*
-  The size of optional header is always zero for an obj file. So, the section header
-  follows the file header immediately.
-  */
-
-  ptr = buf + 20;     // section header
-
-  for (i = 0; i < nsections; i++) {
-    char sectionname[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
-    strncpy(sectionname, ptr, 8);
-    // log_msg("COFF: Parsing section %s\n",sectionname);
-
-    sectionlist[i] = malloc(strlen(sectionname) + 1);
-
-    if (sectionlist[i] == NULL) {
-      log_msg("Allocating storage for %s failed\n", sectionname);
-      goto bail;
-    }
-    strcpy(sectionlist[i], sectionname);
-
-    // check if it's .rdata and is not a COMDAT section.
-    if (!strcmp(sectionname, ".rdata") &&
-        (get_le32(ptr + 36) & 0x1000) == 0) {
-      sectionrawdata_ptr = get_le32(ptr + 20);
-    }
-
-    ptr += 40;
-  }
-
-  // log_msg("COFF: Symbol table at offset %u\n", symtab_ptr);
-  // log_msg("COFF: raw data pointer ofset for section .rdata is %u\n", sectionrawdata_ptr);
-
-  /*  The compiler puts the data with non-zero offset in .rdata section, but puts the data with
-      zero offset in .bss section. So, if the data in in .bss section, set offset=0.
-      Note from Wiki: In an object module compiled from C, the bss section contains
-      the local variables (but not functions) that were declared with the static keyword,
-      except for those with non-zero initial values. (In C, static variables are initialized
-      to zero by default.) It also contains the non-local (both extern and static) variables
-      that are also initialized to zero (either explicitly or by default).
-      */
-  // move to symbol table
-  /* COFF symbol table:
-      offset      field
-      0           Name(*)
-      8           Value
-      12          SectionNumber
-      14          Type
-      16          StorageClass
-      17          NumberOfAuxSymbols
-      */
-  ptr = buf + symtab_ptr;
-
-  for (i = 0; i < symtab_sz; i++) {
-    int16_t section = get_le16(ptr + 12); // section number
-
-    if (section > 0 && ptr[16] == 2) {
-      // if(section > 0 && ptr[16] == 3 && get_le32(ptr+8)) {
-
-      if (get_le32(ptr)) {
-        char name[9] = {0, 0, 0, 0, 0, 0, 0, 0, 0};
-        strncpy(name, ptr, 8);
-        // log_msg("COFF: Parsing symbol %s\n",name);
-        /* The 64bit Windows compiler doesn't prefix with an _.
-         * Check what's there, and bump if necessary
-         */
-        if (name[0] == '_')
-          printf("%-40s EQU ", name + 1);
-        else
-          printf("%-40s EQU ", name);
-      } else {
-        // log_msg("COFF: Parsing symbol %s\n",
-        //        buf + strtab_ptr + get_le32(ptr+4));
-        if ((buf + strtab_ptr + get_le32(ptr + 4))[0] == '_')
-          printf("%-40s EQU ",
-                 buf + strtab_ptr + get_le32(ptr + 4) + 1);
-        else
-          printf("%-40s EQU ", buf + strtab_ptr + get_le32(ptr + 4));
-      }
-
-      if (!(strcmp(sectionlist[section - 1], ".bss"))) {
-        symoffset = 0;
-      } else {
-        symoffset = get_le32(buf + sectionrawdata_ptr + get_le32(ptr + 8));
-      }
-
-      // log_msg("      Section: %d\n",section);
-      // log_msg("      Class:   %d\n",ptr[16]);
-      // log_msg("      Address: %u\n",get_le32(ptr+8));
-      // log_msg("      Offset: %u\n", symoffset);
-
-      printf("%5d\n", symoffset);
-    }
-
-    ptr += 18;
-  }
-
-  printf("    END\n");
-
-  for (i = 0; i < nsections; i++) {
-    free(sectionlist[i]);
-  }
-
-  free(sectionlist);
-
-  return 0;
-bail:
-
-  for (i = 0; i < nsections; i++) {
-    free(sectionlist[i]);
-  }
-
-  free(sectionlist);
-
-  return 1;
-}
-#endif /* defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__) */
-
-int main(int argc, char **argv) {
-  output_fmt_t mode = OUTPUT_FMT_PLAIN;
-  const char *f;
-  uint8_t *file_buf;
-  int res;
-  FILE *fp;
-  long int file_size;
-
-  if (argc < 2 || argc > 3) {
-    fprintf(stderr, "Usage: %s [output format] <obj file>\n\n", argv[0]);
-    fprintf(stderr, "  <obj file>\tobject file to parse\n");
-    fprintf(stderr, "Output Formats:\n");
-    fprintf(stderr, "  gas  - compatible with GNU assembler\n");
-    fprintf(stderr, "  rvds - compatible with armasm\n");
-    fprintf(stderr, "  cheader - c/c++ header file\n");
-    goto bail;
-  }
-
-  f = argv[2];
-
-  if (!strcmp(argv[1], "rvds"))
-    mode = OUTPUT_FMT_RVDS;
-  else if (!strcmp(argv[1], "gas"))
-    mode = OUTPUT_FMT_GAS;
-  else if (!strcmp(argv[1], "cheader"))
-    mode = OUTPUT_FMT_C_HEADER;
-  else
-    f = argv[1];
-
-  fp = fopen(f, "rb");
-
-  if (!fp) {
-    perror("Unable to open file");
-    goto bail;
-  }
-
-  if (fseek(fp, 0, SEEK_END)) {
-    perror("stat");
-    goto bail;
-  }
-
-  file_size = ftell(fp);
-  file_buf = malloc(file_size);
-
-  if (!file_buf) {
-    perror("malloc");
-    goto bail;
-  }
-
-  rewind(fp);
-
-  if (fread(file_buf, sizeof(char), file_size, fp) != file_size) {
-    perror("read");
-    goto bail;
-  }
-
-  if (fclose(fp)) {
-    perror("close");
-    goto bail;
-  }
-
-#if defined(__GNUC__) && __GNUC__
-#if defined(__MACH__)
-  res = parse_macho(file_buf, file_size, mode);
-#elif defined(__ELF__)
-  res = parse_elf(file_buf, file_size, mode);
-#endif
-#endif
-#if defined(_MSC_VER) || defined(__MINGW32__) || defined(__CYGWIN__)
-  res = parse_coff(file_buf, file_size);
-#endif
-
-  free(file_buf);
-
-  if (!res)
-    return EXIT_SUCCESS;
-
-bail:
-  return EXIT_FAILURE;
-}
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -376,17 +376,18 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/);
      last;
    }
+    if (/HAVE_MSA=yes/) {
+      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
+      last;
+    }
  }
  close CONFIG_FILE;
  mips;
-} elsif ($opts{arch} eq 'armv5te') {
-  @ALL_ARCHS = filter(qw/edsp/);
-  arm;
 } elsif ($opts{arch} eq 'armv6') {
-  @ALL_ARCHS = filter(qw/edsp media/);
+  @ALL_ARCHS = filter(qw/media/);
  arm;
-} elsif ($opts{arch} eq 'armv7') {
-  @ALL_ARCHS = filter(qw/edsp media neon_asm neon/);
+} elsif ($opts{arch} =~ /armv7\w?/) {
+  @ALL_ARCHS = filter(qw/media neon_asm neon/);
  @REQUIRES = filter(keys %required ? keys %required : qw/media/);
  &require(@REQUIRES);
  arm;
--- a/build/x86-msvs/obj_int_extract.bat
+++ b/build/x86-msvs/obj_int_extract.bat
@@ -1,15 +0,0 @@
-REM   Copyright (c) 2011 The WebM project authors. All Rights Reserved.
-REM
-REM   Use of this source code is governed by a BSD-style license
-REM   that can be found in the LICENSE file in the root of the source
-REM   tree. An additional intellectual property rights grant can be found
-REM   in the file PATENTS.  All contributing project authors may
-REM   be found in the AUTHORS file in the root of the source tree.
-echo on
-
-REM Arguments:
-REM   %1 - Relative path to the directory containing the vp8 source directory.
-REM   %2 - Path to obj_int_extract.exe.
-cl /I. /I%1 /nologo /c "%~1/vp8/encoder/vp8_asm_enc_offsets.c"
-%2\obj_int_extract.exe rvds "vp8_asm_enc_offsets.obj" > "vp8_asm_enc_offsets.asm"
-
--- a/135
+++ b/135
@@ -26,19 +26,18 @@ Advanced options:
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
  ${toggle_encode_perf_tests}     build encoder perf tests with unit tests
+  --cpu=CPU                       tune for the specified CPU (ARM: cortex-a8, X86: sse3)
  --libc=PATH                     path to alternate libc
  --size-limit=WxH                max size to allow in the decoder
  --as={yasm|nasm|auto}           use specified assembler [auto, yasm preferred]
  --sdk-path=PATH                 path to root of sdk (android builds only)
-  ${toggle_fast_unaligned}        don't use unaligned accesses, even when
-                                  supported by hardware [auto]
  ${toggle_codec_srcs}            in/exclude codec library source code
  ${toggle_debug_libs}            in/exclude debug version of libraries
  ${toggle_static_msvcrt}         use static MSVCRT (VS builds only)
+  ${toggle_vp9_highbitdepth}      use VP9 high bit depth (10/12) profiles
  ${toggle_vp8}                   VP8 codec support
  ${toggle_vp9}                   VP9 codec support
  ${toggle_internal_stats}        output of encoder internal stats for debug, if supported (encoders)
-  ${toggle_mem_tracker}           track memory usage
  ${toggle_postproc}              postprocessing
  ${toggle_vp9_postproc}          vp9 specific postprocessing
  ${toggle_multithread}           multithreaded encoding and decoding
@@ -56,6 +55,8 @@ Advanced options:
  ${toggle_postproc_visualizer}   macro block / block level visualizers
  ${toggle_multi_res_encoding}    enable multiple-resolution encoding
  ${toggle_temporal_denoising}    enable temporal denoising and disable the spatial denoiser
+  ${toggle_vp9_temporal_denoising}
+                                  enable vp9 temporal denoising
  ${toggle_webm_io}               enable input from and output to WebM container
  ${toggle_libyuv}                enable libyuv

@@ -93,10 +94,6 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} armv5te-android-gcc"
-all_platforms="${all_platforms} armv5te-linux-rvct"
-all_platforms="${all_platforms} armv5te-linux-gcc"
-all_platforms="${all_platforms} armv5te-none-rvct"
 all_platforms="${all_platforms} armv6-darwin-gcc"
 all_platforms="${all_platforms} armv6-linux-rvct"
 all_platforms="${all_platforms} armv6-linux-gcc"
@@ -112,12 +109,6 @@ all_platforms="${all_platforms} armv7-win32-vs12"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
-all_platforms="${all_platforms} ppc32-darwin8-gcc"
-all_platforms="${all_platforms} ppc32-darwin9-gcc"
-all_platforms="${all_platforms} ppc32-linux-gcc"
-all_platforms="${all_platforms} ppc64-darwin8-gcc"
-all_platforms="${all_platforms} ppc64-darwin9-gcc"
-all_platforms="${all_platforms} ppc64-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
@@ -128,6 +119,7 @@ all_platforms="${all_platforms} x86-darwin10-gcc"
 all_platforms="${all_platforms} x86-darwin11-gcc"
 all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
+all_platforms="${all_platforms} x86-darwin14-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -145,6 +137,7 @@ all_platforms="${all_platforms} x86_64-darwin10-gcc"
 all_platforms="${all_platforms} x86_64-darwin11-gcc"
 all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
+all_platforms="${all_platforms} x86_64-darwin14-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -155,12 +148,6 @@ all_platforms="${all_platforms} x86_64-win64-vs9"
 all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
-all_platforms="${all_platforms} universal-darwin8-gcc"
-all_platforms="${all_platforms} universal-darwin9-gcc"
-all_platforms="${all_platforms} universal-darwin10-gcc"
-all_platforms="${all_platforms} universal-darwin11-gcc"
-all_platforms="${all_platforms} universal-darwin12-gcc"
-all_platforms="${all_platforms} universal-darwin13-gcc"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
@@ -197,6 +184,10 @@ if [ ${doxy_major:-0} -ge 1 ]; then
    [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen
 fi

+# disable codecs when their source directory does not exist
+[ -d "${source_path}/vp8" ] || disable_feature vp8
+[ -d "${source_path}/vp9" ] || disable_feature vp9
+
 # install everything except the sources, by default. sources will have
 # to be enabled when doing dist builds, since that's no longer a common
 # case.
@@ -206,45 +197,28 @@ enable_feature install_libs

 enable_feature static
 enable_feature optimizations
-enable_feature fast_unaligned #allow unaligned accesses, if supported by hw
+enable_feature dependency_tracking
 enable_feature spatial_resampling
 enable_feature multithread
 enable_feature os_support
 enable_feature temporal_denoising

-[ -d "${source_path}/../include" ] && enable_feature alt_tree_layout
-for d in vp8 vp9; do
-    [ -d "${source_path}/${d}" ] && disable_feature alt_tree_layout;
-done
-
-if ! enabled alt_tree_layout; then
-# development environment
-[ -d "${source_path}/vp8" ] && CODECS="${CODECS} vp8_encoder vp8_decoder"
-[ -d "${source_path}/vp9" ] && CODECS="${CODECS} vp9_encoder vp9_decoder"
-else
-# customer environment
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] && CODECS="${CODECS} vp8_encoder"
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] && CODECS="${CODECS} vp8_decoder"
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] && CODECS="${CODECS} vp9_encoder"
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] && CODECS="${CODECS} vp9_decoder"
-[ -f "${source_path}/../include/vpx/vp8cx.h" ] || disable_feature vp8_encoder
-[ -f "${source_path}/../include/vpx/vp8dx.h" ] || disable_feature vp8_decoder
-[ -f "${source_path}/../include/vpx/vp9cx.h" ] || disable_feature vp9_encoder
-[ -f "${source_path}/../include/vpx/vp9dx.h" ] || disable_feature vp9_decoder
-
-[ -f "${source_path}/../lib/*/*mt.lib" ] && soft_enable static_msvcrt
-fi
-
-CODECS="$(echo ${CODECS} | tr ' ' '\n')"
-CODEC_FAMILIES="$(for c in ${CODECS}; do echo ${c%_*}; done | sort | uniq)"
+CODECS="
+    vp8_encoder
+    vp8_decoder
+    vp9_encoder
+    vp9_decoder
+"
+CODEC_FAMILIES="
+    vp8
+    vp9
+"

 ARCH_LIST="
    arm
    mips
    x86
    x86_64
-    ppc32
-    ppc64
 "
 ARCH_EXT_LIST="
    edsp
@@ -254,7 +228,7 @@ ARCH_EXT_LIST="

    mips32
    dspr2
-
+    msa
    mips64

    mmx
@@ -265,25 +239,24 @@ ARCH_EXT_LIST="
    sse4_1
    avx
    avx2
-
-    altivec
 "
 HAVE_LIST="
    ${ARCH_EXT_LIST}
    vpx_ports
    stdint_h
-    alt_tree_layout
    pthread_h
    sys_mman_h
    unistd_h
 "
 EXPERIMENT_LIST="
    spatial_svc
-    vp9_temporal_denoising
    fp_mb_stats
+    full_buffer_test
+    internal_resize
    emulate_hardware
 "
 CONFIG_LIST="
+    dependency_tracking
    external_build
    install_docs
    install_bins
@@ -301,10 +274,6 @@ CONFIG_LIST="

    codec_srcs
    debug_libs
-    fast_unaligned
-    mem_manager
-    mem_tracker
-    mem_checks

    dequant_tokens
    dc_recon
@@ -334,6 +303,7 @@ CONFIG_LIST="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
+    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -341,6 +311,7 @@ CONFIG_LIST="
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
+    dependency_tracking
    external_build
    extra_warnings
    werror
@@ -364,7 +335,6 @@ CMDLINE_SELECT="
    libc
    as
    size_limit
-    fast_unaligned
    codec_srcs
    debug_libs

@@ -377,7 +347,6 @@ CMDLINE_SELECT="
    ${CODECS}
    ${CODEC_FAMILIES}
    static_msvcrt
-    mem_tracker
    spatial_resampling
    realtime_only
    onthefly_bitpacking
@@ -393,6 +362,7 @@ CMDLINE_SELECT="
    encode_perf_tests
    multi_res_encoding
    temporal_denoising
+    vp9_temporal_denoising
    coefficient_range_checking
    vp9_highbitdepth
    experimental
@@ -449,24 +419,8 @@ post_process_cmdline() {

 process_targets() {
    enabled child || write_common_config_banner
-    enabled universal || write_common_target_config_h  ${BUILD_PFX}vpx_config.h
-
-    # TODO: add host tools target (obj_int_extract, etc)
-
-    # For fat binaries, call configure recursively to configure for each
-    # binary architecture to be included.
-    if enabled universal; then
-        # Call configure (ourselves) for each subarchitecture
-        for arch in $fat_bin_archs; do
-            BUILD_PFX=${arch}/ toolchain=${arch} $self --child $cmdline_args || exit $?
-        done
-    fi
-
-    # The write_common_config (config.mk) logic is deferred until after the
-    # recursive calls to configure complete, because we want our universal
-    # targets to be executed last.
+    write_common_target_config_h ${BUILD_PFX}vpx_config.h
    write_common_config_targets
-    enabled universal && echo "FAT_ARCHS=${fat_bin_archs}" >> config.mk

    # Calculate the default distribution name, based on the enabled features
    cf=""
@@ -542,11 +496,11 @@ process_detect() {
        # Can only build shared libs on a subset of platforms. Doing this check
        # here rather than at option parse time because the target auto-detect
        # magic happens after the command line has been parsed.
-        if ! enabled linux; then
+        if ! enabled linux && ! enabled os2; then
            if enabled gnu; then
                echo "--enable-shared is only supported on ELF; assuming this is OK"
            else
-                die "--enable-shared only supported on ELF for now"
+                die "--enable-shared only supported on ELF and OS/2 for now"
            fi
        fi
    fi
@@ -611,30 +565,6 @@ EOF
 process_toolchain() {
    process_common_toolchain

-    # Handle universal binaries for this architecture
-    case $toolchain in
-        universal-darwin*)
-            darwin_ver=${tgt_os##darwin}
-
-            # Snow Leopard (10.6/darwin10) dropped support for PPC
-            # Include PPC support for all prior versions
-            if [ $darwin_ver -lt 10 ]; then
-                fat_bin_archs="$fat_bin_archs ppc32-${tgt_os}-gcc"
-            fi
-
-            # Tiger (10.4/darwin8) brought support for x86
-            if [ $darwin_ver -ge 8 ]; then
-                fat_bin_archs="$fat_bin_archs x86-${tgt_os}-${tgt_cc}"
-            fi
-
-            # Leopard (10.5/darwin9) brought 64 bit support
-            if [ $darwin_ver -ge 9 ]; then
-                fat_bin_archs="$fat_bin_archs x86_64-${tgt_os}-${tgt_cc}"
-            fi
-            ;;
-    esac
-
-
    # Enable some useful compiler flags
    if enabled gcc; then
        enabled werror && check_add_cflags -Werror
@@ -722,7 +652,7 @@ process_toolchain() {
    esac

    # Other toolchain specific defaults
-    case $toolchain in x86*|ppc*|universal*) soft_enable postproc;; esac
+    case $toolchain in x86*) soft_enable postproc;; esac

    if enabled postproc_visualizer; then
        enabled postproc || die "postproc_visualizer requires postproc to be enabled"
@@ -786,6 +716,7 @@ CONFIGURE_ARGS="$@"
 process "$@"
 print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */"
 cat <<EOF >> ${BUILD_PFX}vpx_config.c
+#include "vpx/vpx_codec.h"
 static const char* const cfg = "$CONFIGURE_ARGS";
 const char *vpx_codec_build_config(void) {return cfg;}
 EOF
--- a/examples.mk
+++ b/examples.mk
@@ -56,6 +56,7 @@ UTILS-$(CONFIG_DECODERS)    += vpxdec.c
 vpxdec.SRCS                 += md5_utils.c md5_utils.h
 vpxdec.SRCS                 += vpx_ports/mem_ops.h
 vpxdec.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxdec.SRCS                 += vpx_ports/msvc.h
 vpxdec.SRCS                 += vpx_ports/vpx_timer.h
 vpxdec.SRCS                 += vpx/vpx_integer.h
 vpxdec.SRCS                 += args.c args.h
@@ -80,6 +81,7 @@ vpxenc.SRCS                 += tools_common.c tools_common.h
 vpxenc.SRCS                 += warnings.c warnings.h
 vpxenc.SRCS                 += vpx_ports/mem_ops.h
 vpxenc.SRCS                 += vpx_ports/mem_ops_aligned.h
+vpxenc.SRCS                 += vpx_ports/msvc.h
 vpxenc.SRCS                 += vpx_ports/vpx_timer.h
 vpxenc.SRCS                 += vpxstats.c vpxstats.h
 ifeq ($(CONFIG_LIBYUV),yes)
@@ -98,6 +100,7 @@ ifeq ($(CONFIG_SPATIAL_SVC),yes)
  vp9_spatial_svc_encoder.SRCS        += tools_common.c tools_common.h
  vp9_spatial_svc_encoder.SRCS        += video_common.h
  vp9_spatial_svc_encoder.SRCS        += video_writer.h video_writer.c
+  vp9_spatial_svc_encoder.SRCS        += vpx_ports/msvc.h
  vp9_spatial_svc_encoder.SRCS        += vpxstats.c vpxstats.h
  vp9_spatial_svc_encoder.GUID        = 4A38598D-627D-4505-9C7B-D4020C84100D
  vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder
@@ -112,6 +115,7 @@ vpx_temporal_svc_encoder.SRCS        += ivfenc.c ivfenc.h
 vpx_temporal_svc_encoder.SRCS        += tools_common.c tools_common.h
 vpx_temporal_svc_encoder.SRCS        += video_common.h
 vpx_temporal_svc_encoder.SRCS        += video_writer.h video_writer.c
+vpx_temporal_svc_encoder.SRCS        += vpx_ports/msvc.h
 vpx_temporal_svc_encoder.GUID        = B18C08F2-A439-4502-A78E-849BE3D60947
 vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder
 EXAMPLES-$(CONFIG_DECODERS)        += simple_decoder.c
@@ -122,6 +126,7 @@ simple_decoder.SRCS                += video_common.h
 simple_decoder.SRCS                += video_reader.h video_reader.c
 simple_decoder.SRCS                += vpx_ports/mem_ops.h
 simple_decoder.SRCS                += vpx_ports/mem_ops_aligned.h
+simple_decoder.SRCS                += vpx_ports/msvc.h
 simple_decoder.DESCRIPTION          = Simplified decoder loop
 EXAMPLES-$(CONFIG_DECODERS)        += postproc.c
 postproc.SRCS                      += ivfdec.h ivfdec.c
@@ -130,6 +135,7 @@ postproc.SRCS                      += video_common.h
 postproc.SRCS                      += video_reader.h video_reader.c
 postproc.SRCS                      += vpx_ports/mem_ops.h
 postproc.SRCS                      += vpx_ports/mem_ops_aligned.h
+postproc.SRCS                      += vpx_ports/msvc.h
 postproc.GUID                       = 65E33355-F35E-4088-884D-3FD4905881D7
 postproc.DESCRIPTION                = Decoder postprocessor control
 EXAMPLES-$(CONFIG_DECODERS)        += decode_to_md5.c
@@ -140,6 +146,7 @@ decode_to_md5.SRCS                 += video_common.h
 decode_to_md5.SRCS                 += video_reader.h video_reader.c
 decode_to_md5.SRCS                 += vpx_ports/mem_ops.h
 decode_to_md5.SRCS                 += vpx_ports/mem_ops_aligned.h
+decode_to_md5.SRCS                 += vpx_ports/msvc.h
 decode_to_md5.GUID                  = 59120B9B-2735-4BFE-B022-146CA340FE42
 decode_to_md5.DESCRIPTION           = Frame by frame MD5 checksum
 EXAMPLES-$(CONFIG_ENCODERS)     += simple_encoder.c
@@ -147,6 +154,7 @@ simple_encoder.SRCS             += ivfenc.h ivfenc.c
 simple_encoder.SRCS             += tools_common.h tools_common.c
 simple_encoder.SRCS             += video_common.h
 simple_encoder.SRCS             += video_writer.h video_writer.c
+simple_encoder.SRCS             += vpx_ports/msvc.h
 simple_encoder.GUID              = 4607D299-8A71-4D2C-9B1D-071899B6FBFD
 simple_encoder.DESCRIPTION       = Simplified encoder loop
 EXAMPLES-$(CONFIG_VP9_ENCODER)  += vp9_lossless_encoder.c
@@ -154,6 +162,7 @@ vp9_lossless_encoder.SRCS       += ivfenc.h ivfenc.c
 vp9_lossless_encoder.SRCS       += tools_common.h tools_common.c
 vp9_lossless_encoder.SRCS       += video_common.h
 vp9_lossless_encoder.SRCS       += video_writer.h video_writer.c
+vp9_lossless_encoder.SRCS       += vpx_ports/msvc.h
 vp9_lossless_encoder.GUID        = B63C7C88-5348-46DC-A5A6-CC151EF93366
 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder
 EXAMPLES-$(CONFIG_ENCODERS)     += twopass_encoder.c
@@ -161,6 +170,7 @@ twopass_encoder.SRCS            += ivfenc.h ivfenc.c
 twopass_encoder.SRCS            += tools_common.h tools_common.c
 twopass_encoder.SRCS            += video_common.h
 twopass_encoder.SRCS            += video_writer.h video_writer.c
+twopass_encoder.SRCS            += vpx_ports/msvc.h
 twopass_encoder.GUID             = 73494FA6-4AF9-4763-8FBB-265C92402FD8
 twopass_encoder.DESCRIPTION      = Two-pass encoder loop
 EXAMPLES-$(CONFIG_DECODERS)     += decode_with_drops.c
@@ -170,6 +180,7 @@ decode_with_drops.SRCS          += video_common.h
 decode_with_drops.SRCS          += video_reader.h video_reader.c
 decode_with_drops.SRCS          += vpx_ports/mem_ops.h
 decode_with_drops.SRCS          += vpx_ports/mem_ops_aligned.h
+decode_with_drops.SRCS          += vpx_ports/msvc.h
 decode_with_drops.GUID           = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26
 decode_with_drops.DESCRIPTION    = Drops frames while decoding
 EXAMPLES-$(CONFIG_ENCODERS)        += set_maps.c
@@ -177,6 +188,7 @@ set_maps.SRCS                      += ivfenc.h ivfenc.c
 set_maps.SRCS                      += tools_common.h tools_common.c
 set_maps.SRCS                      += video_common.h
 set_maps.SRCS                      += video_writer.h video_writer.c
+set_maps.SRCS                      += vpx_ports/msvc.h
 set_maps.GUID                       = ECB2D24D-98B8-4015-A465-A4AF3DCC145F
 set_maps.DESCRIPTION                = Set active and ROI maps
 EXAMPLES-$(CONFIG_VP8_ENCODER)     += vp8cx_set_ref.c
@@ -184,6 +196,7 @@ vp8cx_set_ref.SRCS                 += ivfenc.h ivfenc.c
 vp8cx_set_ref.SRCS                 += tools_common.h tools_common.c
 vp8cx_set_ref.SRCS                 += video_common.h
 vp8cx_set_ref.SRCS                 += video_writer.h video_writer.c
+vp8cx_set_ref.SRCS                 += vpx_ports/msvc.h
 vp8cx_set_ref.GUID                  = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A
 vp8cx_set_ref.DESCRIPTION           = VP8 set encoder reference frame

@@ -194,6 +207,7 @@ EXAMPLES-$(CONFIG_VP8_ENCODER)          += vp8_multi_resolution_encoder.c
 vp8_multi_resolution_encoder.SRCS       += ivfenc.h ivfenc.c
 vp8_multi_resolution_encoder.SRCS       += tools_common.h tools_common.c
 vp8_multi_resolution_encoder.SRCS       += video_writer.h video_writer.c
+vp8_multi_resolution_encoder.SRCS       += vpx_ports/msvc.h
 vp8_multi_resolution_encoder.SRCS       += $(LIBYUV_SRCS)
 vp8_multi_resolution_encoder.GUID        = 04f8738e-63c8-423b-90fa-7c2703a374de
 vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding
@@ -254,14 +268,6 @@ CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS))
 $(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk))


-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_OBJS variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_OBJS variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_OBJS,BUILD_OBJS):=yes)
-
-
 # Create build/install dependencies for all examples. The common case
 # is handled here. The MSVS case is handled below.
 NOT_MSVS = $(if $(CONFIG_MSVS),,yes)
@@ -269,24 +275,28 @@ DIST-BINS-$(NOT_MSVS)      += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX)))
 INSTALL-BINS-$(NOT_MSVS)   += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX)))
 DIST-SRCS-yes              += $(ALL_SRCS)
 INSTALL-SRCS-yes           += $(UTIL_SRCS)
-OBJS-$(NOT_MSVS)           += $(if $(BUILD_OBJS),$(call objs,$(ALL_SRCS)))
+OBJS-$(NOT_MSVS)           += $(call objs,$(ALL_SRCS))
 BINS-$(NOT_MSVS)           += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX)))


 # Instantiate linker template for all examples.
 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-SHARED_LIB_SUF=$(if $(filter darwin%,$(TGT_OS)),.dylib,.so)
+ifneq ($(filter darwin%,$(TGT_OS)),)
+SHARED_LIB_SUF=.dylib
+else
+ifneq ($(filter os2%,$(TGT_OS)),)
+SHARED_LIB_SUF=_dll.a
+else
+SHARED_LIB_SUF=.so
+endif
+endif
 CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
 $(foreach bin,$(BINS-yes),\
-    $(if $(BUILD_OBJS),$(eval $(bin):\
-        $(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF)))\
-    $(if $(BUILD_OBJS),$(eval $(call linker_template,$(bin),\
+    $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\
+    $(eval $(call linker_template,$(bin),\
        $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \
        -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\
-        )))\
-    $(if $(LIPO_OBJS),$(eval $(call lipo_bin_template,$(bin))))\
-    )
-
+        )))

 # The following pairs define a mapping of locations in the distribution
 # tree to locations in the source/build trees.
@@ -338,6 +348,7 @@ $(foreach proj,$(call enabled,PROJECTS),\
 #
 %.dox: %.c
 	@echo "    [DOXY] $@"
+	@mkdir -p $(dir $@)
 	@echo "/*!\page example_$(@F:.dox=) $(@F:.dox=)" > $@
 	@echo "   \includelineno $(<F)" >> $@
 	@echo "*/" >> $@
--- a/examples/decode_to_md5.c
+++ b/examples/decode_to_md5.c
@@ -36,9 +36,9 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./md5_utils.h"
-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../md5_utils.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) {
@@ -71,7 +71,7 @@ static void print_md5(FILE *stream, unsigned char digest[16]) {

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -59,13 +59,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/postproc.c
+++ b/examples/postproc.c
@@ -46,13 +46,13 @@
 #include "vpx/vp8dx.h"
 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/resize_util.c
+++ b/examples/resize_util.c
@@ -15,15 +15,23 @@
 #include <stdlib.h>
 #include <string.h>

-#include "./vp9/encoder/vp9_resize.h"
+#include "../tools_common.h"
+#include "../vp9/encoder/vp9_resize.h"

-static void usage(char *progname) {
+static const char *exec_name = NULL;
+
+static void usage() {
  printf("Usage:\n");
  printf("%s <input_yuv> <width>x<height> <target_width>x<target_height> ",
-         progname);
+         exec_name);
  printf("<output_yuv> [<frames>]\n");
 }

+void usage_exit(void) {
+  usage();
+  exit(EXIT_FAILURE);
+}
+
 static int parse_dim(char *v, int *width, int *height) {
  char *x = strchr(v, 'x');
  if (x == NULL)
@@ -47,9 +55,11 @@ int main(int argc, char *argv[]) {
  int f, frames;
  int width, height, target_width, target_height;

+  exec_name = argv[0];
+
  if (argc < 5) {
    printf("Incorrect parameters:\n");
-    usage(argv[0]);
+    usage();
    return 1;
  }

@@ -57,25 +67,25 @@ int main(int argc, char *argv[]) {
  fout = argv[4];
  if (!parse_dim(argv[2], &width, &height)) {
    printf("Incorrect parameters: %s\n", argv[2]);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  if (!parse_dim(argv[3], &target_width, &target_height)) {
    printf("Incorrect parameters: %s\n", argv[3]);
-    usage(argv[0]);
+    usage();
    return 1;
  }

  fpin = fopen(fin, "rb");
  if (fpin == NULL) {
    printf("Can't open file %s to read\n", fin);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  fpout = fopen(fout, "wb");
  if (fpout == NULL) {
    printf("Can't open file %s to write\n", fout);
-    usage(argv[0]);
+    usage();
    return 1;
  }
  if (argc >= 6)
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -50,12 +50,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/simple_decoder.c
+++ b/examples/simple_decoder.c
@@ -82,13 +82,13 @@

 #include "vpx/vpx_decoder.h"

-#include "./tools_common.h"
-#include "./video_reader.h"
+#include "../tools_common.h"
+#include "../video_reader.h"
 #include "./vpx_config.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name);
  exit(EXIT_FAILURE);
 }
--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -101,12 +101,12 @@

 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr,
          "Usage: %s <codec> <width> <height> <infile> <outfile> "
              "<keyframe-interval> [<error-resilient>]\nSee comments in "
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -53,12 +53,12 @@

 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -8,292 +8,729 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+/*
+ * This is an example demonstrating multi-resolution encoding in VP8.
+ * High-resolution input video is down-sampled to lower-resolutions. The
+ * encoder then encodes the video and outputs multiple bitstreams with
+ * different resolutions.
+ *
+ * This test also allows for settings temporal layers for each spatial layer.
+ * Different number of temporal layers per spatial stream may be used.
+ * Currently up to 3 temporal layers per spatial stream (encoder) are supported
+ * in this test.
+ */

-// This is an example demonstrating multi-resolution encoding in VP8.
-// High-resolution input video is down-sampled to lower-resolutions. The
-// encoder then encodes the video and outputs multiple bitstreams with
-// different resolutions.
-//
-// Configure with --enable-multi-res-encoding flag to enable this example.
+#include "./vpx_config.h"

 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
 #include <string.h>
+#include <math.h>
+#include <assert.h>
+#include <sys/time.h>
+#if USE_POSIX_MMAP
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#endif
+#include "vpx_ports/vpx_timer.h"
+#include "vpx/vpx_encoder.h"
+#include "vpx/vp8cx.h"
+#include "vpx_ports/mem_ops.h"
+#include "../tools_common.h"
+#define interface (vpx_codec_vp8_cx())
+#define fourcc    0x30385056

+void usage_exit(void) {
+  exit(EXIT_FAILURE);
+}
+
+/*
+ * The input video frame is downsampled several times to generate a multi-level
+ * hierarchical structure. NUM_ENCODERS is defined as the number of encoding
+ * levels required. For example, if the size of input video is 1280x720,
+ * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3
+ * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and
+ * 320x180(level 2) respectively.
+ */
+
+/* Number of encoders (spatial resolutions) used in this test. */
+#define NUM_ENCODERS 3
+
+/* Maximum number of temporal layers allowed for this test. */
+#define MAX_NUM_TEMPORAL_LAYERS 3
+
+/* This example uses the scaler function in libyuv. */
 #include "third_party/libyuv/include/libyuv/basic_types.h"
 #include "third_party/libyuv/include/libyuv/scale.h"
 #include "third_party/libyuv/include/libyuv/cpu_id.h"

-#include "vpx/vpx_encoder.h"
-#include "vpx/vp8cx.h"
+int (*read_frame_p)(FILE *f, vpx_image_t *img);

-#include "./tools_common.h"
-#include "./video_writer.h"
+static int read_frame(FILE *f, vpx_image_t *img) {
+    size_t nbytes, to_read;
+    int    res = 1;

-// The input video frame is downsampled several times to generate a
-// multi-level  hierarchical structure. kNumEncoders is defined as the number
-// of encoding  levels required. For example, if the size of input video is
-// 1280x720, kNumEncoders is 3, and down-sampling factor is 2, the encoder
-// outputs 3 bitstreams with resolution of 1280x720(level 0),
-// 640x360(level 1), and 320x180(level 2) respectively.
-#define kNumEncoders 3
-
-static const char *exec_name;
-
-void usage_exit() {
-  fprintf(stderr,
-          "Usage: %s <width> <height> <infile> <outfile(s)> <output psnr?>\n",
-          exec_name);
-  exit(EXIT_FAILURE);
+    to_read = img->w*img->h*3/2;
+    nbytes = fread(img->planes[0], 1, to_read, f);
+    if(nbytes != to_read) {
+        res = 0;
+        if(nbytes > 0)
+            printf("Warning: Read partial frame. Check your width & height!\n");
+    }
+    return res;
 }

-int main(int argc, char *argv[]) {
-  int frame_cnt = 0;
-  FILE *infile = NULL;
-  VpxVideoWriter *writers[kNumEncoders];
-  vpx_codec_ctx_t codec[kNumEncoders];
-  vpx_codec_enc_cfg_t cfg[kNumEncoders];
-  vpx_image_t raw[kNumEncoders];
-  const VpxInterface *const encoder = get_vpx_encoder_by_name("vp8");
-  // Currently, only realtime mode is supported in multi-resolution encoding.
-  const int arg_deadline = VPX_DL_REALTIME;
-  int i;
-  int width = 0;
-  int height = 0;
-  int frame_avail = 0;
-  int got_data = 0;
+static int read_frame_by_row(FILE *f, vpx_image_t *img) {
+    size_t nbytes, to_read;
+    int    res = 1;
+    int plane;

-  // Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
-  // don't need to know PSNR, which will skip PSNR calculation and save
-  // encoding time.
-  int show_psnr = 0;
-  uint64_t psnr_sse_total[kNumEncoders] = {0};
-  uint64_t psnr_samples_total[kNumEncoders] = {0};
-  double psnr_totals[kNumEncoders][4] = {{0, 0}};
-  int psnr_count[kNumEncoders] = {0};
-
-  // Set the required target bitrates for each resolution level.
-  // If target bitrate for highest-resolution level is set to 0,
-  // (i.e. target_bitrate[0]=0), we skip encoding at that level.
-  unsigned int target_bitrate[kNumEncoders] = {1000, 500, 100};
-
-  // Enter the frame rate of the input video.
-  const int framerate = 30;
-  // Set down-sampling factor for each resolution level.
-  //   dsf[0] controls down sampling from level 0 to level 1;
-  //   dsf[1] controls down sampling from level 1 to level 2;
-  //   dsf[2] is not used.
-  vpx_rational_t dsf[kNumEncoders] = {{2, 1}, {2, 1}, {1, 1}};
-
-  exec_name = argv[0];
-
-  if (!encoder)
-    die("Unsupported codec.");
-
-  // exe_name, input width, input height, input file,
-  // output file 1, output file 2, output file 3, psnr on/off
-  if (argc != (5 + kNumEncoders))
-    die("Invalid number of input options.");
-
-  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
-
-  width = strtol(argv[1], NULL, 0);
-  height = strtol(argv[2], NULL, 0);
-
-  if (width < 16 || width % 2 || height < 16 || height % 2)
-    die("Invalid resolution: %ldx%ld", width, height);
-
-  // Open input video file for encoding
-  if (!(infile = fopen(argv[3], "rb")))
-    die("Failed to open %s for reading", argv[3]);
-
-  show_psnr = strtol(argv[kNumEncoders + 4], NULL, 0);
-
-  // Populate default encoder configuration
-  for (i = 0; i < kNumEncoders; ++i) {
-    vpx_codec_err_t res =
-        vpx_codec_enc_config_default(encoder->codec_interface(), &cfg[i], 0);
-    if (res != VPX_CODEC_OK) {
-      printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
-      return EXIT_FAILURE;
-    }
-  }
-
-  // Update the default configuration according to needs of the application.
-  // Highest-resolution encoder settings
-  cfg[0].g_w = width;
-  cfg[0].g_h = height;
-  cfg[0].g_threads = 1;
-  cfg[0].rc_dropframe_thresh = 30;
-  cfg[0].rc_end_usage = VPX_CBR;
-  cfg[0].rc_resize_allowed = 0;
-  cfg[0].rc_min_quantizer = 4;
-  cfg[0].rc_max_quantizer = 56;
-  cfg[0].rc_undershoot_pct = 98;
-  cfg[0].rc_overshoot_pct = 100;
-  cfg[0].rc_buf_initial_sz = 500;
-  cfg[0].rc_buf_optimal_sz = 600;
-  cfg[0].rc_buf_sz = 1000;
-  cfg[0].g_error_resilient = 1;
-  cfg[0].g_lag_in_frames = 0;
-  cfg[0].kf_mode = VPX_KF_AUTO;  // VPX_KF_DISABLED
-  cfg[0].kf_min_dist = 3000;
-  cfg[0].kf_max_dist = 3000;
-  cfg[0].rc_target_bitrate = target_bitrate[0];
-  cfg[0].g_timebase.num = 1;
-  cfg[0].g_timebase.den = framerate;
-
-  // Other-resolution encoder settings
-  for (i = 1; i < kNumEncoders; ++i) {
-    cfg[i] = cfg[0];
-    cfg[i].g_threads = 1;
-    cfg[i].rc_target_bitrate = target_bitrate[i];
-
-    // Note: Width & height of other-resolution encoders are calculated
-    // from the highest-resolution encoder's size and the corresponding
-    // down_sampling_factor.
+    for (plane = 0; plane < 3; plane++)
    {
-      unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
-      unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
-      cfg[i].g_w = iw / dsf[i - 1].num;
-      cfg[i].g_h = ih / dsf[i - 1].num;
-    }
+        unsigned char *ptr;
+        int w = (plane ? (1 + img->d_w) / 2 : img->d_w);
+        int h = (plane ? (1 + img->d_h) / 2 : img->d_h);
+        int r;

-    // Make width & height to be multiplier of 2.
-    if ((cfg[i].g_w) % 2)
-      cfg[i].g_w++;
-
-    if ((cfg[i].g_h) % 2)
-      cfg[i].g_h++;
-  }
-
-  // Open output file for each encoder to output bitstreams
-  for (i = 0; i < kNumEncoders; ++i) {
-    VpxVideoInfo info = {
-      encoder->fourcc,
-      cfg[i].g_w,
-      cfg[i].g_h,
-      {cfg[i].g_timebase.num, cfg[i].g_timebase.den}
-    };
-
-    if (!(writers[i] = vpx_video_writer_open(argv[i+4], kContainerIVF, &info)))
-      die("Failed to open %s for writing", argv[i+4]);
-  }
-
-  // Allocate image for each encoder
-  for (i = 0; i < kNumEncoders; ++i)
-    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
-      die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
-
-  // Initialize multi-encoder
-  if (vpx_codec_enc_init_multi(&codec[0], encoder->codec_interface(), &cfg[0],
-                               kNumEncoders,
-                               show_psnr ? VPX_CODEC_USE_PSNR : 0, &dsf[0]))
-    die_codec(&codec[0], "Failed to initialize encoder");
-
-  // The extra encoding configuration parameters can be set as follows.
-  for (i = 0; i < kNumEncoders; i++) {
-    // Set encoding speed
-    if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, -6))
-      die_codec(&codec[i], "Failed to set cpu_used");
-
-    // Set static threshold.
-    if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
-      die_codec(&codec[i], "Failed to set static threshold");
-
-    // Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING
-    // Enable denoising for the highest-resolution encoder.
-    if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, i == 0))
-      die_codec(&codec[0], "Failed to set noise_sensitivity");
-  }
-
-  frame_avail = 1;
-  got_data = 0;
-
-  while (frame_avail || got_data) {
-    vpx_codec_iter_t iter[kNumEncoders] = {NULL};
-    const vpx_codec_cx_pkt_t *pkt[kNumEncoders];
-
-    frame_avail = vpx_img_read(&raw[0], infile);
-
-    if (frame_avail) {
-      for (i = 1; i < kNumEncoders; ++i) {
-        vpx_image_t *const prev = &raw[i - 1];
-
-        // Scale the image down a number of times by downsampling factor
-        // FilterMode 1 or 2 give better psnr than FilterMode 0.
-        I420Scale(prev->planes[VPX_PLANE_Y], prev->stride[VPX_PLANE_Y],
-                  prev->planes[VPX_PLANE_U], prev->stride[VPX_PLANE_U],
-                  prev->planes[VPX_PLANE_V], prev->stride[VPX_PLANE_V],
-                  prev->d_w, prev->d_h,
-                  raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
-                  raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
-                  raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
-                  raw[i].d_w, raw[i].d_h, 1);
-      }
-    }
-
-    // Encode frame.
-    if (vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
-                         frame_cnt, 1, 0, arg_deadline)) {
-      die_codec(&codec[0], "Failed to encode frame");
-    }
-
-    for (i = kNumEncoders - 1; i >= 0; i--) {
-      got_data = 0;
-
-      while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) {
-        got_data = 1;
-        switch (pkt[i]->kind) {
-          case VPX_CODEC_CX_FRAME_PKT:
-            vpx_video_writer_write_frame(writers[i], pkt[i]->data.frame.buf,
-                                         pkt[i]->data.frame.sz, frame_cnt - 1);
-          break;
-          case VPX_CODEC_PSNR_PKT:
-            if (show_psnr) {
-              int j;
-              psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
-              psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
-              for (j = 0; j < 4; j++)
-                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
-              psnr_count[i]++;
-            }
+        /* Determine the correct plane based on the image format. The for-loop
+         * always counts in Y,U,V order, but this may not match the order of
+         * the data on disk.
+         */
+        switch (plane)
+        {
+        case 1:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12? VPX_PLANE_V : VPX_PLANE_U];
            break;
-          default:
+        case 2:
+            ptr = img->planes[img->fmt==VPX_IMG_FMT_YV12?VPX_PLANE_U : VPX_PLANE_V];
            break;
+        default:
+            ptr = img->planes[plane];
        }
-        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
-               (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":".");
-        fflush(stdout);
-      }
-    }
-    frame_cnt++;
-  }
-  printf("\n");

-  fclose(infile);
+        for (r = 0; r < h; r++)
+        {
+            to_read = w;

-  printf("Processed %d frames.\n", frame_cnt - 1);
-  for (i = 0; i < kNumEncoders; ++i) {
-    // Calculate PSNR and print it out
-    if (show_psnr && psnr_count[i] > 0) {
-      int j;
-      double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
-                                  psnr_sse_total[i]);
+            nbytes = fread(ptr, 1, to_read, f);
+            if(nbytes != to_read) {
+                res = 0;
+                if(nbytes > 0)
+                    printf("Warning: Read partial frame. Check your width & height!\n");
+                break;
+            }

-      fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
-      fprintf(stderr, " %.3lf", ovpsnr);
-      for (j = 0; j < 4; j++)
-        fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
+            ptr += img->stride[plane];
+        }
+        if (!res)
+            break;
    }

-    if (vpx_codec_destroy(&codec[i]))
-      die_codec(&codec[i], "Failed to destroy codec");
-
-    vpx_img_free(&raw[i]);
-    vpx_video_writer_close(writers[i]);
-  }
-  printf("\n");
-
-  return EXIT_SUCCESS;
+    return res;
+}
+
+static void write_ivf_file_header(FILE *outfile,
+                                  const vpx_codec_enc_cfg_t *cfg,
+                                  int frame_cnt) {
+    char header[32];
+
+    if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS)
+        return;
+    header[0] = 'D';
+    header[1] = 'K';
+    header[2] = 'I';
+    header[3] = 'F';
+    mem_put_le16(header+4,  0);                   /* version */
+    mem_put_le16(header+6,  32);                  /* headersize */
+    mem_put_le32(header+8,  fourcc);              /* headersize */
+    mem_put_le16(header+12, cfg->g_w);            /* width */
+    mem_put_le16(header+14, cfg->g_h);            /* height */
+    mem_put_le32(header+16, cfg->g_timebase.den); /* rate */
+    mem_put_le32(header+20, cfg->g_timebase.num); /* scale */
+    mem_put_le32(header+24, frame_cnt);           /* length */
+    mem_put_le32(header+28, 0);                   /* unused */
+
+    (void) fwrite(header, 1, 32, outfile);
+}
+
+static void write_ivf_frame_header(FILE *outfile,
+                                   const vpx_codec_cx_pkt_t *pkt)
+{
+    char             header[12];
+    vpx_codec_pts_t  pts;
+
+    if(pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+        return;
+
+    pts = pkt->data.frame.pts;
+    mem_put_le32(header, pkt->data.frame.sz);
+    mem_put_le32(header+4, pts&0xFFFFFFFF);
+    mem_put_le32(header+8, pts >> 32);
+
+    (void) fwrite(header, 1, 12, outfile);
+}
+
+/* Temporal scaling parameters */
+/* This sets all the temporal layer parameters given |num_temporal_layers|,
+ * including the target bit allocation across temporal layers. Bit allocation
+ * parameters will be passed in as user parameters in another version.
+ */
+static void set_temporal_layer_pattern(int num_temporal_layers,
+                                       vpx_codec_enc_cfg_t *cfg,
+                                       int bitrate,
+                                       int *layer_flags)
+{
+    assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS);
+    switch (num_temporal_layers)
+    {
+    case 1:
+    {
+        /* 1-layer */
+        cfg->ts_number_layers     = 1;
+        cfg->ts_periodicity       = 1;
+        cfg->ts_rate_decimator[0] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_target_bitrate[0] = bitrate;
+
+        // Update L only.
+        layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
+        break;
+    }
+
+    case 2:
+    {
+        /* 2-layers, with sync point at first frame of layer 1. */
+        cfg->ts_number_layers     = 2;
+        cfg->ts_periodicity       = 2;
+        cfg->ts_rate_decimator[0] = 2;
+        cfg->ts_rate_decimator[1] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_layer_id[1] = 1;
+        // Use 60/40 bit allocation as example.
+        cfg->ts_target_bitrate[0] = 0.6f * bitrate;
+        cfg->ts_target_bitrate[1] = bitrate;
+
+        /* 0=L, 1=GF */
+        // ARF is used as predictor for all frames, and is only updated on
+        // key frame. Sync point every 8 frames.
+
+        // Layer 0: predict from L and ARF, update L and G.
+        layer_flags[0] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 1: sync point: predict from L and ARF, and update G.
+        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 0, predict from L and ARF, update L.
+        layer_flags[2] = VP8_EFLAG_NO_REF_GF  |
+                         VP8_EFLAG_NO_UPD_GF  |
+                         VP8_EFLAG_NO_UPD_ARF;
+
+        // Layer 1: predict from L, G and ARF, and update G.
+        layer_flags[3] = VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 0
+        layer_flags[4] = layer_flags[2];
+
+        // Layer 1
+        layer_flags[5] = layer_flags[3];
+
+        // Layer 0
+        layer_flags[6] = layer_flags[4];
+
+        // Layer 1
+        layer_flags[7] = layer_flags[5];
+        break;
+    }
+
+    case 3:
+    default:
+    {
+        // 3-layers structure where ARF is used as predictor for all frames,
+        // and is only updated on key frame.
+        // Sync points for layer 1 and 2 every 8 frames.
+        cfg->ts_number_layers     = 3;
+        cfg->ts_periodicity       = 4;
+        cfg->ts_rate_decimator[0] = 4;
+        cfg->ts_rate_decimator[1] = 2;
+        cfg->ts_rate_decimator[2] = 1;
+        cfg->ts_layer_id[0] = 0;
+        cfg->ts_layer_id[1] = 2;
+        cfg->ts_layer_id[2] = 1;
+        cfg->ts_layer_id[3] = 2;
+        // Use 40/20/40 bit allocation as example.
+        cfg->ts_target_bitrate[0] = 0.4f * bitrate;
+        cfg->ts_target_bitrate[1] = 0.6f * bitrate;
+        cfg->ts_target_bitrate[2] = bitrate;
+
+        /* 0=L, 1=GF, 2=ARF */
+
+        // Layer 0: predict from L and ARF; update L and G.
+        layer_flags[0] =  VP8_EFLAG_NO_UPD_ARF |
+                          VP8_EFLAG_NO_REF_GF;
+
+        // Layer 2: sync point: predict from L and ARF; update none.
+        layer_flags[1] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 1: sync point: predict from L and ARF; update G.
+        layer_flags[2] = VP8_EFLAG_NO_REF_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[3] = VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST |
+                         VP8_EFLAG_NO_UPD_ENTROPY;
+
+        // Layer 0: predict from L and ARF; update L.
+        layer_flags[4] = VP8_EFLAG_NO_UPD_GF |
+                         VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_REF_GF;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[5] = layer_flags[3];
+
+        // Layer 1: predict from L, G, ARF; update G.
+        layer_flags[6] = VP8_EFLAG_NO_UPD_ARF |
+                         VP8_EFLAG_NO_UPD_LAST;
+
+        // Layer 2: predict from L, G, ARF; update none.
+        layer_flags[7] = layer_flags[3];
+        break;
+    }
+    }
+}
+
+/* The periodicity of the pattern given the number of temporal layers. */
+static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = {1, 8, 8};
+
+int main(int argc, char **argv)
+{
+    FILE                 *infile, *outfile[NUM_ENCODERS];
+    FILE                 *downsampled_input[NUM_ENCODERS - 1];
+    char                 filename[50];
+    vpx_codec_ctx_t      codec[NUM_ENCODERS];
+    vpx_codec_enc_cfg_t  cfg[NUM_ENCODERS];
+    int                  frame_cnt = 0;
+    vpx_image_t          raw[NUM_ENCODERS];
+    vpx_codec_err_t      res[NUM_ENCODERS];
+
+    int                  i;
+    long                 width;
+    long                 height;
+    int                  length_frame;
+    int                  frame_avail;
+    int                  got_data;
+    int                  flags = 0;
+    int                  layer_id = 0;
+
+    int                  layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS]
+                                     = {0};
+    int                  flag_periodicity;
+
+    /*Currently, only realtime mode is supported in multi-resolution encoding.*/
+    int                  arg_deadline = VPX_DL_REALTIME;
+
+    /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you
+       don't need to know PSNR, which will skip PSNR calculation and save
+       encoding time. */
+    int                  show_psnr = 0;
+    int                  key_frame_insert = 0;
+    uint64_t             psnr_sse_total[NUM_ENCODERS] = {0};
+    uint64_t             psnr_samples_total[NUM_ENCODERS] = {0};
+    double               psnr_totals[NUM_ENCODERS][4] = {{0,0}};
+    int                  psnr_count[NUM_ENCODERS] = {0};
+
+    double               cx_time = 0;
+    struct  timeval      tv1, tv2, difftv;
+
+    /* Set the required target bitrates for each resolution level.
+     * If target bitrate for highest-resolution level is set to 0,
+     * (i.e. target_bitrate[0]=0), we skip encoding at that level.
+     */
+    unsigned int         target_bitrate[NUM_ENCODERS]={1000, 500, 100};
+
+    /* Enter the frame rate of the input video */
+    int                  framerate = 30;
+
+    /* Set down-sampling factor for each resolution level.
+       dsf[0] controls down sampling from level 0 to level 1;
+       dsf[1] controls down sampling from level 1 to level 2;
+       dsf[2] is not used. */
+    vpx_rational_t dsf[NUM_ENCODERS] = {{2, 1}, {2, 1}, {1, 1}};
+
+    /* Set the number of temporal layers for each encoder/resolution level,
+     * starting from highest resoln down to lowest resoln. */
+    unsigned int         num_temporal_layers[NUM_ENCODERS] = {3, 3, 3};
+
+    if(argc!= (7 + 3 * NUM_ENCODERS))
+        die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
+            "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output psnr?> \n",
+            argv[0]);
+
+    printf("Using %s\n",vpx_codec_iface_name(interface));
+
+    width = strtol(argv[1], NULL, 0);
+    height = strtol(argv[2], NULL, 0);
+    framerate = strtol(argv[3], NULL, 0);
+
+    if(width < 16 || width%2 || height <16 || height%2)
+        die("Invalid resolution: %ldx%ld", width, height);
+
+    /* Open input video file for encoding */
+    if(!(infile = fopen(argv[4], "rb")))
+        die("Failed to open %s for reading", argv[4]);
+
+    /* Open output file for each encoder to output bitstreams */
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        if(!target_bitrate[i])
+        {
+            outfile[i] = NULL;
+            continue;
+        }
+
+        if(!(outfile[i] = fopen(argv[i+5], "wb")))
+            die("Failed to open %s for writing", argv[i+4]);
+    }
+
+    // Bitrates per spatial layer: overwrite default rates above.
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
+    }
+
+    // Temporal layers per spatial layers: overwrite default settings above.
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
+        if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
+          die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
+              num_temporal_layers);
+    }
+
+    /* Open file to write out each spatially downsampled input stream. */
+    for (i=0; i< NUM_ENCODERS - 1; i++)
+    {
+       // Highest resoln is encoder 0.
+        if (sprintf(filename,"ds%d.yuv",NUM_ENCODERS - i) < 0)
+        {
+            return EXIT_FAILURE;
+        }
+        downsampled_input[i] = fopen(filename,"wb");
+    }
+
+    key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
+
+    show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
+
+
+    /* Populate default encoder configuration */
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
+        if(res[i]) {
+            printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
+            return EXIT_FAILURE;
+        }
+    }
+
+    /*
+     * Update the default configuration according to needs of the application.
+     */
+    /* Highest-resolution encoder settings */
+    cfg[0].g_w = width;
+    cfg[0].g_h = height;
+    cfg[0].rc_dropframe_thresh = 0;
+    cfg[0].rc_end_usage = VPX_CBR;
+    cfg[0].rc_resize_allowed = 0;
+    cfg[0].rc_min_quantizer = 2;
+    cfg[0].rc_max_quantizer = 56;
+    cfg[0].rc_undershoot_pct = 100;
+    cfg[0].rc_overshoot_pct = 15;
+    cfg[0].rc_buf_initial_sz = 500;
+    cfg[0].rc_buf_optimal_sz = 600;
+    cfg[0].rc_buf_sz = 1000;
+    cfg[0].g_error_resilient = 1;              /* Enable error resilient mode */
+    cfg[0].g_lag_in_frames   = 0;
+
+    /* Disable automatic keyframe placement */
+    /* Note: These 3 settings are copied to all levels. But, except the lowest
+     * resolution level, all other levels are set to VPX_KF_DISABLED internally.
+     */
+    cfg[0].kf_mode           = VPX_KF_AUTO;
+    cfg[0].kf_min_dist = 3000;
+    cfg[0].kf_max_dist = 3000;
+
+    cfg[0].rc_target_bitrate = target_bitrate[0];       /* Set target bitrate */
+    cfg[0].g_timebase.num = 1;                          /* Set fps */
+    cfg[0].g_timebase.den = framerate;
+
+    /* Other-resolution encoder settings */
+    for (i=1; i< NUM_ENCODERS; i++)
+    {
+        memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));
+
+        cfg[i].rc_target_bitrate = target_bitrate[i];
+
+        /* Note: Width & height of other-resolution encoders are calculated
+         * from the highest-resolution encoder's size and the corresponding
+         * down_sampling_factor.
+         */
+        {
+            unsigned int iw = cfg[i-1].g_w*dsf[i-1].den + dsf[i-1].num - 1;
+            unsigned int ih = cfg[i-1].g_h*dsf[i-1].den + dsf[i-1].num - 1;
+            cfg[i].g_w = iw/dsf[i-1].num;
+            cfg[i].g_h = ih/dsf[i-1].num;
+        }
+
+        /* Make width & height to be multiplier of 2. */
+        // Should support odd size ???
+        if((cfg[i].g_w)%2)cfg[i].g_w++;
+        if((cfg[i].g_h)%2)cfg[i].g_h++;
+    }
+
+
+    // Set the number of threads per encode/spatial layer.
+    // (1, 1, 1) means no encoder threading.
+    cfg[0].g_threads = 2;
+    cfg[1].g_threads = 1;
+    cfg[2].g_threads = 1;
+
+    /* Allocate image for each encoder */
+    for (i=0; i< NUM_ENCODERS; i++)
+        if(!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
+            die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);
+
+    if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
+        read_frame_p = read_frame;
+    else
+        read_frame_p = read_frame_by_row;
+
+    for (i=0; i< NUM_ENCODERS; i++)
+        if(outfile[i])
+            write_ivf_file_header(outfile[i], &cfg[i], 0);
+
+    /* Temporal layers settings */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        set_temporal_layer_pattern(num_temporal_layers[i],
+                                   &cfg[i],
+                                   cfg[i].rc_target_bitrate,
+                                   &layer_flags[i * VPX_TS_MAX_PERIODICITY]);
+    }
+
+    /* Initialize multi-encoder */
+    if(vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS,
+                                (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0]))
+        die_codec(&codec[0], "Failed to initialize encoder");
+
+    /* The extra encoding configuration parameters can be set as follows. */
+    /* Set encoding speed */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        int speed = -6;
+        /* Lower speed for the lowest resolution. */
+        if (i == NUM_ENCODERS - 1) speed = -4;
+        if(vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed))
+            die_codec(&codec[i], "Failed to set cpu_used");
+    }
+
+    /* Set static threshold = 1 for all encoders */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1))
+            die_codec(&codec[i], "Failed to set static threshold");
+    }
+
+    /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
+    /* Enable denoising for the highest-resolution encoder. */
+    if(vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
+        die_codec(&codec[0], "Failed to set noise_sensitivity");
+    for ( i=1; i< NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
+            die_codec(&codec[i], "Failed to set noise_sensitivity");
+    }
+
+    /* Set the number of token partitions */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        if(vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1))
+            die_codec(&codec[i], "Failed to set static threshold");
+    }
+
+    /* Set the max intra target bitrate */
+    for ( i=0; i<NUM_ENCODERS; i++)
+    {
+        unsigned int max_intra_size_pct =
+            (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10);
+        if(vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT,
+                             max_intra_size_pct))
+            die_codec(&codec[i], "Failed to set static threshold");
+       //printf("%d %d \n",i,max_intra_size_pct);
+    }
+
+    frame_avail = 1;
+    got_data = 0;
+
+    while(frame_avail || got_data)
+    {
+        vpx_codec_iter_t iter[NUM_ENCODERS]={NULL};
+        const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS];
+
+        flags = 0;
+        frame_avail = read_frame_p(infile, &raw[0]);
+
+        if(frame_avail)
+        {
+            for ( i=1; i<NUM_ENCODERS; i++)
+            {
+                /*Scale the image down a number of times by downsampling factor*/
+                /* FilterMode 1 or 2 give better psnr than FilterMode 0. */
+                I420Scale(raw[i-1].planes[VPX_PLANE_Y], raw[i-1].stride[VPX_PLANE_Y],
+                          raw[i-1].planes[VPX_PLANE_U], raw[i-1].stride[VPX_PLANE_U],
+                          raw[i-1].planes[VPX_PLANE_V], raw[i-1].stride[VPX_PLANE_V],
+                          raw[i-1].d_w, raw[i-1].d_h,
+                          raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y],
+                          raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U],
+                          raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V],
+                          raw[i].d_w, raw[i].d_h, 1);
+                /* Write out down-sampled input. */
+                length_frame = cfg[i].g_w *  cfg[i].g_h *3/2;
+                if (fwrite(raw[i].planes[0], 1, length_frame,
+                           downsampled_input[NUM_ENCODERS - i - 1]) !=
+                               length_frame)
+                {
+                    return EXIT_FAILURE;
+                }
+            }
+        }
+
+        /* Set the flags (reference and update) for all the encoders.*/
+        for ( i=0; i<NUM_ENCODERS; i++)
+        {
+            layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity];
+            flags = 0;
+            flag_periodicity = periodicity_to_num_layers
+                [num_temporal_layers[i] - 1];
+            flags = layer_flags[i * VPX_TS_MAX_PERIODICITY +
+                                frame_cnt % flag_periodicity];
+            // Key frame flag for first frame.
+            if (frame_cnt == 0)
+            {
+                flags |= VPX_EFLAG_FORCE_KF;
+            }
+            if (frame_cnt > 0 && frame_cnt == key_frame_insert)
+            {
+                flags = VPX_EFLAG_FORCE_KF;
+            }
+
+            vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags);
+            vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+        }
+
+        gettimeofday(&tv1, NULL);
+        /* Encode each frame at multi-levels */
+        /* Note the flags must be set to 0 in the encode call if they are set
+           for each frame with the vpx_codec_control(), as done above. */
+        if(vpx_codec_encode(&codec[0], frame_avail? &raw[0] : NULL,
+            frame_cnt, 1, 0, arg_deadline))
+        {
+            die_codec(&codec[0], "Failed to encode frame");
+        }
+        gettimeofday(&tv2, NULL);
+        timersub(&tv2, &tv1, &difftv);
+        cx_time += (double)(difftv.tv_sec * 1000000 + difftv.tv_usec);
+        for (i=NUM_ENCODERS-1; i>=0 ; i--)
+        {
+            got_data = 0;
+            while( (pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i])) )
+            {
+                got_data = 1;
+                switch(pkt[i]->kind) {
+                    case VPX_CODEC_CX_FRAME_PKT:
+                        write_ivf_frame_header(outfile[i], pkt[i]);
+                        (void) fwrite(pkt[i]->data.frame.buf, 1,
+                                      pkt[i]->data.frame.sz, outfile[i]);
+                    break;
+                    case VPX_CODEC_PSNR_PKT:
+                        if (show_psnr)
+                        {
+                            int j;
+
+                            psnr_sse_total[i] += pkt[i]->data.psnr.sse[0];
+                            psnr_samples_total[i] += pkt[i]->data.psnr.samples[0];
+                            for (j = 0; j < 4; j++)
+                            {
+                                psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j];
+                            }
+                            psnr_count[i]++;
+                        }
+
+                        break;
+                    default:
+                        break;
+                }
+                printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT
+                       && (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)? "K":"");
+                fflush(stdout);
+            }
+        }
+        frame_cnt++;
+    }
+    printf("\n");
+    printf("FPS for encoding %d %f %f \n", frame_cnt, (float)cx_time / 1000000,
+           1000000 * (double)frame_cnt / (double)cx_time);
+
+    fclose(infile);
+
+    printf("Processed %ld frames.\n",(long int)frame_cnt-1);
+    for (i=0; i< NUM_ENCODERS; i++)
+    {
+        /* Calculate PSNR and print it out */
+        if ( (show_psnr) && (psnr_count[i]>0) )
+        {
+            int j;
+            double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0,
+                                        psnr_sse_total[i]);
+
+            fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i);
+
+            fprintf(stderr, " %.3lf", ovpsnr);
+            for (j = 0; j < 4; j++)
+            {
+                fprintf(stderr, " %.3lf", psnr_totals[i][j]/psnr_count[i]);
+            }
+        }
+
+        if(vpx_codec_destroy(&codec[i]))
+            die_codec(&codec[i], "Failed to destroy codec");
+
+        vpx_img_free(&raw[i]);
+
+        if(!outfile[i])
+            continue;
+
+        /* Try to rewrite the file header with the actual frame count */
+        if(!fseek(outfile[i], 0, SEEK_SET))
+            write_ivf_file_header(outfile[i], &cfg[i], frame_cnt-1);
+        fclose(outfile[i]);
+    }
+    printf("\n");
+
+    return EXIT_SUCCESS;
 }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -53,12 +53,12 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
          exec_name);
  exit(EXIT_FAILURE);
--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -15,12 +15,12 @@
 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "vp9_lossless_encoder: Example demonstrating VP9 lossless "
                  "encoding feature. Supports raw input only.\n");
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n", exec_name);
--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -14,24 +14,33 @@
 * that benefit from a scalable bitstream.
 */

+#include <math.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>

-#include "./args.h"
-#include "./tools_common.h"
-#include "./video_writer.h"
+
+#include "../args.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 #include "vpx/svc_context.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
-#include "./vpxstats.h"
+#include "../vpxstats.h"
+#define OUTPUT_RC_STATS 1

 static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
 static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");
+static const arg_def_t threads_arg =
+    ARG_DEF("th", "threads", 1, "number of threads to use");
+#if OUTPUT_RC_STATS
+static const arg_def_t output_rc_stats_arg =
+    ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
+#endif
 static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
 static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
 static const arg_def_t timebase_arg =
@@ -42,6 +51,9 @@ static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
 static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
+static const arg_def_t temporal_layering_mode_arg =
+    ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme."
+        "VP9E_TEMPORAL_LAYERING_MODE");
 static const arg_def_t kf_dist_arg =
    ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes");
 static const arg_def_t scale_factors_arg =
@@ -60,6 +72,13 @@ static const arg_def_t min_bitrate_arg =
    ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate");
 static const arg_def_t max_bitrate_arg =
    ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate");
+static const arg_def_t lag_in_frame_arg =
+    ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before "
+        "generating any outputs");
+static const arg_def_t rc_end_usage_arg =
+    ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q");
+static const arg_def_t speed_arg =
+    ARG_DEF("sp", "speed", 1, "speed configuration");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -80,11 +99,17 @@ static const arg_def_t *svc_args[] = {
  &timebase_arg,      &bitrate_arg,       &skip_frames_arg, &spatial_layers_arg,
  &kf_dist_arg,       &scale_factors_arg, &passes_arg,      &pass_arg,
  &fpf_name_arg,      &min_q_arg,         &max_q_arg,       &min_bitrate_arg,
-  &max_bitrate_arg,   &temporal_layers_arg,
+  &max_bitrate_arg,   &temporal_layers_arg, &temporal_layering_mode_arg,
+  &lag_in_frame_arg,  &threads_arg,
+#if OUTPUT_RC_STATS
+  &output_rc_stats_arg,
+#endif
+
 #if CONFIG_VP9_HIGHBITDEPTH
  &bitdepth_arg,
 #endif
-  NULL
+  &speed_arg,
+  &rc_end_usage_arg,  NULL
 };

 static const uint32_t default_frames_to_skip = 0;
@@ -97,6 +122,10 @@ static const uint32_t default_bitrate = 1000;
 static const uint32_t default_spatial_layers = 5;
 static const uint32_t default_temporal_layers = 1;
 static const uint32_t default_kf_dist = 100;
+static const uint32_t default_temporal_layering_mode = 0;
+static const uint32_t default_output_rc_stats = 0;
+static const int32_t default_speed = -1;  // -1 means use library default.
+static const uint32_t default_threads = 0;  // zero means use library default.

 typedef struct {
  const char *input_filename;
@@ -111,7 +140,7 @@ typedef struct {

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  fprintf(stderr, "Usage: %s <options> input_filename output_filename\n",
          exec_name);
  fprintf(stderr, "Options:\n");
@@ -138,6 +167,12 @@ static void parse_command_line(int argc, const char **argv_,
  svc_ctx->log_level = SVC_LOG_DEBUG;
  svc_ctx->spatial_layers = default_spatial_layers;
  svc_ctx->temporal_layers = default_temporal_layers;
+  svc_ctx->temporal_layering_mode = default_temporal_layering_mode;
+#if OUTPUT_RC_STATS
+  svc_ctx->output_rc_stat = default_output_rc_stats;
+#endif
+  svc_ctx->speed = default_speed;
+  svc_ctx->threads = default_threads;

  // start with default encoder configuration
  res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0);
@@ -179,6 +214,20 @@ static void parse_command_line(int argc, const char **argv_,
      svc_ctx->spatial_layers = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &temporal_layers_arg, argi)) {
      svc_ctx->temporal_layers = arg_parse_uint(&arg);
+#if OUTPUT_RC_STATS
+    } else if (arg_match(&arg, &output_rc_stats_arg, argi)) {
+      svc_ctx->output_rc_stat = arg_parse_uint(&arg);
+#endif
+    } else if (arg_match(&arg, &speed_arg, argi)) {
+      svc_ctx->speed = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &threads_arg, argi)) {
+      svc_ctx->threads = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) {
+      svc_ctx->temporal_layering_mode =
+          enc_cfg->temporal_layering_mode = arg_parse_int(&arg);
+      if (svc_ctx->temporal_layering_mode) {
+        enc_cfg->g_error_resilient = 1;
+      }
    } else if (arg_match(&arg, &kf_dist_arg, argi)) {
      enc_cfg->kf_min_dist = arg_parse_uint(&arg);
      enc_cfg->kf_max_dist = enc_cfg->kf_min_dist;
@@ -207,6 +256,10 @@ static void parse_command_line(int argc, const char **argv_,
      min_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &max_bitrate_arg, argi)) {
      max_bitrate = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &lag_in_frame_arg, argi)) {
+      enc_cfg->g_lag_in_frames = arg_parse_uint(&arg);
+    } else if (arg_match(&arg, &rc_end_usage_arg, argi)) {
+      enc_cfg->rc_end_usage = arg_parse_uint(&arg);
 #if CONFIG_VP9_HIGHBITDEPTH
    } else if (arg_match(&arg, &bitdepth_arg, argi)) {
      enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg);
@@ -307,6 +360,185 @@ static void parse_command_line(int argc, const char **argv_,
      enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist);
 }

+#if OUTPUT_RC_STATS
+// For rate control encoding stats.
+struct RateControlStats {
+  // Number of input frames per layer.
+  int layer_input_frames[VPX_MAX_LAYERS];
+  // Total (cumulative) number of encoded frames per layer.
+  int layer_tot_enc_frames[VPX_MAX_LAYERS];
+  // Number of encoded non-key frames per layer.
+  int layer_enc_frames[VPX_MAX_LAYERS];
+  // Framerate per layer (cumulative).
+  double layer_framerate[VPX_MAX_LAYERS];
+  // Target average frame size per layer (per-frame-bandwidth per layer).
+  double layer_pfb[VPX_MAX_LAYERS];
+  // Actual average frame size per layer.
+  double layer_avg_frame_size[VPX_MAX_LAYERS];
+  // Average rate mismatch per layer (|target - actual| / target).
+  double layer_avg_rate_mismatch[VPX_MAX_LAYERS];
+  // Actual encoding bitrate per layer (cumulative).
+  double layer_encoding_bitrate[VPX_MAX_LAYERS];
+  // Average of the short-time encoder actual bitrate.
+  // TODO(marpan): Should we add these short-time stats for each layer?
+  double avg_st_encoding_bitrate;
+  // Variance of the short-time encoder actual bitrate.
+  double variance_st_encoding_bitrate;
+  // Window (number of frames) for computing short-time encoding bitrate.
+  int window_size;
+  // Number of window measurements.
+  int window_count;
+};
+
+// Note: these rate control stats assume only 1 key frame in the
+// sequence (i.e., first frame only).
+static void set_rate_control_stats(struct RateControlStats *rc,
+                                     vpx_codec_enc_cfg_t *cfg) {
+  unsigned int sl, tl;
+  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
+  // per-frame-bandwidth, for the rate control encoding stats below.
+  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
+
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      const int tlayer0 = sl * cfg->ts_number_layers;
+      rc->layer_framerate[layer] =
+          framerate / cfg->ts_rate_decimator[tl];
+      if (tl > 0) {
+        rc->layer_pfb[layer] = 1000.0 *
+            (cfg->layer_target_bitrate[layer] -
+                cfg->layer_target_bitrate[layer - 1]) /
+            (rc->layer_framerate[layer] -
+                rc->layer_framerate[layer - 1]);
+      } else {
+        rc->layer_pfb[tlayer0] = 1000.0 *
+            cfg->layer_target_bitrate[tlayer0] /
+            rc->layer_framerate[tlayer0];
+      }
+      rc->layer_input_frames[layer] = 0;
+      rc->layer_enc_frames[layer] = 0;
+      rc->layer_tot_enc_frames[layer] = 0;
+      rc->layer_encoding_bitrate[layer] = 0.0;
+      rc->layer_avg_frame_size[layer] = 0.0;
+      rc->layer_avg_rate_mismatch[layer] = 0.0;
+    }
+  }
+  rc->window_count = 0;
+  rc->window_size = 15;
+  rc->avg_st_encoding_bitrate = 0.0;
+  rc->variance_st_encoding_bitrate = 0.0;
+}
+
+static void printout_rate_control_summary(struct RateControlStats *rc,
+                                          vpx_codec_enc_cfg_t *cfg,
+                                          int frame_cnt) {
+  unsigned int sl, tl;
+  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
+  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
+  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
+      cfg->ss_number_layers, cfg->ts_number_layers);
+  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
+      const int layer = sl * cfg->ts_number_layers + tl;
+      const int num_dropped = (tl > 0) ?
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) :
+          (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1);
+      if (!sl)
+        tot_num_frames += rc->layer_input_frames[layer];
+      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
+          rc->layer_encoding_bitrate[layer] / tot_num_frames;
+      rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] /
+          rc->layer_enc_frames[layer];
+      rc->layer_avg_rate_mismatch[layer] =
+          100.0 * rc->layer_avg_rate_mismatch[layer] /
+          rc->layer_enc_frames[layer];
+      printf("For layer#: sl%d tl%d \n", sl, tl);
+      printf("Bitrate (target vs actual): %d %f.0 kbps\n",
+             cfg->layer_target_bitrate[layer],
+             rc->layer_encoding_bitrate[layer]);
+      printf("Average frame size (target vs actual): %f %f bits\n",
+             rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]);
+      printf("Average rate_mismatch: %f\n",
+             rc->layer_avg_rate_mismatch[layer]);
+      printf("Number of input frames, encoded (non-key) frames, "
+          "and percent dropped frames: %d %d %f.0 \n",
+          rc->layer_input_frames[layer], rc->layer_enc_frames[layer],
+          100.0 * num_dropped / rc->layer_input_frames[layer]);
+      printf("\n");
+    }
+  }
+  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
+  rc->variance_st_encoding_bitrate =
+      rc->variance_st_encoding_bitrate / rc->window_count -
+      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+      rc->avg_st_encoding_bitrate;
+  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
+  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+         rc->avg_st_encoding_bitrate,
+         sqrt(rc->variance_st_encoding_bitrate),
+         perc_fluctuation);
+  if (frame_cnt != tot_num_frames)
+    die("Error: Number of input frames not equal to output encoded frames != "
+        "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames);
+}
+
+vpx_codec_err_t parse_superframe_index(const uint8_t *data,
+                                       size_t data_sz,
+                                       uint32_t sizes[8], int *count) {
+  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
+  // it is a super frame index. If the last byte of real video compression
+  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
+  // not the associated matching marker byte at the front of the index we have
+  // an invalid bitstream and need to return an error.
+
+  uint8_t marker;
+
+  marker = *(data + data_sz - 1);
+  *count = 0;
+
+
+  if ((marker & 0xe0) == 0xc0) {
+    const uint32_t frames = (marker & 0x7) + 1;
+    const uint32_t mag = ((marker >> 3) & 0x3) + 1;
+    const size_t index_sz = 2 + mag * frames;
+
+    // This chunk is marked as having a superframe index but doesn't have
+    // enough data for it, thus it's an invalid superframe index.
+    if (data_sz < index_sz)
+      return VPX_CODEC_CORRUPT_FRAME;
+
+    {
+      const uint8_t marker2 = *(data + data_sz - index_sz);
+
+      // This chunk is marked as having a superframe index but doesn't have
+      // the matching marker byte at the front of the index therefore it's an
+      // invalid chunk.
+      if (marker != marker2)
+        return VPX_CODEC_CORRUPT_FRAME;
+    }
+
+    {
+      // Found a valid superframe index.
+      uint32_t i, j;
+      const uint8_t *x = &data[data_sz - index_sz + 1];
+
+      for (i = 0; i < frames; ++i) {
+        uint32_t this_sz = 0;
+
+        for (j = 0; j < mag; ++j)
+          this_sz |= (*x++) << (j * 8);
+        sizes[i] = this_sz;
+      }
+      *count = frames;
+    }
+  }
+  return VPX_CODEC_OK;
+}
+#endif
+
 int main(int argc, const char **argv) {
  AppInput app_input = {0};
  VpxVideoWriter *writer = NULL;
@@ -323,7 +555,15 @@ int main(int argc, const char **argv) {
  FILE *infile = NULL;
  int end_of_stream = 0;
  int frames_received = 0;
-
+#if OUTPUT_RC_STATS
+  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL};
+  struct RateControlStats rc;
+  vpx_svc_layer_id_t layer_id;
+  int sl, tl;
+  double sum_bitrate = 0.0;
+  double sum_bitrate2 = 0.0;
+  double framerate  = 30.0;
+#endif
  memset(&svc_ctx, 0, sizeof(svc_ctx));
  svc_ctx.log_print = 1;
  exec_name = argv[0];
@@ -350,6 +590,13 @@ int main(int argc, const char **argv) {
      VPX_CODEC_OK)
    die("Failed to initialize encoder\n");

+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    set_rate_control_stats(&rc, &enc_cfg);
+    framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num;
+  }
+#endif
+
  info.codec_fourcc = VP9_FOURCC;
  info.time_base.numerator = enc_cfg.g_timebase.num;
  info.time_base.denominator = enc_cfg.g_timebase.den;
@@ -361,11 +608,31 @@ int main(int argc, const char **argv) {
    if (!writer)
      die("Failed to open %s for writing\n", app_input.output_filename);
  }
+#if OUTPUT_RC_STATS
+  // For now, just write temporal layer streams.
+  // TODO(wonkap): do spatial by re-writing superframe.
+  if (svc_ctx.output_rc_stat) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+      char file_name[PATH_MAX];
+
+      snprintf(file_name, sizeof(file_name), "%s_t%d.ivf",
+               app_input.output_filename, tl);
+      outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info);
+      if (!outfile[tl])
+        die("Failed to open %s for writing", file_name);
+    }
+  }
+#endif

  // skip initial frames
  for (i = 0; i < app_input.frames_to_skip; ++i)
    vpx_img_read(&raw, infile);

+  if (svc_ctx.speed != -1)
+    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
+  if (svc_ctx.threads)
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
+
  // Encode frames
  while (!end_of_stream) {
    vpx_codec_iter_t iter = NULL;
@@ -377,7 +644,9 @@ int main(int argc, const char **argv) {
    }

    res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw),
-                         pts, frame_duration, VPX_DL_GOOD_QUALITY);
+                         pts, frame_duration, svc_ctx.speed >= 5 ?
+                         VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY);
+
    printf("%s", vpx_svc_get_message(&svc_ctx));
    if (res != VPX_CODEC_OK) {
      die_codec(&codec, "Failed to encode frame");
@@ -386,11 +655,90 @@ int main(int argc, const char **argv) {
    while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) {
      switch (cx_pkt->kind) {
        case VPX_CODEC_CX_FRAME_PKT: {
-          if (cx_pkt->data.frame.sz > 0)
+          if (cx_pkt->data.frame.sz > 0) {
+#if OUTPUT_RC_STATS
+            uint32_t sizes[8];
+            int count = 0;
+#endif
            vpx_video_writer_write_frame(writer,
                                         cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
+#if OUTPUT_RC_STATS
+            // TODO(marpan/wonkap): Put this (to line728) in separate function.
+            if (svc_ctx.output_rc_stat) {
+              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
+              parse_superframe_index(cx_pkt->data.frame.buf,
+                                     cx_pkt->data.frame.sz, sizes, &count);
+              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
+                                        layer_id.temporal_layer_id];
+              }
+              for (tl = layer_id.temporal_layer_id;
+                  tl < enc_cfg.ts_number_layers; ++tl) {
+                vpx_video_writer_write_frame(outfile[tl],
+                                             cx_pkt->data.frame.buf,
+                                             cx_pkt->data.frame.sz,
+                                             cx_pkt->data.frame.pts);
+              }
+
+              for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                for (tl = layer_id.temporal_layer_id;
+                    tl < enc_cfg.ts_number_layers; ++tl) {
+                  const int layer = sl * enc_cfg.ts_number_layers + tl;
+                  ++rc.layer_tot_enc_frames[layer];
+                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
+                  // Keep count of rate control stats per layer, for non-key
+                  // frames.
+                  if (tl == layer_id.temporal_layer_id &&
+                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
+                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
+                    rc.layer_avg_rate_mismatch[layer] +=
+                        fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) /
+                        rc.layer_pfb[layer];
+                    ++rc.layer_enc_frames[layer];
+                  }
+                }
+              }
+
+              // Update for short-time encoding bitrate states, for moving
+              // window of size rc->window, shifted by rc->window / 2.
+              // Ignore first window segment, due to key frame.
+              if (frame_cnt > rc.window_size) {
+                tl = layer_id.temporal_layer_id;
+                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
+                }
+                if (frame_cnt % rc.window_size == 0) {
+                  rc.window_count += 1;
+                  rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+                  rc.variance_st_encoding_bitrate +=
+                      (sum_bitrate / rc.window_size) *
+                      (sum_bitrate / rc.window_size);
+                  sum_bitrate = 0.0;
+                }
+              }
+
+              // Second shifted window.
+              if (frame_cnt > rc.window_size + rc.window_size / 2) {
+               tl = layer_id.temporal_layer_id;
+               for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+                 sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
+               }
+
+               if (frame_cnt > 2 * rc.window_size &&
+                  frame_cnt % rc.window_size == 0) {
+                 rc.window_count += 1;
+                 rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+                 rc.variance_st_encoding_bitrate +=
+                    (sum_bitrate2 / rc.window_size) *
+                    (sum_bitrate2 / rc.window_size);
+                 sum_bitrate2 = 0.0;
+               }
+              }
+            }
+#endif
+          }

          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
@@ -415,25 +763,30 @@ int main(int argc, const char **argv) {
      pts += frame_duration;
    }
  }
-
  printf("Processed %d frames\n", frame_cnt);
-
  fclose(infile);
+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    printout_rate_control_summary(&rc, &enc_cfg, frame_cnt);
+    printf("\n");
+  }
+#endif
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec");
-
  if (app_input.passes == 2)
    stats_close(&app_input.rc_stats, 1);
-
  if (writer) {
    vpx_video_writer_close(writer);
  }
-
+#if OUTPUT_RC_STATS
+  if (svc_ctx.output_rc_stat) {
+    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
+      vpx_video_writer_close(outfile[tl]);
+    }
+  }
+#endif
  vpx_img_free(&raw);
-
  // display average size, psnr
  printf("%s", vpx_svc_dump_statistics(&svc_ctx));
-
  vpx_svc_release(&svc_ctx);
-
  return EXIT_SUCCESS;
 }
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -19,16 +19,16 @@
 #include <string.h>

 #include "./vpx_config.h"
-#include "vpx_ports/vpx_timer.h"
+#include "../vpx_ports/vpx_timer.h"
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"

-#include "./tools_common.h"
-#include "./video_writer.h"
+#include "../tools_common.h"
+#include "../video_writer.h"

 static const char *exec_name;

-void usage_exit() {
+void usage_exit(void) {
  exit(EXIT_FAILURE);
 }

@@ -61,6 +61,16 @@ struct RateControlMetrics {
  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative).
  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
+  // Average of the short-time encoder actual bitrate.
+  // TODO(marpan): Should we add these short-time stats for each layer?
+  double avg_st_encoding_bitrate;
+  // Variance of the short-time encoder actual bitrate.
+  double variance_st_encoding_bitrate;
+  // Window (number of frames) for computing short-timee encoding bitrate.
+  int window_size;
+  // Number of window measurements.
+  int window_count;
+  int layer_target_bitrate[VPX_MAX_LAYERS];
 };

 // Note: these rate control metrics assume only 1 key frame in the
@@ -76,13 +86,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
  // per-frame-bandwidth, for the rate control encoding stats below.
  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
-  rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] /
+  rc->layer_pfb[0] = 1000.0 * rc->layer_target_bitrate[0] /
      rc->layer_framerate[0];
  for (i = 0; i < cfg->ts_number_layers; ++i) {
    if (i > 0) {
      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
      rc->layer_pfb[i] = 1000.0 *
-          (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) /
+          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
    }
    rc->layer_input_frames[i] = 0;
@@ -92,6 +102,10 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc,
    rc->layer_avg_frame_size[i] = 0.0;
    rc->layer_avg_rate_mismatch[i] = 0.0;
  }
+  rc->window_count = 0;
+  rc->window_size = 15;
+  rc->avg_st_encoding_bitrate = 0.0;
+  rc->variance_st_encoding_bitrate = 0.0;
 }

 static void printout_rate_control_summary(struct RateControlMetrics *rc,
@@ -99,6 +113,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
                                          int frame_cnt) {
  unsigned int i = 0;
  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
  printf("Total number of processed frames: %d\n\n", frame_cnt -1);
  printf("Rate control layer stats for %d layer(s):\n\n",
      cfg->ts_number_layers);
@@ -114,7 +129,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
    rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] /
        rc->layer_enc_frames[i];
    printf("For layer#: %d \n", i);
-    printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i],
+    printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
           rc->layer_encoding_bitrate[i]);
    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
           rc->layer_avg_frame_size[i]);
@@ -125,6 +140,17 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
        100.0 * num_dropped / rc->layer_input_frames[i]);
    printf("\n");
  }
+  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
+  rc->variance_st_encoding_bitrate =
+      rc->variance_st_encoding_bitrate / rc->window_count -
+      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
+  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
+      rc->avg_st_encoding_bitrate;
+  printf("Short-time stats, for window of %d frames: \n",rc->window_size);
+  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
+         rc->avg_st_encoding_bitrate,
+         sqrt(rc->variance_st_encoding_bitrate),
+         perc_fluctuation);
  if ((frame_cnt - 1) != tot_num_frames)
    die("Error: Number of input frames not equal to output! \n");
 }
@@ -456,7 +482,11 @@ int main(int argc, char **argv) {
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = {0};
  int flag_periodicity = 1;
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
  vpx_svc_layer_id_t layer_id = {0, 0};
+#else
+  vpx_svc_layer_id_t layer_id = {0};
+#endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
@@ -469,6 +499,9 @@ int main(int argc, char **argv) {
 #else
  const int min_args = min_args_base;
 #endif  // CONFIG_VP9_HIGHBITDEPTH
+  double sum_bitrate = 0.0;
+  double sum_bitrate2 = 0.0;
+  double framerate  = 30.0;

  exec_name = argv[0];
  // Check usage and arguments.
@@ -565,21 +598,32 @@ int main(int argc, char **argv) {
  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode];
       ++i) {
-    cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+    if (strncmp(encoder->name, "vp8", 3) == 0)
+      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+    else if (strncmp(encoder->name, "vp9", 3) == 0)
+      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
  }

  // Real time parameters.
  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
-  cfg.rc_resize_allowed = 0;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
+  if (strncmp(encoder->name, "vp9", 3) == 0)
+    cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
  cfg.rc_buf_initial_sz = 500;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;

+  // Disable dynamic resizing by default.
+  cfg.rc_resize_allowed = 0;
+
+  // Use 1 thread as default.
+  cfg.g_threads = 1;
+
  // Enable error resilient mode.
  cfg.g_error_resilient = 1;
  cfg.g_lag_in_frames   = 0;
@@ -588,6 +632,8 @@ int main(int argc, char **argv) {
  // Disable automatic keyframe placement.
  cfg.kf_min_dist = cfg.kf_max_dist = 3000;

+  cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  set_temporal_layer_pattern(layering_mode,
                             &cfg,
                             layer_flags,
@@ -596,14 +642,15 @@ int main(int argc, char **argv) {
  set_rate_control_metrics(&rc, &cfg);

  // Target bandwidth for the whole stream.
-  // Set to ts_target_bitrate for highest layer (total bitrate).
-  cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1];
+  // Set to layer_target_bitrate for highest layer (total bitrate).
+  cfg.rc_target_bitrate = rc.layer_target_bitrate[cfg.ts_number_layers - 1];

  // Open input file.
  if (!(infile = fopen(argv[1], "rb"))) {
    die("Failed to open %s for reading", argv[1]);
  }

+  framerate = cfg.g_timebase.den / cfg.g_timebase.num;
  // Open an output file for each stream.
  for (i = 0; i < cfg.ts_number_layers; ++i) {
    char file_name[PATH_MAX];
@@ -636,23 +683,35 @@ int main(int argc, char **argv) {

  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOnYOnly);
+    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
-      vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
-      vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-      vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-      vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
-      if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) {
-        die_codec(&codec, "Failed to set SVC");
+    vpx_svc_extra_cfg_t svc_params;
+    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
+    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, 0);
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0);
+    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
+    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0))
+      die_codec(&codec, "Failed to set SVC");
+    for (i = 0; i < cfg.ts_number_layers; ++i) {
+      svc_params.max_quantizers[i] = cfg.rc_max_quantizer;
+      svc_params.min_quantizers[i] = cfg.rc_min_quantizer;
    }
+    svc_params.scaling_factor_num[0] = cfg.g_h;
+    svc_params.scaling_factor_den[0] = cfg.g_h;
+    vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params);
+  }
+  if (strncmp(encoder->name, "vp8", 3) == 0) {
+    vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0);
  }
-  vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
  vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1);
  // This controls the maximum target size of the key frame.
  // For generating smaller key frames, use a smaller max_intra_size_pct
  // value, like 100 or 200.
  {
-    const int max_intra_size_pct = 200;
+    const int max_intra_size_pct = 900;
    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
  }
@@ -662,14 +721,21 @@ int main(int argc, char **argv) {
    struct vpx_usec_timer timer;
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
    // Update the temporal layer_id. No spatial layers in this test.
    layer_id.spatial_layer_id = 0;
+#endif
    layer_id.temporal_layer_id =
        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
    if (strncmp(encoder->name, "vp9", 3) == 0) {
      vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id);
+    } else if (strncmp(encoder->name, "vp8", 3) == 0) {
+      vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID,
+                        layer_id.temporal_layer_id);
    }
    flags = layer_flags[frame_cnt % flag_periodicity];
+    if (layering_mode == 0)
+      flags = 0;
    frame_avail = vpx_img_read(&raw, infile);
    if (frame_avail)
      ++rc.layer_input_frames[layer_id.temporal_layer_id];
@@ -705,6 +771,33 @@ int main(int argc, char **argv) {
              ++rc.layer_enc_frames[i];
            }
          }
+          // Update for short-time encoding bitrate states, for moving window
+          // of size rc->window, shifted by rc->window / 2.
+          // Ignore first window segment, due to key frame.
+          if (frame_cnt > rc.window_size) {
+            sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
+            if (frame_cnt % rc.window_size == 0) {
+              rc.window_count += 1;
+              rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size;
+              rc.variance_st_encoding_bitrate +=
+                  (sum_bitrate / rc.window_size) *
+                  (sum_bitrate / rc.window_size);
+              sum_bitrate = 0.0;
+            }
+          }
+          // Second shifted window.
+          if (frame_cnt > rc.window_size + rc.window_size / 2) {
+            sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate;
+            if (frame_cnt > 2 * rc.window_size &&
+                frame_cnt % rc.window_size == 0) {
+              rc.window_count += 1;
+              rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
+              rc.variance_st_encoding_bitrate +=
+                  (sum_bitrate2 / rc.window_size) *
+                  (sum_bitrate2 / rc.window_size);
+              sum_bitrate2 = 0.0;
+            }
+          }
          break;
          default:
            break;
--- a/libs.doxy_template
+++ b/libs.doxy_template
@@ -36,7 +36,7 @@ DOXYFILE_ENCODING      = UTF-8
 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
 # by quotes) that should identify the project.

-PROJECT_NAME           = "WebM VP8 Codec SDK"
+PROJECT_NAME           = "WebM Codec SDK"

 # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
 # base path where the generated documentation will be put.
@@ -415,12 +415,6 @@ MAX_INITIALIZER_LINES  = 30

 SHOW_USED_FILES        = YES

-# If the sources in your project are distributed over multiple directories
-# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy
-# in the documentation. The default is NO.
-
-SHOW_DIRECTORIES       = NO
-
 # The FILE_VERSION_FILTER tag can be used to specify a program or script that
 # doxygen should invoke to get the current version for each file (typically from the
 # version control system). Doxygen will invoke the program by executing (via
@@ -715,12 +709,6 @@ HTML_FOOTER            =

 HTML_STYLESHEET        =

-# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes,
-# files or namespaces will be aligned in HTML using tables. If set to
-# NO a bullet list will be used.
-
-HTML_ALIGN_MEMBERS     = YES
-
 # If the GENERATE_HTMLHELP tag is set to YES, additional index files
 # will be generated that can be used as input for tools like the
 # Microsoft HTML help workshop to generate a compressed HTML help file (.chm)
--- a/libs.mk
+++ b/libs.mk
@@ -17,32 +17,6 @@ else
  ASM:=.asm
 endif

-#
-# Calculate platform- and compiler-specific offsets for hand coded assembly
-#
-ifeq ($(filter icc gcc,$(TGT_CC)), $(TGT_CC))
-OFFSET_PATTERN:='^[a-zA-Z0-9_]* EQU'
-define asm_offsets_template
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).S
-	@echo "    [CREATE] $$@"
-	$$(qexec)LC_ALL=C grep $$(OFFSET_PATTERN) $$< | tr -d '$$$$\#' $$(ADS2GAS) > $$@
-$$(BUILD_PFX)$(2).S: $(2)
-CLEAN-OBJS += $$(BUILD_PFX)$(1) $(2).S
-endef
-else
-  ifeq ($(filter rvct,$(TGT_CC)), $(TGT_CC))
-define asm_offsets_template
-$$(BUILD_PFX)$(1): obj_int_extract
-$$(BUILD_PFX)$(1): $$(BUILD_PFX)$(2).o
-	@echo "    [CREATE] $$@"
-	$$(qexec)./obj_int_extract rvds $$< $$(ADS2GAS) > $$@
-OBJS-yes += $$(BUILD_PFX)$(2).o
-CLEAN-OBJS += $$(BUILD_PFX)$(1)
-$$(filter %$$(ASM).o,$$(OBJS-yes)): $$(BUILD_PFX)$(1)
-endef
-endif # rvct
-endif # !gcc
-
 #
 # Rule to generate runtime cpu detection files
 #
@@ -51,7 +25,7 @@ $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2)
 	@echo "    [CREATE] $$@"
 	$$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \
          --sym=$(1) \
-          --config=$$(CONFIG_DIR)$$(target)$$(if $$(FAT_ARCHS),,-$$(TOOLCHAIN)).mk \
+          --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \
          $$(RTCD_OPTIONS) $$^ > $$@
 CLEAN-OBJS += $$(BUILD_PFX)$(1).h
 RTCD += $$(BUILD_PFX)$(1).h
@@ -60,13 +34,6 @@ endef
 CODEC_SRCS-yes += CHANGELOG
 CODEC_SRCS-yes += libs.mk

-# If this is a universal (fat) binary, then all the subarchitectures have
-# already been built and our job is to stitch them together. The
-# BUILD_LIBVPX variable indicates whether we should be building
-# (compiling, linking) the library. The LIPO_LIBVPX variable indicates
-# that we're stitching.
-$(eval $(if $(filter universal%,$(TOOLCHAIN)),LIPO_LIBVPX,BUILD_LIBVPX):=yes)
-
 include $(SRC_PATH_BARE)/vpx/vpx_codec.mk
 CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS))
 CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS))
@@ -80,6 +47,9 @@ CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS))
 include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk
 CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS))

+include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk
+CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS))
+
 ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
  VP8_PREFIX=vp8/
  include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk
@@ -163,18 +133,18 @@ INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Release/%)
 INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/%  $(p)/Debug/%)
 endif

-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/version.sh
-CODEC_SRCS-$(BUILD_LIBVPX) += build/make/rtcd.pl
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/emmintrin_compat.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/mem_ops_aligned.h
-CODEC_SRCS-$(BUILD_LIBVPX) += vpx_ports/vpx_once.h
-CODEC_SRCS-$(BUILD_LIBVPX) += $(BUILD_PFX)vpx_config.c
+CODEC_SRCS-yes += build/make/version.sh
+CODEC_SRCS-yes += build/make/rtcd.pl
+CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h
+CODEC_SRCS-yes += vpx_ports/mem_ops.h
+CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h
+CODEC_SRCS-yes += vpx_ports/vpx_once.h
+CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
 endif
-CODEC_EXPORTS-$(BUILD_LIBVPX) += vpx/exports_com
+CODEC_EXPORTS-yes += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
 CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec

@@ -205,33 +175,13 @@ INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS)
 # based build systems.
 libvpx_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(CODEC_SRCS) | xargs -n1 echo | sort -u > $@
+	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt


 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)

-obj_int_extract.bat: $(SRC_PATH_BARE)/build/$(MSVS_ARCH_DIR)/obj_int_extract.bat
-	@cp $^ $@
-
-obj_int_extract.$(VCPROJ_SFX): obj_int_extract.bat
-obj_int_extract.$(VCPROJ_SFX): $(SRC_PATH_BARE)/build/make/obj_int_extract.c
-	@echo "    [CREATE] $@"
-	$(qexec)$(GEN_VCPROJ) \
-    --exe \
-    --target=$(TOOLCHAIN) \
-    --name=obj_int_extract \
-    --ver=$(CONFIG_VS_VERSION) \
-    --proj-guid=E1360C65-D375-4335-8057-7ED99CC3F9B2 \
-    --src-path-bare="$(SRC_PATH_BARE)" \
-    $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
-    --out=$@ $^ \
-    -I. \
-    -I"$(SRC_PATH_BARE)" \
-
-PROJECTS-$(BUILD_LIBVPX) += obj_int_extract.$(VCPROJ_SFX)
-
 vpx.def: $(call enabled,CODEC_EXPORTS)
 	@echo "    [CREATE] $@"
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\
@@ -246,7 +196,7 @@ ASM_INCLUDES := \
    vpx_config.asm \
    vpx_ports/x86_abi_support.asm \

-vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
+vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
            $(if $(CONFIG_SHARED),--dll,--lib) \
@@ -261,7 +211,7 @@ vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def obj_int_extract.$(VCPROJ_SFX)
            $(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) \
            --src-path-bare="$(SRC_PATH_BARE)" \

-PROJECTS-$(BUILD_LIBVPX) += vpx.$(VCPROJ_SFX)
+PROJECTS-yes += vpx.$(VCPROJ_SFX)

 vpx.$(VCPROJ_SFX): vpx_config.asm
 vpx.$(VCPROJ_SFX): $(RTCD)
@@ -269,32 +219,42 @@ vpx.$(VCPROJ_SFX): $(RTCD)
 endif
 else
 LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_OBJS)
-LIBS-$(if $(BUILD_LIBVPX),$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
+OBJS-yes += $(LIBVPX_OBJS)
+LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-
-BUILD_LIBVPX_SO         := $(if $(BUILD_LIBVPX),$(CONFIG_SHARED))
-
+SO_VERSION_MAJOR := 2
+SO_VERSION_MINOR := 0
+SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
-LIBVPX_SO               := libvpx.$(VERSION_MAJOR).dylib
+LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
+SHARED_LIB_SUF          := .dylib
 EXPORT_FILE             := libvpx.syms
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
                             libvpx.dylib  )
 else
-LIBVPX_SO               := libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)
+ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS))
+LIBVPX_SO               := libvpx$(SO_VERSION_MAJOR).dll
+SHARED_LIB_SUF          := _dll.a
+EXPORT_FILE             := libvpx.def
+LIBVPX_SO_SYMLINKS      :=
+LIBVPX_SO_IMPLIB        := libvpx_dll.a
+else
+LIBVPX_SO               := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH)
+SHARED_LIB_SUF          := .so
 EXPORT_FILE             := libvpx.ver
-SYM_LINK                := libvpx.so
 LIBVPX_SO_SYMLINKS      := $(addprefix $(LIBSUBDIR)/, \
-                             libvpx.so libvpx.so.$(VERSION_MAJOR) \
-                             libvpx.so.$(VERSION_MAJOR).$(VERSION_MINOR))
+                             libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \
+                             libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR))
+endif
 endif

-LIBS-$(BUILD_LIBVPX_SO) += $(BUILD_PFX)$(LIBVPX_SO)\
-                           $(notdir $(LIBVPX_SO_SYMLINKS))
+LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\
+                           $(notdir $(LIBVPX_SO_SYMLINKS)) \
+                           $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB))
 $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE)
 $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm
-$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(VERSION_MAJOR)
+$(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR)
 $(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE)

 libvpx.ver: $(call enabled,CODEC_EXPORTS)
@@ -309,6 +269,19 @@ libvpx.syms: $(call enabled,CODEC_EXPORTS)
 	$(qexec)awk '{print "_"$$2}' $^ >$@
 CLEAN-OBJS += libvpx.syms

+libvpx.def: $(call enabled,CODEC_EXPORTS)
+	@echo "    [CREATE] $@"
+	$(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@
+	$(qexec)echo "DATA MULTIPLE NONSHARED" >> $@
+	$(qexec)echo "EXPORTS" >> $@
+	$(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@
+CLEAN-OBJS += libvpx.def
+
+libvpx_dll.a: $(LIBVPX_SO)
+	@echo "    [IMPLIB] $@"
+	$(qexec)emximp -o $@ $<
+CLEAN-OBJS += libvpx_dll.a
+
 define libvpx_symlink_template
 $(1): $(2)
 	@echo "    [LN]     $(2) $$@"
@@ -324,11 +297,12 @@ $(eval $(call libvpx_symlink_template,\
    $(LIBVPX_SO)))


-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBVPX_SO_SYMLINKS)
-INSTALL-LIBS-$(BUILD_LIBVPX_SO) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO)
+INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB))


-LIBS-$(BUILD_LIBVPX) += vpx.pc
+LIBS-yes += vpx.pc
 vpx.pc: config.mk libs.mk
 	@echo "    [CREATE] $@"
 	$(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@
@@ -354,9 +328,6 @@ INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc
 CLEAN-OBJS += vpx.pc
 endif

-LIBS-$(LIPO_LIBVPX) += libvpx.a
-$(eval $(if $(LIPO_LIBVPX),$(call lipo_lib_template,libvpx.a)))
-
 #
 # Rule to make assembler configuration file from C configuration file
 #
@@ -377,7 +348,7 @@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm
 endif

 #
-# Add assembler dependencies for configuration and offsets
+# Add assembler dependencies for configuration.
 #
 $(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm
@@ -395,14 +366,18 @@ LIBVPX_TEST_DATA_PATH ?= .

 include $(SRC_PATH_BARE)/test/test.mk
 LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
-LIBVPX_TEST_BINS=./test_libvpx$(EXE_SFX)
+LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
 libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

+TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
+TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
+TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS)))
+
 libvpx_test_srcs.txt:
 	@echo "    [CREATE] $@"
-	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | sort -u > $@
+	@echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_test_srcs.txt

 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
@@ -463,7 +438,25 @@ test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_

 PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX)

-LIBVPX_TEST_BINS := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BINS)))
+LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN)))
+
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX)
+test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX)
+	@echo "    [CREATE] $@"
+	$(qexec)$(GEN_VCPROJ) \
+            --exe \
+            --target=$(TOOLCHAIN) \
+            --name=test_intra_pred_speed \
+            -D_VARIADIC_MAX=10 \
+            --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \
+            --ver=$(CONFIG_VS_VERSION) \
+            --src-path-bare="$(SRC_PATH_BARE)" \
+            $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \
+            --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \
+            -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \
+            -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^
+endif  # TEST_INTRA_PRED_SPEED
 endif
 else

@@ -474,45 +467,54 @@ ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS))
 # Disabling pthreads globally will cause issues on darwin and possibly elsewhere
 $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0
 endif
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(GTEST_OBJS)
-LIBS-$(BUILD_LIBVPX) += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
+GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src
+GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
+$(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(GTEST_OBJS)
+LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a
 $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS)

 LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS)))
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src
-$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/googletest/src/include
-OBJS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_OBJS)
-BINS-$(BUILD_LIBVPX) += $(LIBVPX_TEST_BINS)
+$(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(LIBVPX_TEST_OBJS)
+BINS-yes += $(LIBVPX_TEST_BIN)

 CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx)
-CODEC_LIB_SUF=$(if $(CONFIG_SHARED),.so,.a)
-$(foreach bin,$(LIBVPX_TEST_BINS),\
-    $(if $(BUILD_LIBVPX),$(eval $(bin): \
-        lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a ))\
-    $(if $(BUILD_LIBVPX),$(eval $(call linkerxx_template,$(bin),\
-        $(LIBVPX_TEST_OBJS) \
-        -L. -lvpx -lgtest $(extralibs) -lm)\
-        )))\
-    $(if $(LIPO_LIBS),$(eval $(call lipo_bin_template,$(bin))))\
+CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a)
+TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a
+$(LIBVPX_TEST_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \
+              $(LIBVPX_TEST_OBJS) \
+              -L. -lvpx -lgtest $(extralibs) -lm))

-endif
+ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),)
+$(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES)
+OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS)
+BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN)
+
+$(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS)
+$(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \
+              $(TEST_INTRA_PRED_SPEED_OBJS) \
+              -L. -lvpx -lgtest $(extralibs) -lm))
+endif  # TEST_INTRA_PRED_SPEED
+
+endif  # CONFIG_UNIT_TESTS

 # Install test sources only if codec source is included
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\
    $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f))
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS)
+INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS)

 define test_shard_template
 test:: test_shard.$(1)
-test_shard.$(1): $(LIBVPX_TEST_BINS) testdata
+test-no-data-check:: test_shard_ndc.$(1)
+test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN)
 	@set -e; \
-	 for t in $(LIBVPX_TEST_BINS); do \
-	   export GTEST_SHARD_INDEX=$(1); \
-	   export GTEST_TOTAL_SHARDS=$(2); \
-	   $$$$t; \
-	 done
+	 export GTEST_SHARD_INDEX=$(1); \
+	 export GTEST_TOTAL_SHARDS=$(2); \
+	 $(LIBVPX_TEST_BIN)
+test_shard.$(1): testdata
 .PHONY: test_shard.$(1)
 endef

@@ -535,7 +537,11 @@ libs.doxy: $(CODEC_DOC_SRCS)
 	@echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@

 ## Generate rtcd.h for all objects
+ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes)
 $(OBJS-yes:.o=.d): $(RTCD)
+else
+$(OBJS-yes): $(RTCD)
+endif

 ## Update the global src list
 SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS)
@@ -553,15 +559,16 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH))
 endif
-utiltest: testdata
+utiltest utiltest-no-data-check:
 	$(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
 	$(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(TEST_BIN_PATH)
+utiltest: testdata
 else
-utiltest:
+utiltest utiltest-no-data-check:
 	@echo Unit tests must be enabled to make the utiltest target.
 endif

@@ -579,11 +586,12 @@ ifeq ($(CONFIG_MSVS),yes)
 # TODO(tomfinegan): Support running the debug versions of tools?
 EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release
 endif
-exampletest: examples testdata
+exampletest exampletest-no-data-check: examples
 	$(qexec)$(SRC_PATH_BARE)/test/examples.sh \
 		--test-data-path $(LIBVPX_TEST_DATA_PATH) \
 		--bin-path $(EXAMPLES_BIN_PATH)
+exampletest: testdata
 else
-exampletest:
+exampletest exampletest-no-data-check:
 	@echo Unit tests must be enabled to make the exampletest target.
 endif
--- a/mainpage.dox
+++ b/mainpage.dox
@@ -1,4 +1,4 @@
-/*!\mainpage WebM VP8 Codec SDK
+/*!\mainpage WebM Codec SDK

  \section main_contents Page Contents
  - \ref main_intro
@@ -6,11 +6,11 @@
  - \ref main_support

  \section main_intro Introduction
-  Welcome to the WebM VP8 Codec SDK. This SDK allows you to integrate your
-  applications with the VP8 video codec, a high quality, royalty free, open
-  source codec deployed on millions of computers and devices worldwide.
+  Welcome to the WebM Codec SDK. This SDK allows you to integrate your
+  applications with the VP8 and VP9 video codecs, high quality, royalty free,
+  open source codecs deployed on billions of computers and devices worldwide.

-  This distribution of the WebM VP8 Codec SDK includes the following support:
+  This distribution of the WebM Codec SDK includes the following support:

  \if vp8_encoder
  - \ref vp8_encoder
@@ -28,12 +28,12 @@
  - Read the \ref samples "sample code" for examples of how to interact with the
    codec.
  - \ref codec reference
-    \if encoder
-    - \ref encoder reference
-    \endif
-    \if decoder
-    - \ref decoder reference
-    \endif
+  \if encoder
+  - \ref encoder reference
+  \endif
+  \if decoder
+  - \ref decoder reference
+  \endif

  \section main_support Support Options & FAQ
  The WebM project is an open source project supported by its community. For
--- a/md5_utils.c
+++ b/md5_utils.c
@@ -24,7 +24,7 @@

 #include "md5_utils.h"

-void
+static void
 byteSwap(UWORD32 *buf, unsigned words) {
  md5byte *p;

--- a/rate_hist.c
+++ b/rate_hist.c
@@ -88,6 +88,9 @@ void update_rate_histogram(struct rate_hist *hist,
  if (now < cfg->rc_buf_initial_sz)
    return;

+  if (!cfg->rc_target_bitrate)
+    return;
+
  then = now;

  /* Sum the size over the past rc_buf_sz ms */
--- a/solution.mk
+++ b/solution.mk
@@ -9,7 +9,7 @@
 ##

 # libvpx reverse dependencies (targets that depend on libvpx)
-VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest obj_int_extract)
+VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest)
 VPX_RDEPS=$(foreach vcp,\
              $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx)

@@ -17,7 +17,6 @@ vpx.sln: $(wildcard *.$(VCPROJ_SFX))
 	@echo "    [CREATE] $@"
 	$(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \
            $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \
-            --dep=vpx:obj_int_extract \
            --dep=test_libvpx:gtest \
            --ver=$(CONFIG_VS_VERSION)\
            --out=$@ $^
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -29,14 +29,14 @@ class ACMRandom {
  uint16_t Rand16(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
-    return (value >> 16) & 0xffff;
+    return (value >> 15) & 0xffff;
  }

  uint8_t Rand8(void) {
    const uint32_t value =
        random_.Generate(testing::internal::Random::kMaxRange);
    // There's a bit more entropy in the upper bits of this implementation.
-    return (value >> 24) & 0xff;
+    return (value >> 23) & 0xff;
  }

  uint8_t Rand8Extremes(void) {
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -40,9 +40,17 @@ include $(CLEAR_VARS)
 LOCAL_ARM_MODE := arm
 LOCAL_MODULE := libvpx_test
 LOCAL_STATIC_LIBRARIES := gtest libwebm
-LOCAL_SHARED_LIBRARIES := vpx
+
+ifeq ($(ENABLE_SHARED),1)
+  LOCAL_SHARED_LIBRARIES := vpx
+else
+  LOCAL_STATIC_LIBRARIES += vpx
+endif
+
 include $(LOCAL_PATH)/test/test.mk
 LOCAL_C_INCLUDES := $(BINDINGS_DIR)
 FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes)))
 LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC))
+# some test files depend on *_rtcd.h, ensure they're generated first.
+$(eval $(call rtcd_dep_template))
 include $(BUILD_EXECUTABLE)
--- a/test/blockiness_test.cc
+++ b/test/blockiness_test.cc
@@ -0,0 +1,229 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "vpx_mem/vpx_mem.h"
+
+
+extern "C"
+double vp9_get_blockiness(const unsigned char *img1, int img1_pitch,
+                          const unsigned char *img2, int img2_pitch,
+                          int width, int height);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class BlockinessTestBase : public ::testing::Test {
+ public:
+  BlockinessTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(source_data_);
+    source_data_ = NULL;
+    vpx_free(reference_data_);
+    reference_data_ = NULL;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant,
+                    int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = fill_constant;
+      }
+    }
+  }
+
+  void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) {
+    FillConstant(data, stride, fill_constant, width_, height_);
+  }
+
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+
+  void FillRandomBlocky(uint8_t *data, int stride) {
+    for (int h = 0; h < height_; h += 4) {
+      for (int w = 0; w < width_; w += 4) {
+        FillRandom(data + h * stride + w, stride, 4, 4);
+      }
+    }
+  }
+
+  void FillCheckerboard(uint8_t *data, int stride) {
+    for (int h = 0; h < height_; h += 4) {
+      for (int w = 0; w < width_; w += 4) {
+        if (((h/4) ^ (w/4)) & 1)
+          FillConstant(data + h * stride + w, stride, 255, 4, 4);
+        else
+          FillConstant(data + h * stride + w, stride, 0, 4, 4);
+      }
+    }
+  }
+
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_;
+  int source_stride_;
+  static uint8_t* reference_data_;
+  int reference_stride_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> BlockinessParam;
+class BlockinessVP9Test
+    : public BlockinessTestBase,
+      public ::testing::WithParamInterface<BlockinessParam> {
+ public:
+  BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  int CheckBlockiness() {
+    return vp9_get_blockiness(source_data_, source_stride_,
+                              reference_data_, reference_stride_,
+                              width_, height_);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* BlockinessTestBase::source_data_ = NULL;
+uint8_t* BlockinessTestBase::reference_data_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(BlockinessVP9Test, SourceBlockierThanReference) {
+  // Source is blockier than reference.
+  FillRandomBlocky(source_data_, source_stride_);
+  FillConstant(reference_data_, reference_stride_, 128);
+  int super_blocky = CheckBlockiness();
+
+  EXPECT_EQ(0, super_blocky) << "Blocky source should produce 0 blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillRandomBlocky(reference_data_, reference_stride_);
+  int super_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, 0.0)
+      << "Blocky reference should score high for blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillRandomBlocky(reference_data_, reference_stride_);
+  int super_blocky = CheckBlockiness();
+
+  Blur(reference_data_, reference_stride_, 4);
+  int less_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, less_blocky)
+      << "A straight blur should decrease blockiness.";
+}
+
+TEST_P(BlockinessVP9Test, WorstCaseBlockiness) {
+  // Source is blockier than reference.
+  FillConstant(source_data_, source_stride_, 128);
+  FillCheckerboard(reference_data_, reference_stride_);
+
+  int super_blocky = CheckBlockiness();
+
+  Blur(reference_data_, reference_stride_, 4);
+  int less_blocky = CheckBlockiness();
+
+  EXPECT_GT(super_blocky, less_blocky)
+      << "A straight blur should decrease blockiness.";
+}
+#endif  // CONFIG_VP9_ENCODER
+
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+
+#if CONFIG_VP9_ENCODER
+const BlockinessParam c_vp9_tests[] = {
+  make_tuple(320, 240),
+  make_tuple(318, 242),
+  make_tuple(318, 238),
+};
+INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests));
+#endif
+
+}  // namespace
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -0,0 +1,189 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "./vpx_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+
+namespace {
+
+const int kLegacyByteAlignment = 0;
+const int kLegacyYPlaneByteAlignment = 32;
+const int kNumPlanesToCheck = 3;
+const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
+const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5";
+
+#if CONFIG_WEBM_IO
+
+struct ByteAlignmentTestParam {
+  int byte_alignment;
+  vpx_codec_err_t expected_value;
+  bool decode_remaining;
+};
+
+const ByteAlignmentTestParam kBaTestParams[] = {
+  {kLegacyByteAlignment, VPX_CODEC_OK, true},
+  {32, VPX_CODEC_OK, true},
+  {64, VPX_CODEC_OK, true},
+  {128, VPX_CODEC_OK, true},
+  {256, VPX_CODEC_OK, true},
+  {512, VPX_CODEC_OK, true},
+  {1024, VPX_CODEC_OK, true},
+  {1, VPX_CODEC_INVALID_PARAM, false},
+  {-2, VPX_CODEC_INVALID_PARAM, false},
+  {4, VPX_CODEC_INVALID_PARAM, false},
+  {16, VPX_CODEC_INVALID_PARAM, false},
+  {255, VPX_CODEC_INVALID_PARAM, false},
+  {2048, VPX_CODEC_INVALID_PARAM, false},
+};
+
+// Class for testing byte alignment of reference buffers.
+class ByteAlignmentTest
+    : public ::testing::TestWithParam<ByteAlignmentTestParam> {
+ protected:
+  ByteAlignmentTest()
+      : video_(NULL),
+        decoder_(NULL),
+        md5_file_(NULL) {}
+
+  virtual void SetUp() {
+    video_ = new libvpx_test::WebMVideoSource(kVP9TestFile);
+    ASSERT_TRUE(video_ != NULL);
+    video_->Init();
+    video_->Begin();
+
+    const vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
+    ASSERT_TRUE(decoder_ != NULL);
+
+    OpenMd5File(kVP9Md5File);
+  }
+
+  virtual void TearDown() {
+    if (md5_file_ != NULL)
+      fclose(md5_file_);
+
+    delete decoder_;
+    delete video_;
+  }
+
+  void SetByteAlignment(int byte_alignment, vpx_codec_err_t expected_value) {
+    decoder_->Control(VP9_SET_BYTE_ALIGNMENT, byte_alignment, expected_value);
+  }
+
+  vpx_codec_err_t DecodeOneFrame(int byte_alignment_to_check) {
+    const vpx_codec_err_t res =
+        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+    CheckDecodedFrames(byte_alignment_to_check);
+    if (res == VPX_CODEC_OK)
+      video_->Next();
+    return res;
+  }
+
+  vpx_codec_err_t DecodeRemainingFrames(int byte_alignment_to_check) {
+    for (; video_->cxdata() != NULL; video_->Next()) {
+      const vpx_codec_err_t res =
+          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+      if (res != VPX_CODEC_OK)
+        return res;
+      CheckDecodedFrames(byte_alignment_to_check);
+    }
+    return VPX_CODEC_OK;
+  }
+
+ private:
+  // Check if |data| is aligned to |byte_alignment_to_check|.
+  // |byte_alignment_to_check| must be a power of 2.
+  void CheckByteAlignment(const uint8_t *data, int byte_alignment_to_check) {
+    ASSERT_EQ(0u, reinterpret_cast<size_t>(data) % byte_alignment_to_check);
+  }
+
+  // Iterate through the planes of the decoded frames and check for
+  // alignment based off |byte_alignment_to_check|.
+  void CheckDecodedFrames(int byte_alignment_to_check) {
+    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next()) != NULL) {
+      if (byte_alignment_to_check == kLegacyByteAlignment) {
+        CheckByteAlignment(img->planes[0], kLegacyYPlaneByteAlignment);
+      } else {
+        for (int i = 0; i < kNumPlanesToCheck; ++i) {
+          CheckByteAlignment(img->planes[i], byte_alignment_to_check);
+        }
+      }
+      CheckMd5(*img);
+    }
+  }
+
+  // TODO(fgalligan): Move the MD5 testing code into another class.
+  void OpenMd5File(const std::string &md5_file_name_) {
+    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+        << md5_file_name_;
+  }
+
+  void CheckMd5(const vpx_image_t &img) {
+    ASSERT_TRUE(md5_file_ != NULL);
+    char expected_md5[33];
+    char junk[128];
+
+    // Read correct md5 checksums.
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5, junk);
+    ASSERT_NE(EOF, res) << "Read md5 data failed";
+    expected_md5[32] = '\0';
+
+    ::libvpx_test::MD5 md5_res;
+    md5_res.Add(&img);
+    const char *const actual_md5 = md5_res.Get();
+
+    // Check md5 match.
+    ASSERT_STREQ(expected_md5, actual_md5) << "MD5 checksums don't match";
+  }
+
+  libvpx_test::WebMVideoSource *video_;
+  libvpx_test::VP9Decoder *decoder_;
+  FILE *md5_file_;
+};
+
+TEST_F(ByteAlignmentTest, SwitchByteAlignment) {
+  const int num_elements = 14;
+  const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024,
+                                  0, 1024, 32, 512, 64, 256, 128 };
+
+  for (int i = 0; i < num_elements; ++i) {
+    SetByteAlignment(byte_alignments[i], VPX_CODEC_OK);
+    ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame(byte_alignments[i]));
+  }
+  SetByteAlignment(byte_alignments[0], VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(byte_alignments[0]));
+}
+
+TEST_P(ByteAlignmentTest, TestAlignment) {
+  const ByteAlignmentTestParam t = GetParam();
+  SetByteAlignment(t.byte_alignment, t.expected_value);
+  if (t.decode_remaining)
+    ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment));
+}
+
+INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest,
+                        ::testing::ValuesIn(kBaTestParams));
+
+#endif  // CONFIG_WEBM_IO
+
+}  // namespace
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -35,6 +35,11 @@ class CodecFactory {
  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const = 0;

+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline)  // NOLINT(runtime/int)
+                                 const = 0;
+
  virtual Encoder* CreateEncoder(vpx_codec_enc_cfg_t cfg,
                                 unsigned long deadline,
                                 const unsigned long init_flags,
@@ -72,6 +77,10 @@ class VP8Decoder : public Decoder {
  VP8Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

+  VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+             unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP8_DECODER
@@ -104,8 +113,14 @@ class VP8CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP8_DECODER
-    return new VP8Decoder(cfg, deadline);
+    return new VP8Decoder(cfg, flags, deadline);
 #else
    return NULL;
 #endif
@@ -154,6 +169,10 @@ class VP9Decoder : public Decoder {
  VP9Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
      : Decoder(cfg, deadline) {}

+  VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+             unsigned long deadline)  // NOLINT
+      : Decoder(cfg, flag, deadline) {}
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const {
 #if CONFIG_VP9_DECODER
@@ -186,8 +205,14 @@ class VP9CodecFactory : public CodecFactory {

  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
                                 unsigned long deadline) const {
+    return CreateDecoder(cfg, 0, deadline);
+  }
+
+  virtual Decoder* CreateDecoder(vpx_codec_dec_cfg_t cfg,
+                                 const vpx_codec_flags_t flags,
+                                 unsigned long deadline) const {  // NOLINT
 #if CONFIG_VP9_DECODER
-    return new VP9Decoder(cfg, deadline);
+    return new VP9Decoder(cfg, flags, deadline);
 #else
    return NULL;
 #endif
--- a/test/consistency_test.cc
+++ b/test/consistency_test.cc
@@ -0,0 +1,224 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#include <string.h>
+#include <limits.h>
+#include <stdio.h>
+
+#include "./vpx_config.h"
+#if CONFIG_VP9_ENCODER
+#include "./vp9_rtcd.h"
+#endif
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/encoder/vp9_ssim.h"
+#include "vpx_mem/vpx_mem.h"
+
+extern "C"
+double vp9_get_ssim_metrics(uint8_t *img1, int img1_pitch,
+                            uint8_t *img2, int img2_pitch,
+                            int width, int height,
+                            Ssimv *sv2, Metrics *m,
+                            int do_inconsistency);
+
+using libvpx_test::ACMRandom;
+
+namespace {
+class ConsistencyTestBase : public ::testing::Test {
+ public:
+  ConsistencyTestBase(int width, int height) : width_(width), height_(height) {}
+
+  static void SetUpTestCase() {
+    source_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[0] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    source_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    reference_data_[1] = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kDataBufferSize));
+    ssim_array_ = new Ssimv[kDataBufferSize / 16];
+  }
+
+  static void ClearSsim() {
+    memset(ssim_array_, 0, kDataBufferSize / 16);
+  }
+  static void TearDownTestCase() {
+    vpx_free(source_data_[0]);
+    source_data_[0] = NULL;
+    vpx_free(reference_data_[0]);
+    reference_data_[0] = NULL;
+    vpx_free(source_data_[1]);
+    source_data_[1] = NULL;
+    vpx_free(reference_data_[1]);
+    reference_data_[1] = NULL;
+
+    delete ssim_array_;
+  }
+
+  virtual void TearDown() {
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Handle frames up to 640x480
+  static const int kDataAlignment = 16;
+  static const int kDataBufferSize = 640*480;
+
+  virtual void SetUp() {
+    source_stride_ = (width_ + 31) & ~31;
+    reference_stride_ = width_ * 2;
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+  }
+
+  void FillRandom(uint8_t *data, int stride, int width, int height) {
+    for (int h = 0; h < height; ++h) {
+      for (int w = 0; w < width; ++w) {
+        data[h * stride + w] = rnd_.Rand8();
+      }
+    }
+  }
+
+  void FillRandom(uint8_t *data, int stride) {
+    FillRandom(data, stride, width_, height_);
+  }
+
+  void Copy(uint8_t *reference, uint8_t *source) {
+    memcpy(reference, source, kDataBufferSize);
+  }
+
+  void Blur(uint8_t *data, int stride, int taps) {
+    int sum = 0;
+    int half_taps = taps / 2;
+    for (int h = 0; h < height_; ++h) {
+      for (int w = 0; w < taps; ++w) {
+        sum += data[w + h * stride];
+      }
+      for (int w = taps; w < width_; ++w) {
+        sum += data[w + h * stride] - data[w - taps + h * stride];
+        data[w - half_taps + h * stride] = (sum + half_taps) / taps;
+      }
+    }
+    for (int w = 0; w < width_; ++w) {
+      for (int h = 0; h < taps; ++h) {
+        sum += data[h + w * stride];
+      }
+      for (int h = taps; h < height_; ++h) {
+        sum += data[w + h * stride] - data[(h - taps) * stride + w];
+        data[(h - half_taps) * stride + w] = (sum + half_taps) / taps;
+      }
+    }
+  }
+  int width_, height_;
+  static uint8_t* source_data_[2];
+  int source_stride_;
+  static uint8_t* reference_data_[2];
+  int reference_stride_;
+  static Ssimv *ssim_array_;
+  Metrics metrics_;
+
+  ACMRandom rnd_;
+};
+
+#if CONFIG_VP9_ENCODER
+typedef std::tr1::tuple<int, int> ConsistencyParam;
+class ConsistencyVP9Test
+    : public ConsistencyTestBase,
+      public ::testing::WithParamInterface<ConsistencyParam> {
+ public:
+  ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {}
+
+ protected:
+  double CheckConsistency(int frame) {
+    EXPECT_LT(frame, 2)<< "Frame to check has to be less than 2.";
+    return
+        vp9_get_ssim_metrics(source_data_[frame], source_stride_,
+                             reference_data_[frame], reference_stride_,
+                             width_, height_, ssim_array_, &metrics_, 1);
+  }
+};
+#endif  // CONFIG_VP9_ENCODER
+
+uint8_t* ConsistencyTestBase::source_data_[2] = {NULL, NULL};
+uint8_t* ConsistencyTestBase::reference_data_[2] = {NULL, NULL};
+Ssimv* ConsistencyTestBase::ssim_array_ = NULL;
+
+#if CONFIG_VP9_ENCODER
+TEST_P(ConsistencyVP9Test, ConsistencyIsZero) {
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Blur(reference_data_[0], reference_stride_, 3);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 3);
+
+  double inconsistency = CheckConsistency(1);
+  inconsistency = CheckConsistency(0);
+  EXPECT_EQ(inconsistency, 0.0)
+      << "Should have 0 inconsistency if they are exactly the same.";
+
+  // If sources are not consistent reference frames inconsistency should
+  // be less than if the source is consistent.
+  FillRandom(source_data_[0], source_stride_);
+  FillRandom(source_data_[1], source_stride_);
+  FillRandom(reference_data_[0], reference_stride_);
+  FillRandom(reference_data_[1], reference_stride_);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+
+  Copy(source_data_[1], source_data_[0]);
+  CheckConsistency(0);
+  double inconsistency2 = CheckConsistency(1);
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Should have less inconsistency if source itself is inconsistent.";
+
+  // Less of a blur should be less inconsistent than more blur coming off a
+  // a frame with no blur.
+  ClearSsim();
+  FillRandom(source_data_[0], source_stride_);
+  Copy(source_data_[1], source_data_[0]);
+  Copy(reference_data_[0], source_data_[0]);
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 4);
+  CheckConsistency(0);
+  inconsistency = CheckConsistency(1);
+  ClearSsim();
+  Copy(reference_data_[1], source_data_[0]);
+  Blur(reference_data_[1], reference_stride_, 8);
+  CheckConsistency(0);
+  inconsistency2 = CheckConsistency(1);
+
+  EXPECT_LT(inconsistency, inconsistency2)
+      << "Stronger Blur should produce more inconsistency.";
+}
+#endif  // CONFIG_VP9_ENCODER
+
+
+using std::tr1::make_tuple;
+
+//------------------------------------------------------------------------------
+// C functions
+
+#if CONFIG_VP9_ENCODER
+const ConsistencyParam c_vp9_tests[] = {
+  make_tuple(320, 240),
+  make_tuple(318, 242),
+  make_tuple(318, 238),
+};
+INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test,
+                        ::testing::ValuesIn(c_vp9_tests));
+#endif
+
+}  // namespace
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -10,12 +10,14 @@

 #include <string.h>
 #include "test/acm_random.h"
+#include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "vp9/common/vp9_common.h"
 #include "vp9/common/vp9_filter.h"
 #include "vpx_mem/vpx_mem.h"
 #include "vpx_ports/mem.h"
@@ -31,13 +33,16 @@ typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             int w, int h);

 struct ConvolveFunctions {
-  ConvolveFunctions(ConvolveFunc h8, ConvolveFunc h8_avg,
+  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg,
+                    ConvolveFunc h8, ConvolveFunc h8_avg,
                    ConvolveFunc v8, ConvolveFunc v8_avg,
                    ConvolveFunc hv8, ConvolveFunc hv8_avg,
                    int bd)
-      : h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg), v8_avg_(v8_avg),
-        hv8_avg_(hv8_avg), use_highbd_(bd) {}
+      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
+        v8_avg_(v8_avg), hv8_avg_(hv8_avg), use_highbd_(bd) {}

+  ConvolveFunc copy_;
+  ConvolveFunc avg_;
  ConvolveFunc h8_;
  ConvolveFunc v8_;
  ConvolveFunc hv8_;
@@ -298,25 +303,35 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
        vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1;
    output_ = reinterpret_cast<uint8_t*>(
        vpx_memalign(kDataAlignment, kOutputBufferSize));
+    output_ref_ = reinterpret_cast<uint8_t*>(
+        vpx_memalign(kDataAlignment, kOutputBufferSize));
 #if CONFIG_VP9_HIGHBITDEPTH
    input16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment,
                     (kInputBufferSize + 1) * sizeof(uint16_t))) + 1;
    output16_ = reinterpret_cast<uint16_t*>(
        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
+    output16_ref_ = reinterpret_cast<uint16_t*>(
+        vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t)));
 #endif
  }

+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
  static void TearDownTestCase() {
    vpx_free(input_ - 1);
    input_ = NULL;
    vpx_free(output_);
    output_ = NULL;
+    vpx_free(output_ref_);
+    output_ref_ = NULL;
 #if CONFIG_VP9_HIGHBITDEPTH
    vpx_free(input16_ - 1);
    input16_ = NULL;
    vpx_free(output16_);
    output16_ = NULL;
+    vpx_free(output16_ref_);
+    output16_ref_ = NULL;
 #endif
  }

@@ -382,6 +397,13 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
  }

+  void CopyOutputToRef() {
+    memcpy(output_ref_, output_, kOutputBufferSize);
+#if CONFIG_VP9_HIGHBITDEPTH
+    memcpy(output16_ref_, output16_, kOutputBufferSize);
+#endif
+  }
+
  void CheckGuardBlocks() {
    for (int i = 0; i < kOutputBufferSize; ++i) {
      if (IsIndexInBorder(i))
@@ -415,6 +437,19 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
  }

+  uint8_t *output_ref() const {
+#if CONFIG_VP9_HIGHBITDEPTH
+    if (UUT_->use_highbd_ == 0) {
+      return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+    } else {
+      return CONVERT_TO_BYTEPTR(output16_ref_ + BorderTop() * kOuterBlockSize +
+                                BorderLeft());
+    }
+#else
+    return output_ref_ + BorderTop() * kOuterBlockSize + BorderLeft();
+#endif
+  }
+
  uint16_t lookup(uint8_t *list, int index) const {
 #if CONFIG_VP9_HIGHBITDEPTH
    if (UUT_->use_highbd_ == 0) {
@@ -493,24 +528,65 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
  const ConvolveFunctions* UUT_;
  static uint8_t* input_;
  static uint8_t* output_;
+  static uint8_t* output_ref_;
 #if CONFIG_VP9_HIGHBITDEPTH
  static uint16_t* input16_;
  static uint16_t* output16_;
+  static uint16_t* output16_ref_;
  int mask_;
 #endif
 };

 uint8_t* ConvolveTest::input_ = NULL;
 uint8_t* ConvolveTest::output_ = NULL;
+uint8_t* ConvolveTest::output_ref_ = NULL;
 #if CONFIG_VP9_HIGHBITDEPTH
 uint16_t* ConvolveTest::input16_ = NULL;
 uint16_t* ConvolveTest::output16_ = NULL;
+uint16_t* ConvolveTest::output16_ref_ = NULL;
 #endif

 TEST_P(ConvolveTest, GuardBlocks) {
  CheckGuardBlocks();
 }

+TEST_P(ConvolveTest, Copy) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->copy_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
+                  Width(), Height()));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < Height(); ++y)
+    for (int x = 0; x < Width(); ++x)
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                lookup(in, y * kInputStride + x))
+          << "(" << x << "," << y << ")";
+}
+
+TEST_P(ConvolveTest, Avg) {
+  uint8_t* const in = input();
+  uint8_t* const out = output();
+  uint8_t* const out_ref = output_ref();
+  CopyOutputToRef();
+
+  ASM_REGISTER_STATE_CHECK(
+      UUT_->avg_(in, kInputStride, out, kOutputStride, NULL, 0, NULL, 0,
+                Width(), Height()));
+
+  CheckGuardBlocks();
+
+  for (int y = 0; y < Height(); ++y)
+    for (int x = 0; x < Width(); ++x)
+      ASSERT_EQ(lookup(out, y * kOutputStride + x),
+                ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) +
+                                   lookup(out_ref, y * kOutputStride + x), 1))
+          << "(" << x << "," << y << ")";
+}
+
 TEST_P(ConvolveTest, CopyHoriz) {
  uint8_t* const in = input();
  uint8_t* const out = output();
@@ -1188,6 +1264,30 @@ void wrap_convolve8_avg_sse2_12(const uint8_t *src, ptrdiff_t src_stride,
 }
 #endif  // HAVE_SSE2 && ARCH_X86_64

+void wrap_convolve_copy_c_8(const uint8_t *src, ptrdiff_t src_stride,
+                            uint8_t *dst, ptrdiff_t dst_stride,
+                            const int16_t *filter_x,
+                            int filter_x_stride,
+                            const int16_t *filter_y,
+                            int filter_y_stride,
+                            int w, int h) {
+  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+                             filter_x, filter_x_stride,
+                             filter_y, filter_y_stride, w, h, 8);
+}
+
+void wrap_convolve_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
+                           uint8_t *dst, ptrdiff_t dst_stride,
+                           const int16_t *filter_x,
+                           int filter_x_stride,
+                           const int16_t *filter_y,
+                           int filter_y_stride,
+                           int w, int h) {
+  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+                            filter_x, filter_x_stride,
+                            filter_y, filter_y_stride, w, h, 8);
+}
+
 void wrap_convolve8_horiz_c_8(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const int16_t *filter_x,
@@ -1260,6 +1360,30 @@ void wrap_convolve8_avg_c_8(const uint8_t *src, ptrdiff_t src_stride,
                             filter_y, filter_y_stride, w, h, 8);
 }

+void wrap_convolve_copy_c_10(const uint8_t *src, ptrdiff_t src_stride,
+                             uint8_t *dst, ptrdiff_t dst_stride,
+                             const int16_t *filter_x,
+                             int filter_x_stride,
+                             const int16_t *filter_y,
+                             int filter_y_stride,
+                             int w, int h) {
+  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+                             filter_x, filter_x_stride,
+                             filter_y, filter_y_stride, w, h, 10);
+}
+
+void wrap_convolve_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
+                            uint8_t *dst, ptrdiff_t dst_stride,
+                            const int16_t *filter_x,
+                            int filter_x_stride,
+                            const int16_t *filter_y,
+                            int filter_y_stride,
+                            int w, int h) {
+  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+                            filter_x, filter_x_stride,
+                            filter_y, filter_y_stride, w, h, 10);
+}
+
 void wrap_convolve8_horiz_c_10(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x,
@@ -1332,6 +1456,30 @@ void wrap_convolve8_avg_c_10(const uint8_t *src, ptrdiff_t src_stride,
                             filter_y, filter_y_stride, w, h, 10);
 }

+void wrap_convolve_copy_c_12(const uint8_t *src, ptrdiff_t src_stride,
+                             uint8_t *dst, ptrdiff_t dst_stride,
+                             const int16_t *filter_x,
+                             int filter_x_stride,
+                             const int16_t *filter_y,
+                             int filter_y_stride,
+                             int w, int h) {
+  vp9_highbd_convolve_copy_c(src, src_stride, dst, dst_stride,
+                             filter_x, filter_x_stride,
+                             filter_y, filter_y_stride, w, h, 12);
+}
+
+void wrap_convolve_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
+                            uint8_t *dst, ptrdiff_t dst_stride,
+                            const int16_t *filter_x,
+                            int filter_x_stride,
+                            const int16_t *filter_y,
+                            int filter_y_stride,
+                            int w, int h) {
+  vp9_highbd_convolve_avg_c(src, src_stride, dst, dst_stride,
+                            filter_x, filter_x_stride,
+                            filter_y, filter_y_stride, w, h, 12);
+}
+
 void wrap_convolve8_horiz_c_12(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter_x,
@@ -1405,6 +1553,7 @@ void wrap_convolve8_avg_c_12(const uint8_t *src, ptrdiff_t src_stride,
 }

 const ConvolveFunctions convolve8_c(
+    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8,
    wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8,
    wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8);
@@ -1423,6 +1572,7 @@ INSTANTIATE_TEST_CASE_P(C_8, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve8_c),
    make_tuple(64, 64, &convolve8_c)));
 const ConvolveFunctions convolve10_c(
+    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
    wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10,
    wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10,
    wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10);
@@ -1441,6 +1591,7 @@ INSTANTIATE_TEST_CASE_P(C_10, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve10_c),
    make_tuple(64, 64, &convolve10_c)));
 const ConvolveFunctions convolve12_c(
+    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
    wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12,
    wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12,
    wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12);
@@ -1462,6 +1613,7 @@ INSTANTIATE_TEST_CASE_P(C_12, ConvolveTest, ::testing::Values(
 #else

 const ConvolveFunctions convolve8_c(
+    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_c, vp9_convolve8_avg_horiz_c,
    vp9_convolve8_vert_c, vp9_convolve8_avg_vert_c,
    vp9_convolve8_c, vp9_convolve8_avg_c, 0);
@@ -1485,10 +1637,21 @@ INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::Values(
 #if HAVE_SSE2 && ARCH_X86_64
 #if CONFIG_VP9_HIGHBITDEPTH
 const ConvolveFunctions convolve8_sse2(
+    wrap_convolve_copy_c_8, wrap_convolve_avg_c_8,
    wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8,
    wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8,
    wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8);
-INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
+const ConvolveFunctions convolve10_sse2(
+    wrap_convolve_copy_c_10, wrap_convolve_avg_c_10,
+    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
+    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
+    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
+const ConvolveFunctions convolve12_sse2(
+    wrap_convolve_copy_c_12, wrap_convolve_avg_c_12,
+    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
+    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
+    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
+INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_sse2),
    make_tuple(8, 4, &convolve8_sse2),
    make_tuple(4, 8, &convolve8_sse2),
@@ -1501,12 +1664,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_8, ConvolveTest, ::testing::Values(
    make_tuple(32, 32, &convolve8_sse2),
    make_tuple(64, 32, &convolve8_sse2),
    make_tuple(32, 64, &convolve8_sse2),
-    make_tuple(64, 64, &convolve8_sse2)));
-const ConvolveFunctions convolve10_sse2(
-    wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10,
-    wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10,
-    wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10);
-INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
+    make_tuple(64, 64, &convolve8_sse2),
    make_tuple(4, 4, &convolve10_sse2),
    make_tuple(8, 4, &convolve10_sse2),
    make_tuple(4, 8, &convolve10_sse2),
@@ -1519,12 +1677,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_10, ConvolveTest, ::testing::Values(
    make_tuple(32, 32, &convolve10_sse2),
    make_tuple(64, 32, &convolve10_sse2),
    make_tuple(32, 64, &convolve10_sse2),
-    make_tuple(64, 64, &convolve10_sse2)));
-const ConvolveFunctions convolve12_sse2(
-    wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12,
-    wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12,
-    wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12);
-INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
+    make_tuple(64, 64, &convolve10_sse2),
    make_tuple(4, 4, &convolve12_sse2),
    make_tuple(8, 4, &convolve12_sse2),
    make_tuple(4, 8, &convolve12_sse2),
@@ -1540,6 +1693,7 @@ INSTANTIATE_TEST_CASE_P(SSE2_12, ConvolveTest, ::testing::Values(
    make_tuple(64, 64, &convolve12_sse2)));
 #else
 const ConvolveFunctions convolve8_sse2(
+    vp9_convolve_copy_sse2, vp9_convolve_avg_sse2,
    vp9_convolve8_horiz_sse2, vp9_convolve8_avg_horiz_sse2,
    vp9_convolve8_vert_sse2, vp9_convolve8_avg_vert_sse2,
    vp9_convolve8_sse2, vp9_convolve8_avg_sse2, 0);
@@ -1563,6 +1717,7 @@ INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::Values(

 #if HAVE_SSSE3
 const ConvolveFunctions convolve8_ssse3(
+    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_ssse3, vp9_convolve8_avg_horiz_ssse3,
    vp9_convolve8_vert_ssse3, vp9_convolve8_avg_vert_ssse3,
    vp9_convolve8_ssse3, vp9_convolve8_avg_ssse3, 0);
@@ -1585,6 +1740,7 @@ INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::Values(

 #if HAVE_AVX2 && HAVE_SSSE3
 const ConvolveFunctions convolve8_avx2(
+    vp9_convolve_copy_c, vp9_convolve_avg_c,
    vp9_convolve8_horiz_avx2, vp9_convolve8_avg_horiz_ssse3,
    vp9_convolve8_vert_avx2, vp9_convolve8_avg_vert_ssse3,
    vp9_convolve8_avx2, vp9_convolve8_avg_ssse3, 0);
@@ -1605,11 +1761,20 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::Values(
    make_tuple(64, 64, &convolve8_avx2)));
 #endif  // HAVE_AVX2 && HAVE_SSSE3

+#if HAVE_NEON
 #if HAVE_NEON_ASM
 const ConvolveFunctions convolve8_neon(
+    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+#else  // HAVE_NEON
+const ConvolveFunctions convolve8_neon(
+    vp9_convolve_copy_neon, vp9_convolve_avg_neon,
+    vp9_convolve8_horiz_neon, vp9_convolve8_avg_horiz_neon,
+    vp9_convolve8_vert_neon, vp9_convolve8_avg_vert_neon,
+    vp9_convolve8_neon, vp9_convolve8_avg_neon, 0);
+#endif  // HAVE_NEON_ASM

 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
    make_tuple(4, 4, &convolve8_neon),
@@ -1625,10 +1790,11 @@ INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::Values(
    make_tuple(64, 32, &convolve8_neon),
    make_tuple(32, 64, &convolve8_neon),
    make_tuple(64, 64, &convolve8_neon)));
-#endif
+#endif  // HAVE_NEON

 #if HAVE_DSPR2
 const ConvolveFunctions convolve8_dspr2(
+    vp9_convolve_copy_dspr2, vp9_convolve_avg_dspr2,
    vp9_convolve8_horiz_dspr2, vp9_convolve8_avg_horiz_dspr2,
    vp9_convolve8_vert_dspr2, vp9_convolve8_avg_vert_dspr2,
    vp9_convolve8_dspr2, vp9_convolve8_avg_dspr2, 0);
@@ -1648,4 +1814,27 @@ INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::Values(
    make_tuple(32, 64, &convolve8_dspr2),
    make_tuple(64, 64, &convolve8_dspr2)));
 #endif
+
+#if HAVE_MSA
+const ConvolveFunctions convolve8_msa(
+    vp9_convolve_copy_msa, vp9_convolve_avg_msa,
+    vp9_convolve8_horiz_msa, vp9_convolve8_avg_horiz_msa,
+    vp9_convolve8_vert_msa, vp9_convolve8_avg_vert_msa,
+    vp9_convolve8_msa, vp9_convolve8_avg_msa, 0);
+
+INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::Values(
+    make_tuple(4, 4, &convolve8_msa),
+    make_tuple(8, 4, &convolve8_msa),
+    make_tuple(4, 8, &convolve8_msa),
+    make_tuple(8, 8, &convolve8_msa),
+    make_tuple(16, 8, &convolve8_msa),
+    make_tuple(8, 16, &convolve8_msa),
+    make_tuple(16, 16, &convolve8_msa),
+    make_tuple(32, 16, &convolve8_msa),
+    make_tuple(16, 32, &convolve8_msa),
+    make_tuple(32, 32, &convolve8_msa),
+    make_tuple(64, 32, &convolve8_msa),
+    make_tuple(32, 64, &convolve8_msa),
+    make_tuple(64, 64, &convolve8_msa)));
+#endif  // HAVE_MSA
 }  // namespace
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -14,6 +14,7 @@
 #include "test/i420_video_source.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
+#include "vpx/vpx_codec.h"

 namespace {

@@ -38,13 +39,25 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
    first_drop_ = 0;
    bits_total_ = 0;
    duration_ = 0.0;
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0)
+      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
    }
+
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
@@ -124,6 +137,8 @@ class DatarateTestLarge : public ::libvpx_test::EncoderTest,
  double effective_datarate_;
  size_t bits_in_last_frame_;
  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
 };

 #if CONFIG_TEMPORAL_DENOISING
@@ -155,6 +170,29 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
        << " The datarate for the file missed the target!";
  }
 }
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestLarge, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the offon test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+      << " The datarate for the file missed the target!";
+}
 #endif  // CONFIG_TEMPORAL_DENOISING

 TEST_P(DatarateTestLarge, BasicBufferModel) {
@@ -246,6 +284,8 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
    for (int i = 0; i < 3; ++i) {
      bits_total_[i] = 0;
    }
+    denoiser_offon_test_ = 0;
+    denoiser_offon_period_ = -1;
  }

  //
@@ -313,22 +353,30 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0)
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
+    if (denoiser_offon_test_) {
+      ASSERT_GT(denoiser_offon_period_, 0)
+          << "denoiser_offon_period_ is not positive.";
+      if ((video->frame() + 1) % denoiser_offon_period_ == 0) {
+        // Flip denoiser_on_ periodically
+        denoiser_on_ ^= 1;
+      }
    }
+
+    encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
+
    if (cfg_.ts_number_layers > 1) {
-      if (video->frame() == 1) {
+      if (video->frame() == 0) {
        encoder->Control(VP9E_SET_SVC, 1);
      }
-      vpx_svc_layer_id_t layer_id = {0, 0};
+      vpx_svc_layer_id_t layer_id;
      layer_id.spatial_layer_id = 0;
      frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
      layer_id.temporal_layer_id = SetLayerId(video->frame(),
                                              cfg_.ts_number_layers);
-      if (video->frame() > 0) {
-       encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
-      }
+      encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id);
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -398,6 +446,8 @@ class DatarateTestVP9Large : public ::libvpx_test::EncoderTest,
  vpx_codec_pts_t first_drop_;
  int num_drops_;
  int denoiser_on_;
+  int denoiser_offon_test_;
+  int denoiser_offon_period_;
 };

 // Check basic rate targeting,
@@ -488,7 +538,7 @@ TEST_P(DatarateTestVP9Large, ChangingDropFrameThresh) {
        << " The first dropped frame for drop_thresh " << i
        << " > first dropped frame for drop_thresh "
        << i - kDropFrameThreshTestStep;
-    ASSERT_GE(num_drops_, last_num_drops)
+    ASSERT_GE(num_drops_, last_num_drops * 0.90)
        << " The number of dropped frames for drop_thresh " << i
        << " < number of dropped frames for drop_thresh "
        << i - kDropFrameThreshTestStep;
@@ -514,20 +564,25 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) {
  cfg_.ts_rate_decimator[0] = 2;
  cfg_.ts_rate_decimator[1] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
+  if (deadline_ == VPX_DL_REALTIME)
+    cfg_.g_error_resilient = 1;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  for (int i = 200; i <= 800; i += 200) {
    cfg_.rc_target_bitrate = i;
    ResetModel();
    // 60-40 bitrate allocation for 2 temporal layers.
-    cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+    cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate;
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
          << " The datarate for the file is lower than target by too much, "
              "for layer: " << j;
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
          << " The datarate for the file is greater than target by too much, "
              "for layer: " << j;
    }
@@ -552,25 +607,27 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) {
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  for (int i = 200; i <= 800; i += 200) {
    cfg_.rc_target_bitrate = i;
    ResetModel();
    // 40-20-40 bitrate allocation for 3 temporal layers.
-    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+    cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
      // TODO(yaowu): Work out more stable rc control strategy and
      //              Adjust the thresholds to be tighter than .75.
-      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
+      ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75)
          << " The datarate for the file is lower than target by too much, "
              "for layer: " << j;
      // TODO(yaowu): Work out more stable rc control strategy and
      //              Adjust the thresholds to be tighter than 1.25.
-      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
+      ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25)
          << " The datarate for the file is greater than target by too much, "
              "for layer: " << j;
    }
@@ -598,20 +655,22 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
  cfg_.ts_rate_decimator[1] = 2;
  cfg_.ts_rate_decimator[2] = 1;

+  cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS;
+
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
  cfg_.rc_target_bitrate = 200;
  ResetModel();
  // 40-20-40 bitrate allocation for 3 temporal layers.
-  cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
-  cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+  cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+  cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
-    ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85)
+    ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85)
        << " The datarate for the file is lower than target by too much, "
            "for layer: " << j;
-    ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15)
+    ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15)
        << " The datarate for the file is greater than target by too much, "
            "for layer: " << j;
    // Expect some frame drops in this test: for this 200 frames test,
@@ -649,11 +708,212 @@ TEST_P(DatarateTestVP9Large, DenoiserLevels) {
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
 }
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+
+  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
+  // there is only one denoiser mode: denoiserYonly(which is 1),
+  // but may add more modes in the future.
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the offon test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
 #endif  // CONFIG_VP9_TEMPORAL_DENOISING

+class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ public:
+  DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~DatarateOnePassCbrSvc() {}
+ protected:
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    speed_setting_ = GET_PARAM(2);
+    ResetModel();
+  }
+  virtual void ResetModel() {
+    last_pts_ = 0;
+    bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz;
+    frame_number_ = 0;
+    first_drop_ = 0;
+    bits_total_ = 0;
+    duration_ = 0.0;
+  }
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+  }
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      int i;
+      for (i = 0; i < 2; ++i) {
+        svc_params_.max_quantizers[i] = 63;
+        svc_params_.min_quantizers[i] = 0;
+      }
+      svc_params_.scaling_factor_num[0] = 144;
+      svc_params_.scaling_factor_den[0] = 288;
+      svc_params_.scaling_factor_num[1] = 288;
+      svc_params_.scaling_factor_den[1] = 288;
+      encoder->Control(VP9E_SET_SVC, 1);
+      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
+      encoder->Control(VP8E_SET_CPUUSED, speed_setting_);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
+    }
+    const vpx_rational_t tb = video->timebase();
+    timebase_ = static_cast<double>(tb.num) / tb.den;
+    duration_ = 0;
+  }
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+    if (last_pts_ == 0)
+      duration = 1;
+    bits_in_buffer_model_ += static_cast<int64_t>(
+        duration * timebase_ * cfg_.rc_target_bitrate * 1000);
+    const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY)
+                         ? true: false;
+    if (!key_frame) {
+      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+          << pkt->data.frame.pts;
+    }
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    bits_in_buffer_model_ -= frame_size_in_bits;
+    bits_total_ += frame_size_in_bits;
+    if (!first_drop_ && duration > 1)
+      first_drop_ = last_pts_ + 1;
+    last_pts_ = pkt->data.frame.pts;
+    bits_in_last_frame_ = frame_size_in_bits;
+    ++frame_number_;
+  }
+  virtual void EndPassHook(void) {
+    if (bits_total_) {
+      const double file_size_in_kb = bits_total_ / 1000.;  // bits per kilobit
+      duration_ = (last_pts_ + 1) * timebase_;
+      effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0
+          / (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
+      file_datarate_ = file_size_in_kb / duration_;
+    }
+  }
+  vpx_codec_pts_t last_pts_;
+  int64_t bits_in_buffer_model_;
+  double timebase_;
+  int frame_number_;
+  vpx_codec_pts_t first_drop_;
+  int64_t bits_total_;
+  double duration_;
+  double file_datarate_;
+  double effective_datarate_;
+  size_t bits_in_last_frame_;
+  vpx_svc_extra_cfg_t svc_params_;
+  int speed_setting_;
+};
+static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg,
+    const vpx_svc_extra_cfg_t *svc_params,
+    int spatial_layers,
+    int temporal_layers,
+    int temporal_layering_mode,
+    unsigned int total_rate) {
+  int sl, spatial_layer_target;
+  float total = 0;
+  float alloc_ratio[VPX_MAX_LAYERS] = {0};
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    if (svc_params->scaling_factor_den[sl] > 0) {
+      alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] *
+          1.0 / svc_params->scaling_factor_den[sl]);
+      total += alloc_ratio[sl];
+    }
+  }
+  for (sl = 0; sl < spatial_layers; ++sl) {
+    enc_cfg->ss_target_bitrate[sl] = spatial_layer_target =
+        (unsigned int)(enc_cfg->rc_target_bitrate *
+            alloc_ratio[sl] / total);
+    const int index = sl * temporal_layers;
+    if (temporal_layering_mode == 3) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target >> 1;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          (spatial_layer_target >> 1) + (spatial_layer_target >> 2);
+      enc_cfg->layer_target_bitrate[index + 2] =
+          spatial_layer_target;
+    } else if (temporal_layering_mode == 2) {
+      enc_cfg->layer_target_bitrate[index] =
+          spatial_layer_target * 2 / 3;
+      enc_cfg->layer_target_bitrate[index + 1] =
+          spatial_layer_target;
+    }
+  }
+}
+
+// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and
+// 3 temporal layers.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.temporal_layering_mode = 3;
+  svc_params_.scaling_factor_num[0] = 144;
+  svc_params_.scaling_factor_den[0] = 288;
+  svc_params_.scaling_factor_num[1] = 288;
+  svc_params_.scaling_factor_den[1] = 288;
+  // TODO(wonkap/marpan): No frame drop for now, we need to implement correct
+  // frame dropping for SVC.
+  cfg_.rc_dropframe_thresh = 0;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 200);
+  // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
+  // layer target_bitrate. Also check if test can pass at lower bitrate (~200k).
+  for (int i = 400; i <= 800; i += 200) {
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
+        cfg_.ts_number_layers, cfg_.temporal_layering_mode,
+        cfg_.rc_target_bitrate);
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85)
+            << " The datarate for the file exceeds the target by too much!";
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15)
+        << " The datarate for the file is lower than the target by too much!";
+  }
+}
+
 VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
-                          ::libvpx_test::kRealTime),
+                                            ::libvpx_test::kRealTime),
                          ::testing::Range(2, 7));
+VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 8));
 }  // namespace
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -20,8 +20,10 @@

 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -264,6 +266,8 @@ typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,

 typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
 typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t>
+    Idct16x16Param;

 void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
@@ -311,7 +315,33 @@ void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 }
-#endif
+
+void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_10_add_c(in, out, stride, 10);
+}
+
+void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_10_add_c(in, out, stride, 12);
+}
+
+#if HAVE_SSE2
+void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
+}
+
+void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
+}
+
+void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
+}
+
+void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
+}
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans16x16TestBase {
 public:
@@ -328,13 +358,13 @@ class Trans16x16TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

      // Initialize a test block with input range [-mask_, mask_].
@@ -388,9 +418,9 @@ class Trans16x16TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -409,15 +439,13 @@ class Trans16x16TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -444,24 +472,19 @@ class Trans16x16TestBase {
  void RunQuantCheck(int dc_thred, int ac_thred) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);

-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, ref, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, ref16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        if (bit_depth_ == VPX_BITS_8)
-          input_block[j] = rnd.Rand8() - rnd.Rand8();
-        else
-          input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0)
@@ -474,11 +497,11 @@ class Trans16x16TestBase {
      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);

      // clear reconstructed pixel buffers
-      vpx_memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
-      vpx_memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
+      memset(dst, 0, kNumCoeffs * sizeof(uint8_t));
+      memset(ref, 0, kNumCoeffs * sizeof(uint8_t));
 #if CONFIG_VP9_HIGHBITDEPTH
-      vpx_memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
-      vpx_memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
+      memset(dst16, 0, kNumCoeffs * sizeof(uint16_t));
+      memset(ref16, 0, kNumCoeffs * sizeof(uint16_t));
 #endif

      // quantization with maximum allowed step sizes
@@ -511,14 +534,14 @@ class Trans16x16TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
-#endif
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -534,13 +557,13 @@ class Trans16x16TestBase {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j)
-        coeff[j] = round(out_r[j]);
+        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
@@ -548,7 +571,7 @@ class Trans16x16TestBase {
      } else {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
                                            16));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
@@ -557,7 +580,7 @@ class Trans16x16TestBase {
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
        const uint32_t diff = dst[j] - src[j];
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error)
            << "Error: 16x16 IDCT has error " << error
@@ -565,6 +588,64 @@ class Trans16x16TestBase {
      }
    }
  }
+
+  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 10000;
+    const int eob = 10;
+    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+    for (int i = 0; i < count_test_block; ++i) {
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (j < eob) {
+          // Random values less than the threshold, either positive or negative
+          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
+        } else {
+          coeff[scan[j]] = 0;
+        }
+        if (bit_depth_ == VPX_BITS_8) {
+          dst[j] = 0;
+          ref[j] = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          dst16[j] = 0;
+          ref16[j] = 0;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        }
+      }
+      if (bit_depth_ == VPX_BITS_8) {
+        ref_txfm(coeff, ref, pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+      } else {
+#if CONFIG_VP9_HIGHBITDEPTH
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
+                                 pitch_));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
+#else
+        const uint32_t diff = dst[j] - ref[j];
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t error = diff * diff;
+        EXPECT_EQ(0u, error)
+            << "Error: 16x16 IDCT Comparison has error " << error
+            << " at index " << j;
+      }
+    }
+  }
+
  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
@@ -590,10 +671,10 @@ class Trans16x16DCT
    mask_ = (1 << bit_depth_) - 1;
 #if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
-      case 10:
+      case VPX_BITS_10:
        inv_txfm_ref = idct16x16_10_ref;
        break;
-      case 12:
+      case VPX_BITS_12:
        inv_txfm_ref = idct16x16_12_ref;
        break;
      default:
@@ -703,6 +784,37 @@ TEST_P(Trans16x16HT, QuantCheck) {
  RunQuantCheck(429, 729);
 }

+class InvTrans16x16DCT
+    : public Trans16x16TestBase,
+      public ::testing::TestWithParam<Idct16x16Param> {
+ public:
+  virtual ~InvTrans16x16DCT() {}
+
+  virtual void SetUp() {
+    ref_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    thresh_ = GET_PARAM(2);
+    bit_depth_ = GET_PARAM(3);
+    pitch_ = 16;
+    mask_ = (1 << bit_depth_) - 1;
+}
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {}
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  IdctFunc ref_txfm_;
+  IdctFunc inv_txfm_;
+  int thresh_;
+};
+
+TEST_P(InvTrans16x16DCT, CompareReference) {
+  CompareInvReference(ref_txfm_, thresh_);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -717,7 +829,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -743,7 +855,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -770,13 +882,66 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 3,
                   VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    SSSE3, Trans16x16DCT,
+    SSE2, Trans16x16DCT,
    ::testing::Values(
-        make_tuple(&vp9_fdct16x16_c, &vp9_idct16x16_256_add_ssse3, 0,
+        make_tuple(&vp9_highbd_fdct16x16_sse2,
+                   &idct16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct16x16_c,
+                   &idct16x16_256_add_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct16x16_sse2,
+                   &idct16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct16x16_c,
+                   &idct16x16_256_add_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct16x16_sse2,
+                   &vp9_idct16x16_256_add_c, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht16x16_sse2, &iht16x16_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3,
                   VPX_BITS_8)));
-#endif
+// Optimizations take effect at a threshold of 3155, so we use a value close to
+// that to test both branches.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, InvTrans16x16DCT,
+    ::testing::Values(
+        make_tuple(&idct16x16_10_add_10_c,
+                   &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10),
+        make_tuple(&idct16x16_10,
+                   &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10),
+        make_tuple(&idct16x16_10_add_12_c,
+                   &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
+        make_tuple(&idct16x16_12,
+                   &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12)));
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans16x16DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct16x16_msa,
+                   &vp9_idct16x16_256_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans16x16HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
+                   VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -23,6 +23,7 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -79,6 +80,10 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;

 #if CONFIG_VP9_HIGHBITDEPTH
+void idct32x32_8(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 8);
+}
+
 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }
@@ -86,7 +91,7 @@ void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans32x32Test : public ::testing::TestWithParam<Trans32x32Param> {
 public:
@@ -114,20 +119,20 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  uint32_t max_error = 0;
  int64_t total_error = 0;
-  const int count_test_block = 1000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+  const int count_test_block = 10000;
+  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-mask_, mask_].
    for (int j = 0; j < kNumCoeffs; ++j) {
-      if (bit_depth_ == 8) {
+      if (bit_depth_ == VPX_BITS_8) {
        src[j] = rnd.Rand8();
        dst[j] = rnd.Rand8();
        test_input_block[j] = src[j] - dst[j];
@@ -180,9 +185,9 @@ TEST_P(Trans32x32Test, CoeffCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
    for (int j = 0; j < kNumCoeffs; ++j)
@@ -208,15 +213,13 @@ TEST_P(Trans32x32Test, MemCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 2000;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-mask_, mask_].
    for (int j = 0; j < kNumCoeffs; ++j) {
-      input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
      input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_;
    }
    if (i == 0) {
@@ -253,13 +256,13 @@ TEST_P(Trans32x32Test, MemCheck) {
 TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
-  DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

  for (int i = 0; i < count_test_block; ++i) {
@@ -282,7 +285,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {

    reference_32x32_dct_2d(in, out_r);
    for (int j = 0; j < kNumCoeffs; ++j)
-      coeff[j] = round(out_r[j]);
+      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
    if (bit_depth_ == VPX_BITS_8) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -331,7 +334,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_c, 1, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -341,7 +344,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_c,
                   &vp9_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -351,7 +354,23 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_sse2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans32x32Test,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1,
+                   VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1,
+                   VPX_BITS_12),
+        make_tuple(&vp9_fdct32x32_sse2, &vp9_idct32x32_1024_add_c, 0,
+                   VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_sse2, &vp9_idct32x32_1024_add_c, 1,
+                   VPX_BITS_8)));
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -361,5 +380,15 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
        make_tuple(&vp9_fdct32x32_rd_avx2,
                   &vp9_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
-#endif
+#endif  // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans32x32Test,
+    ::testing::Values(
+        make_tuple(&vp9_fdct32x32_msa,
+                   &vp9_idct32x32_1024_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fdct32x32_rd_msa,
+                   &vp9_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -57,6 +57,21 @@ TEST(DecodeAPI, InvalidParams) {
  }
 }

+#if CONFIG_VP8_DECODER
+TEST(DecodeAPI, OptionalParams) {
+  vpx_codec_ctx_t dec;
+
+#if CONFIG_ERROR_CONCEALMENT
+  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
+                                             VPX_CODEC_USE_ERROR_CONCEALMENT));
+#else
+  EXPECT_EQ(VPX_CODEC_INCAPABLE,
+            vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL,
+                               VPX_CODEC_USE_ERROR_CONCEALMENT));
+#endif  // CONFIG_ERROR_CONCEALMENT
+}
+#endif  // CONFIG_VP8_DECODER
+
 #if CONFIG_VP9_DECODER
 // Test VP9 codec controls after a decode error to ensure the code doesn't
 // misbehave.
@@ -65,6 +80,7 @@ void TestVp9Controls(vpx_codec_ctx_t *dec) {
    VP8D_GET_LAST_REF_UPDATES,
    VP8D_GET_FRAME_CORRUPTED,
    VP9D_GET_DISPLAY_SIZE,
+    VP9D_GET_FRAME_SIZE
  };
  int val[2];

--- a/test/decode_perf_test.cc
+++ b/test/decode_perf_test.cc
@@ -8,13 +8,17 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <string>
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
 #include "test/ivf_video_source.h"
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/webm_video_source.h"
 #include "vpx_ports/vpx_timer.h"
+#include "./ivfenc.h"
 #include "./vpx_version.h"

 using std::tr1::make_tuple;
@@ -24,7 +28,9 @@ namespace {
 #define VIDEO_NAME 0
 #define THREADS 1

+const int kMaxPsnr = 100;
 const double kUsecsInSec = 1000000.0;
+const char kNewEncodeOutputFile[] = "new_encode.ivf";

 /*
 DecodePerfTest takes a tuple of filename + number of threads to decode with
@@ -105,4 +111,163 @@ TEST_P(DecodePerfTest, PerfTest) {
 INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest,
                        ::testing::ValuesIn(kVP9DecodePerfVectors));

+class VP9NewEncodeDecodePerfTest :
+    public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  VP9NewEncodeDecodePerfTest()
+      : EncoderTest(GET_PARAM(0)),
+        encoding_mode_(GET_PARAM(1)),
+        speed_(0),
+        outfile_(0),
+        out_frames_(0) {
+  }
+
+  virtual ~VP9NewEncodeDecodePerfTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    cfg_.g_lag_in_frames = 25;
+    cfg_.rc_min_quantizer = 2;
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_dropframe_thresh = 0;
+    cfg_.rc_undershoot_pct = 50;
+    cfg_.rc_overshoot_pct = 50;
+    cfg_.rc_buf_sz = 1000;
+    cfg_.rc_buf_initial_sz = 500;
+    cfg_.rc_buf_optimal_sz = 600;
+    cfg_.rc_resize_allowed = 0;
+    cfg_.rc_end_usage = VPX_VBR;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, speed_);
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 2);
+    }
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    const std::string data_path = getenv("LIBVPX_TEST_DATA_PATH");
+    const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile;
+    outfile_ = fopen(path_to_source.c_str(), "wb");
+    ASSERT_TRUE(outfile_ != NULL);
+  }
+
+  virtual void EndPassHook() {
+    if (outfile_ != NULL) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_);
+
+    // Write frame header and data.
+    ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz);
+    ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_),
+              pkt->data.frame.sz);
+  }
+
+  virtual bool DoDecode() { return false; }
+
+  void set_speed(unsigned int speed) {
+    speed_ = speed;
+  }
+
+ private:
+  libvpx_test::TestMode encoding_mode_;
+  uint32_t speed_;
+  FILE *outfile_;
+  uint32_t out_frames_;
+};
+
+struct EncodePerfTestVideo {
+  EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_,
+                      uint32_t bitrate_, int frames_)
+      : name(name_),
+        width(width_),
+        height(height_),
+        bitrate(bitrate_),
+        frames(frames_) {}
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
+  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
+};
+
+TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) {
+  SetUp();
+
+  // TODO(JBB): Make this work by going through the set of given files.
+  const int i = 0;
+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  const char *video_name = kVP9EncodePerfTestVectors[i].name;
+  libvpx_test::I420VideoSource video(
+      video_name,
+      kVP9EncodePerfTestVectors[i].width,
+      kVP9EncodePerfTestVectors[i].height,
+      timebase.den, timebase.num, 0,
+      kVP9EncodePerfTestVectors[i].frames);
+  set_speed(2);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+
+  const uint32_t threads = 4;
+
+  libvpx_test::IVFVideoSource decode_video(kNewEncodeOutputFile);
+  decode_video.Init();
+
+  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+  cfg.threads = threads;
+  libvpx_test::VP9Decoder decoder(cfg, 0);
+
+  vpx_usec_timer t;
+  vpx_usec_timer_start(&t);
+
+  for (decode_video.Begin(); decode_video.cxdata() != NULL;
+       decode_video.Next()) {
+    decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size());
+  }
+
+  vpx_usec_timer_mark(&t);
+  const double elapsed_secs =
+      static_cast<double>(vpx_usec_timer_elapsed(&t)) / kUsecsInSec;
+  const unsigned decode_frames = decode_video.frame_number();
+  const double fps = static_cast<double>(decode_frames) / elapsed_secs;
+
+  printf("{\n");
+  printf("\t\"type\" : \"decode_perf_test\",\n");
+  printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+  printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile);
+  printf("\t\"threadCount\" : %u,\n", threads);
+  printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs);
+  printf("\t\"totalFrames\" : %u,\n", decode_frames);
+  printf("\t\"framesPerSecond\" : %f\n", fps);
+  printf("}\n");
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+  VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood));
 }  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -65,7 +65,7 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,

 void DecoderTest::RunLoop(CompressedVideoSource *video,
                          const vpx_codec_dec_cfg_t &dec_cfg) {
-  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
+  Decoder* const decoder = codec_->CreateDecoder(dec_cfg, flags_, 0);
  ASSERT_TRUE(decoder != NULL);
  bool end_of_file = false;

@@ -110,4 +110,12 @@ void DecoderTest::RunLoop(CompressedVideoSource *video) {
  RunLoop(video, dec_cfg);
 }

+void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) {
+  memcpy(&cfg_, &dec_cfg, sizeof(cfg_));
+}
+
+void DecoderTest::set_flags(const vpx_codec_flags_t flags) {
+  flags_ = flags;
+}
+
 }  // namespace libvpx_test
--- a/test/decode_test_driver.h
+++ b/test/decode_test_driver.h
@@ -41,7 +41,13 @@ class DxDataIterator {
 class Decoder {
 public:
  Decoder(vpx_codec_dec_cfg_t cfg, unsigned long deadline)
-      : cfg_(cfg), deadline_(deadline), init_done_(false) {
+      : cfg_(cfg), flags_(0), deadline_(deadline), init_done_(false) {
+    memset(&decoder_, 0, sizeof(decoder_));
+  }
+
+  Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag,
+          unsigned long deadline)  // NOLINT
+      : cfg_(cfg), flags_(flag), deadline_(deadline), init_done_(false) {
    memset(&decoder_, 0, sizeof(decoder_));
  }

@@ -66,9 +72,7 @@ class Decoder {
  }

  void Control(int ctrl_id, int arg) {
-    InitOnce();
-    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
+    Control(ctrl_id, arg, VPX_CODEC_OK);
  }

  void Control(int ctrl_id, const void *arg) {
@@ -77,6 +81,12 @@ class Decoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
  }

+  void Control(int ctrl_id, int arg, vpx_codec_err_t expected_value) {
+    InitOnce();
+    const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg);
+    ASSERT_EQ(expected_value, res) << DecodeError();
+  }
+
  const char* DecodeError() {
    const char *detail = vpx_codec_error_detail(&decoder_);
    return detail ? detail : vpx_codec_error(&decoder_);
@@ -97,6 +107,10 @@ class Decoder {

  bool IsVP8() const;

+  vpx_codec_ctx_t * GetDecoder() {
+    return &decoder_;
+  }
+
 protected:
  virtual vpx_codec_iface_t* CodecInterface() const = 0;

@@ -104,7 +118,7 @@ class Decoder {
    if (!init_done_) {
      const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_,
                                                     CodecInterface(),
-                                                     &cfg_, 0);
+                                                     &cfg_, flags_);
      ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError();
      init_done_ = true;
    }
@@ -112,6 +126,7 @@ class Decoder {

  vpx_codec_ctx_t     decoder_;
  vpx_codec_dec_cfg_t cfg_;
+  vpx_codec_flags_t   flags_;
  unsigned int        deadline_;
  bool                init_done_;
 };
@@ -124,6 +139,9 @@ class DecoderTest {
  virtual void RunLoop(CompressedVideoSource *video,
                       const vpx_codec_dec_cfg_t &dec_cfg);

+  virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg);
+  virtual void set_flags(const vpx_codec_flags_t flags);
+
  // Hook to be called before decompressing every frame.
  virtual void PreDecodeFrameHook(const CompressedVideoSource& /*video*/,
                                  Decoder* /*decoder*/) {}
@@ -146,11 +164,16 @@ class DecoderTest {
                                const vpx_codec_err_t res_peek);

 protected:
-  explicit DecoderTest(const CodecFactory *codec) : codec_(codec) {}
+  explicit DecoderTest(const CodecFactory *codec)
+      : codec_(codec),
+        cfg_(),
+        flags_(0) {}

  virtual ~DecoderTest() {}

  const CodecFactory *codec_;
+  vpx_codec_dec_cfg_t cfg_;
+  vpx_codec_flags_t   flags_;
 };

 }  // namespace libvpx_test
--- a/test/encode_perf_test.cc
+++ b/test/encode_perf_test.cc
@@ -7,6 +7,7 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+#include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
 #include "./vpx_config.h"
 #include "./vpx_version.h"
@@ -50,7 +51,8 @@ const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
  EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470),
 };

-const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 12 };
+const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8 };
+const int kEncodePerfTestThreads[] = { 1, 2, 4 };

 #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0]))

@@ -63,7 +65,8 @@ class VP9EncodePerfTest
        min_psnr_(kMaxPsnr),
        nframes_(0),
        encoding_mode_(GET_PARAM(1)),
-        speed_(0) {}
+        speed_(0),
+        threads_(1) {}

  virtual ~VP9EncodePerfTest() {}

@@ -82,12 +85,18 @@ class VP9EncodePerfTest
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_resize_allowed = 0;
    cfg_.rc_end_usage = VPX_CBR;
+    cfg_.g_error_resilient = 1;
+    cfg_.g_threads = threads_;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
+    if (video->frame() == 0) {
+      const int log2_tile_columns = 3;
      encoder->Control(VP8E_SET_CPUUSED, speed_);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, log2_tile_columns);
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
    }
  }

@@ -113,54 +122,77 @@ class VP9EncodePerfTest
    speed_ = speed;
  }

+  void set_threads(unsigned int threads) {
+    threads_ = threads;
+  }
+
 private:
  double min_psnr_;
  unsigned int nframes_;
  libvpx_test::TestMode encoding_mode_;
  unsigned speed_;
+  unsigned int threads_;
 };

 TEST_P(VP9EncodePerfTest, PerfTest) {
  for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) {
    for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) {
-      SetUp();
+      for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) {
+        if (kVP9EncodePerfTestVectors[i].width < 512 &&
+            kEncodePerfTestThreads[k] > 1)
+          continue;
+        else if (kVP9EncodePerfTestVectors[i].width < 1024 &&
+                 kEncodePerfTestThreads[k] > 2)
+          continue;

-      const vpx_rational timebase = { 33333333, 1000000000 };
-      cfg_.g_timebase = timebase;
-      cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;
+        set_threads(kEncodePerfTestThreads[k]);
+        SetUp();

-      init_flags_ = VPX_CODEC_USE_PSNR;
+        const vpx_rational timebase = { 33333333, 1000000000 };
+        cfg_.g_timebase = timebase;
+        cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate;

-      const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
-      const char *video_name = kVP9EncodePerfTestVectors[i].name;
-      libvpx_test::I420VideoSource video(
-          video_name,
-          kVP9EncodePerfTestVectors[i].width,
-          kVP9EncodePerfTestVectors[i].height,
-          timebase.den, timebase.num, 0,
-          kVP9EncodePerfTestVectors[i].frames);
-      set_speed(kEncodePerfTestSpeeds[j]);
+        init_flags_ = VPX_CODEC_USE_PSNR;

-      vpx_usec_timer t;
-      vpx_usec_timer_start(&t);
+        const unsigned frames = kVP9EncodePerfTestVectors[i].frames;
+        const char *video_name = kVP9EncodePerfTestVectors[i].name;
+        libvpx_test::I420VideoSource video(
+            video_name,
+            kVP9EncodePerfTestVectors[i].width,
+            kVP9EncodePerfTestVectors[i].height,
+            timebase.den, timebase.num, 0,
+            kVP9EncodePerfTestVectors[i].frames);
+        set_speed(kEncodePerfTestSpeeds[j]);

-      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+        vpx_usec_timer t;
+        vpx_usec_timer_start(&t);

-      vpx_usec_timer_mark(&t);
-      const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
-      const double fps = frames / elapsed_secs;
-      const double minimum_psnr = min_psnr();
+        ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

-      printf("{\n");
-      printf("\t\"type\" : \"encode_perf_test\",\n");
-      printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
-      printf("\t\"videoName\" : \"%s\",\n", video_name);
-      printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
-      printf("\t\"totalFrames\" : %u,\n", frames);
-      printf("\t\"framesPerSecond\" : %f,\n", fps);
-      printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
-      printf("\t\"speed\" : %d\n", kEncodePerfTestSpeeds[j]);
-      printf("}\n");
+        vpx_usec_timer_mark(&t);
+        const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec;
+        const double fps = frames / elapsed_secs;
+        const double minimum_psnr = min_psnr();
+        std::string display_name(video_name);
+        if (kEncodePerfTestThreads[k] > 1) {
+          char thread_count[32];
+          snprintf(thread_count, sizeof(thread_count), "_t-%d",
+                   kEncodePerfTestThreads[k]);
+          display_name += thread_count;
+        }
+
+        printf("{\n");
+        printf("\t\"type\" : \"encode_perf_test\",\n");
+        printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP);
+        printf("\t\"videoName\" : \"%s\",\n", display_name.c_str());
+        printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs);
+        printf("\t\"totalFrames\" : %u,\n", frames);
+        printf("\t\"framesPerSecond\" : %f,\n", fps);
+        printf("\t\"minPsnr\" : %f,\n", minimum_psnr);
+        printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]);
+        printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]);
+        printf("}\n");
+      }
    }
  }
 }
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -8,6 +8,8 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

+#include <string>
+
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/encode_test_driver.h"
@@ -17,6 +19,38 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 namespace libvpx_test {
+void Encoder::InitEncoder(VideoSource *video) {
+  vpx_codec_err_t res;
+  const vpx_image_t *img = video->img();
+
+  if (video->img() && !encoder_.priv) {
+    cfg_.g_w = img->d_w;
+    cfg_.g_h = img->d_h;
+    cfg_.g_timebase = video->timebase();
+    cfg_.rc_twopass_stats_in = stats_->buf();
+
+    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
+                             init_flags_);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+
+#if CONFIG_VP9_ENCODER
+    if (CodecInterface() == &vpx_codec_vp9_cx_algo) {
+      // Default to 1 tile column for VP9.
+      const int log2_tile_columns = 0;
+      res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS,
+                               log2_tile_columns);
+      ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    } else
+#endif
+    {
+#if CONFIG_VP8_ENCODER
+      ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface())
+          << "Unknown Codec Interface";
+#endif
+    }
+  }
+}
+
 void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) {
  if (video->img())
    EncodeFrameInternal(*video, frame_flags);
@@ -39,17 +73,6 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,
  vpx_codec_err_t res;
  const vpx_image_t *img = video.img();

-  // Handle first frame initialization
-  if (!encoder_.priv) {
-    cfg_.g_w = img->d_w;
-    cfg_.g_h = img->d_h;
-    cfg_.g_timebase = video.timebase();
-    cfg_.rc_twopass_stats_in = stats_->buf();
-    res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_,
-                             init_flags_);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-
  // Handle frame resizing
  if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) {
    cfg_.g_w = img->d_w;
@@ -60,8 +83,7 @@ void Encoder::EncodeFrameInternal(const VideoSource &video,

  // Encode the frame
  API_REGISTER_STATE_CHECK(
-      res = vpx_codec_encode(&encoder_,
-                             video.img(), video.pts(), video.duration(),
+      res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(),
                             frame_flags, deadline_));
  ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
 }
@@ -77,6 +99,7 @@ void Encoder::Flush() {

 void EncoderTest::InitializeConfig() {
  const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0);
+  dec_cfg_ = vpx_codec_dec_cfg_t();
  ASSERT_EQ(VPX_CODEC_OK, res);
 }

@@ -110,6 +133,7 @@ void EncoderTest::SetMode(TestMode mode) {
 static bool compare_img(const vpx_image_t *img1,
                        const vpx_image_t *img2) {
  bool match = (img1->fmt == img2->fmt) &&
+               (img1->cs == img2->cs) &&
               (img1->d_w == img2->d_w) &&
               (img1->d_h == img2->d_h);

@@ -158,9 +182,18 @@ void EncoderTest::RunLoop(VideoSource *video) {
    Encoder* const encoder = codec_->CreateEncoder(cfg_, deadline_, init_flags_,
                                                   &stats_);
    ASSERT_TRUE(encoder != NULL);
-    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, 0);
+
+    video->Begin();
+    encoder->InitEncoder(video);
+
+    unsigned long dec_init_flags = 0;  // NOLINT
+    // Use fragment decoder if encoder outputs partitions.
+    // NOTE: fragment decoder and partition encoder are only supported by VP8.
+    if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION)
+      dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS;
+    Decoder* const decoder = codec_->CreateDecoder(dec_cfg, dec_init_flags, 0);
    bool again;
-    for (again = true, video->Begin(); again; video->Next()) {
+    for (again = true; again; video->Next()) {
      again = (video->img() != NULL);

      PreEncodeFrameHook(video);
@@ -200,6 +233,13 @@ void EncoderTest::RunLoop(VideoSource *video) {
        }
      }

+      // Flush the decoder when there are no more fragments.
+      if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) {
+        const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0);
+        if (!HandleDecodeResult(res_dec, *video, decoder))
+          break;
+      }
+
      if (has_dxdata && has_cxdata) {
        const vpx_image_t *img_enc = encoder->GetPreviewFrame();
        DxDataIterator dec_iter = decoder->GetDxData();
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -104,6 +104,8 @@ class Encoder {
    return CxDataIterator(&encoder_);
  }

+  void InitEncoder(VideoSource *video);
+
  const vpx_image_t *GetPreviewFrame() {
    return vpx_codec_get_preview_frame(&encoder_);
  }
@@ -131,6 +133,10 @@ class Encoder {
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
  }

+  void Control(int ctrl_id, struct vpx_svc_parameters *arg) {
+    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+  }
 #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
  void Control(int ctrl_id, vpx_active_map_t *arg) {
    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
@@ -138,6 +144,12 @@ class Encoder {
  }
 #endif

+  void Config(const vpx_codec_enc_cfg_t *cfg) {
+    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
+    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
+    cfg_ = *cfg;
+  }
+
  void set_deadline(unsigned long deadline) {
    deadline_ = deadline;
  }
@@ -175,7 +187,10 @@ class EncoderTest {
 protected:
  explicit EncoderTest(const CodecFactory *codec)
      : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0),
-        last_pts_(0) {}
+        last_pts_(0) {
+    // Default to 1 thread.
+    cfg_.g_threads = 1;
+  }

  virtual ~EncoderTest() {}

@@ -185,6 +200,11 @@ class EncoderTest {
  // Map the TestMode enum to the deadline_ and passes_ variables.
  void SetMode(TestMode mode);

+  // Set encoder flag.
+  void set_init_flags(unsigned long flag) {  // NOLINT(runtime/int)
+    init_flags_ = flag;
+  }
+
  // Main loop
  virtual void RunLoop(VideoSource *video);

@@ -238,6 +258,7 @@ class EncoderTest {

  bool                 abort_;
  vpx_codec_enc_cfg_t  cfg_;
+  vpx_codec_dec_cfg_t  dec_cfg_;
  unsigned int         passes_;
  unsigned long        deadline_;
  TwopassStatsStore    stats_;
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -37,6 +37,7 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
  void Reset() {
    error_nframes_ = 0;
    droppable_nframes_ = 0;
+    pattern_switch_ = 0;
  }

  virtual void SetUp() {
@@ -56,22 +57,77 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    nframes_++;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
+  //
+  // Frame flags and layer id for temporal layers.
+  // For two layers, test pattern is:
+  //   1     3
+  // 0    2     .....
+  // LAST is updated on base/layer 0, GOLDEN  updated on layer 1.
+  // Non-zero pattern_switch parameter means pattern will switch to
+  // not using LAST for frame_num >= pattern_switch.
+  int SetFrameFlags(int frame_num,
+                    int num_temp_layers,
+                    int pattern_switch) {
+    int frame_flags = 0;
+    if (num_temp_layers == 2) {
+        if (frame_num % 2 == 0) {
+          if (frame_num < pattern_switch || pattern_switch == 0) {
+            // Layer 0: predict from LAST and ARF, update LAST.
+            frame_flags = VP8_EFLAG_NO_REF_GF |
+                          VP8_EFLAG_NO_UPD_GF |
+                          VP8_EFLAG_NO_UPD_ARF;
+          } else {
+            // Layer 0: predict from GF and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_REF_LAST |
+                          VP8_EFLAG_NO_UPD_LAST |
+                          VP8_EFLAG_NO_UPD_ARF;
+          }
+        } else {
+          if (frame_num < pattern_switch || pattern_switch == 0) {
+            // Layer 1: predict from L, GF, and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_UPD_ARF |
+                          VP8_EFLAG_NO_UPD_LAST;
+          } else {
+            // Layer 1: predict from GF and ARF, update GF.
+            frame_flags = VP8_EFLAG_NO_REF_LAST |
+                          VP8_EFLAG_NO_UPD_LAST |
+                          VP8_EFLAG_NO_UPD_ARF;
+          }
+        }
+    }
+    return frame_flags;
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
    frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST |
                      VP8_EFLAG_NO_UPD_GF |
                      VP8_EFLAG_NO_UPD_ARF);
-    if (droppable_nframes_ > 0 &&
-        (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+    // For temporal layer case.
+    if (cfg_.ts_number_layers > 1) {
+      frame_flags_ = SetFrameFlags(video->frame(),
+                                   cfg_.ts_number_layers,
+                                   pattern_switch_);
      for (unsigned int i = 0; i < droppable_nframes_; ++i) {
        if (droppable_frames_[i] == video->frame()) {
-          std::cout << "             Encoding droppable frame: "
+          std::cout << "Encoding droppable frame: "
                    << droppable_frames_[i] << "\n";
-          frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
-                           VP8_EFLAG_NO_UPD_GF |
-                           VP8_EFLAG_NO_UPD_ARF);
-          return;
        }
      }
+    } else {
+       if (droppable_nframes_ > 0 &&
+         (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) {
+         for (unsigned int i = 0; i < droppable_nframes_; ++i) {
+           if (droppable_frames_[i] == video->frame()) {
+             std::cout << "Encoding droppable frame: "
+                       << droppable_frames_[i] << "\n";
+             frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST |
+                              VP8_EFLAG_NO_UPD_GF |
+                              VP8_EFLAG_NO_UPD_ARF);
+             return;
+           }
+         }
+       }
    }
  }

@@ -133,11 +189,16 @@ class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest,
    return mismatch_nframes_;
  }

+  void SetPatternSwitch(int frame_switch) {
+     pattern_switch_ = frame_switch;
+   }
+
 private:
  double psnr_;
  unsigned int nframes_;
  unsigned int error_nframes_;
  unsigned int droppable_nframes_;
+  unsigned int pattern_switch_;
  double mismatch_psnr_;
  unsigned int mismatch_nframes_;
  unsigned int error_frames_[kMaxErrorFrames];
@@ -236,7 +297,290 @@ TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) {
 #endif
 }

-VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
-VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+// Check for successful decoding and no encoder/decoder mismatch
+// if we lose (i.e., drop before decoding) the enhancement layer frames for a
+// two layer temporal pattern. The base layer does not predict from the top
+// layer, so successful decoding is expected.
+TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) {
+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 500;
+  cfg_.g_lag_in_frames = 0;

+  cfg_.rc_end_usage = VPX_CBR;
+  // 2 Temporal layers, no spatial layers, CBR mode.
+  cfg_.ss_number_layers = 1;
+  cfg_.ts_number_layers = 2;
+  cfg_.ts_rate_decimator[0] = 2;
+  cfg_.ts_rate_decimator[1] = 1;
+  cfg_.ts_periodicity = 2;
+  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 40);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  SetPatternSwitch(0);
+
+  // The odd frames are the enhancement layer for 2 layer pattern, so set
+  // those frames as droppable. Drop the last 7 frames.
+  unsigned int num_droppable_frames = 7;
+  unsigned int droppable_frame_list[] = {27, 29, 31, 33, 35, 37, 39};
+  SetDroppableFrames(num_droppable_frames, droppable_frame_list);
+  SetErrorFrames(num_droppable_frames, droppable_frame_list);
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Mismatch frames: "
+            << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+  // Reset previously set of error/droppable frames.
+  Reset();
+}
+
+// Check for successful decoding and no encoder/decoder mismatch
+// for a two layer temporal pattern, where at some point in the
+// sequence, the LAST ref is not used anymore.
+TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) {
+  const vpx_rational timebase = { 33333333, 1000000000 };
+  cfg_.g_timebase = timebase;
+  cfg_.rc_target_bitrate = 500;
+  cfg_.g_lag_in_frames = 0;
+
+  cfg_.rc_end_usage = VPX_CBR;
+  // 2 Temporal layers, no spatial layers, CBR mode.
+  cfg_.ss_number_layers = 1;
+  cfg_.ts_number_layers = 2;
+  cfg_.ts_rate_decimator[0] = 2;
+  cfg_.ts_rate_decimator[1] = 1;
+  cfg_.ts_periodicity = 2;
+  cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100;
+  cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate;
+
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                     timebase.den, timebase.num, 0, 100);
+
+  // Error resilient mode ON.
+  cfg_.g_error_resilient = 1;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  SetPatternSwitch(60);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  // Test that no mismatches have been found
+  std::cout << "             Mismatch frames: "
+            << GetMismatchFrames() << "\n";
+  EXPECT_EQ(GetMismatchFrames(), (unsigned int) 0);
+
+  // Reset previously set of error/droppable frames.
+  Reset();
+}
+
+class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest,
+    public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+ protected:
+  ErrorResilienceTestLargeCodecControls()
+      : EncoderTest(GET_PARAM(0)),
+        encoding_mode_(GET_PARAM(1)) {
+    Reset();
+  }
+
+  virtual ~ErrorResilienceTestLargeCodecControls() {}
+
+  void Reset() {
+    last_pts_ = 0;
+    tot_frame_number_ = 0;
+    // For testing up to 3 layers.
+    for (int i = 0; i < 3; ++i) {
+      bits_total_[i] = 0;
+    }
+    duration_ = 0.0;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+  }
+
+  //
+  // Frame flags and layer id for temporal layers.
+  //
+
+  // For two layers, test pattern is:
+  //   1     3
+  // 0    2     .....
+  // For three layers, test pattern is:
+  //   1      3    5      7
+  //      2           6
+  // 0          4            ....
+  // LAST is always update on base/layer 0, GOLDEN is updated on layer 1,
+  // and ALTREF is updated on top layer for 3 layer pattern.
+  int SetFrameFlags(int frame_num, int num_temp_layers) {
+    int frame_flags = 0;
+    if (num_temp_layers == 2) {
+      if (frame_num % 2 == 0) {
+        // Layer 0: predict from L and ARF, update L.
+        frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF |
+                      VP8_EFLAG_NO_UPD_ARF;
+      } else {
+        // Layer 1: predict from L, G and ARF, and update G.
+        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+                      VP8_EFLAG_NO_UPD_ENTROPY;
+      }
+    } else if (num_temp_layers == 3) {
+      if (frame_num % 4 == 0) {
+        // Layer 0: predict from L, update L.
+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
+                      VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF;
+      } else if ((frame_num - 2) % 4 == 0) {
+        // Layer 1: predict from L, G,  update G.
+        frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST |
+                      VP8_EFLAG_NO_REF_ARF;
+      }  else if ((frame_num - 1) % 2 == 0) {
+        // Layer 2: predict from L, G, ARF; update ARG.
+        frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST;
+      }
+    }
+    return frame_flags;
+  }
+
+  int SetLayerId(int frame_num, int num_temp_layers) {
+    int layer_id = 0;
+    if (num_temp_layers == 2) {
+      if (frame_num % 2 == 0) {
+        layer_id = 0;
+      } else {
+         layer_id = 1;
+      }
+    } else if (num_temp_layers == 3) {
+      if (frame_num % 4 == 0) {
+        layer_id = 0;
+      } else if ((frame_num - 2) % 4 == 0) {
+        layer_id = 1;
+      } else if ((frame_num - 1) % 2 == 0) {
+        layer_id = 2;
+      }
+    }
+    return layer_id;
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  libvpx_test::Encoder *encoder) {
+    if (cfg_.ts_number_layers > 1) {
+        int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers);
+        int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers);
+        if (video->frame() > 0) {
+          encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id);
+          encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags);
+        }
+       const vpx_rational_t tb = video->timebase();
+       timebase_ = static_cast<double>(tb.num) / tb.den;
+       duration_ = 0;
+       return;
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    // Time since last timestamp = duration.
+    vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_;
+    if (duration > 1) {
+      // Update counter for total number of frames (#frames input to encoder).
+      // Needed for setting the proper layer_id below.
+      tot_frame_number_ += static_cast<int>(duration - 1);
+    }
+    int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers);
+    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
+    // Update the total encoded bits. For temporal layers, update the cumulative
+    // encoded bits per layer.
+    for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) {
+      bits_total_[i] += frame_size_in_bits;
+    }
+    // Update the most recent pts.
+    last_pts_ = pkt->data.frame.pts;
+    ++tot_frame_number_;
+  }
+
+  virtual void EndPassHook(void) {
+    duration_ = (last_pts_ + 1) * timebase_;
+    if (cfg_.ts_number_layers  > 1) {
+      for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers);
+          ++layer) {
+        if (bits_total_[layer]) {
+          // Effective file datarate:
+          effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_;
+        }
+      }
+    }
+  }
+
+  double effective_datarate_[3];
+   private:
+    libvpx_test::TestMode encoding_mode_;
+    vpx_codec_pts_t last_pts_;
+    double timebase_;
+    int64_t bits_total_[3];
+    double duration_;
+    int tot_frame_number_;
+  };
+
+// Check two codec controls used for:
+// (1) for setting temporal layer id, and (2) for settings encoder flags.
+// This test invokes those controls for each frame, and verifies encoder/decoder
+// mismatch and basic rate control response.
+// TODO(marpan): Maybe move this test to datarate_test.cc.
+TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+  cfg_.g_error_resilient = 1;
+
+  // 3 Temporal layers. Framerate decimation (4, 2, 1).
+  cfg_.ts_number_layers = 3;
+  cfg_.ts_rate_decimator[0] = 4;
+  cfg_.ts_rate_decimator[1] = 2;
+  cfg_.ts_rate_decimator[2] = 1;
+  cfg_.ts_periodicity = 4;
+  cfg_.ts_layer_id[0] = 0;
+  cfg_.ts_layer_id[1] = 2;
+  cfg_.ts_layer_id[2] = 1;
+  cfg_.ts_layer_id[3] = 2;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 200);
+  for (int i = 200; i <= 800; i += 200) {
+    cfg_.rc_target_bitrate = i;
+    Reset();
+    // 40-20-40 bitrate allocation for 3 temporal layers.
+    cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100;
+    cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100;
+    cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate;
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) {
+      ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75)
+          << " The datarate for the file is lower than target by too much, "
+              "for layer: " << j;
+      ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25)
+          << " The datarate for the file is greater than target by too much, "
+              "for layer: " << j;
+    }
+  }
+}
+
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
+VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls,
+                          ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES);
 }  // namespace
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -97,13 +97,19 @@ class ExternalFrameBufferList {
    return 0;
  }

-  // Marks the external frame buffer that |fb| is pointing too as free.
+  // Marks the external frame buffer that |fb| is pointing to as free.
  // Returns < 0 on an error.
  int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) {
-    EXPECT_TRUE(fb != NULL);
+    if (fb == NULL) {
+      EXPECT_TRUE(fb != NULL);
+      return -1;
+    }
    ExternalFrameBuffer *const ext_fb =
        reinterpret_cast<ExternalFrameBuffer*>(fb->priv);
-    EXPECT_TRUE(ext_fb != NULL);
+    if (ext_fb == NULL) {
+      EXPECT_TRUE(ext_fb != NULL);
+      return -1;
+    }
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
    return 0;
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -22,6 +22,7 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"

 using libvpx_test::ACMRandom;

@@ -75,7 +76,17 @@ void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
 void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
  vp9_highbd_iwht4x4_16_add_c(in, out, stride, 12);
 }
-#endif
+
+#if HAVE_SSE2
+void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+}
+
+void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+}
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class Trans4x4TestBase {
 public:
@@ -92,13 +103,13 @@ class Trans4x4TestBase {
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
-      DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-      DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

      // Initialize a test block with input range [-255, 255].
@@ -132,6 +143,7 @@ class Trans4x4TestBase {
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
 #else
+        ASSERT_EQ(VPX_BITS_8, bit_depth_);
        const uint32_t diff = dst[j] - src[j];
 #endif
        const uint32_t error = diff * diff;
@@ -153,9 +165,9 @@ class Trans4x4TestBase {
  void RunCoeffCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
@@ -174,15 +186,13 @@ class Trans4x4TestBase {
  void RunMemCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 5000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_block, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < kNumCoeffs; ++j) {
-        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
      }
      if (i == 0) {
@@ -209,13 +219,13 @@ class Trans4x4TestBase {
  void RunInvAccuracyCheck(int limit) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -416,7 +426,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans4x4DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_c, &vp9_idct4x4_16_add_c, 0, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -442,7 +452,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
@@ -456,7 +466,7 @@ INSTANTIATE_TEST_CASE_P(
    C, Trans4x4WHT,
    ::testing::Values(
        make_tuple(&vp9_fwht4x4_c, &vp9_iwht4x4_16_add_c, 0, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -464,14 +474,17 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vp9_fdct4x4_c,
                   &vp9_idct4x4_16_add_neon, 0, VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_NEON, Trans4x4HT,
+    NEON, Trans4x4HT,
    ::testing::Values(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if CONFIG_USE_X86INC && HAVE_MMX && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
@@ -494,6 +507,47 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

+#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct4x4_c,    &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vp9_fdct4x4_sse2,      &vp9_idct4x4_16_add_c, 0,
+                   VPX_BITS_8)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_sse2, &iht4x4_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct4x4_msa, &vp9_idct4x4_16_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -20,11 +20,32 @@

 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {

 const int kNumCoeffs = 64;
 const double kPi = 3.141592653589793238462643383279502884;
+
+const int kSignBiasMaxDiff255 = 1500;
+const int kSignBiasMaxDiff15 = 10000;
+
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;
+typedef std::tr1::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param;
+
 void reference_8x8_dct_1d(const double in[8], double out[8], int stride) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 8; k++) {
@@ -59,18 +80,6 @@ void reference_8x8_dct_2d(const int16_t input[kNumCoeffs],
  }
 }

-using libvpx_test::ACMRandom;
-
-namespace {
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        int tx_type);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        int tx_type);
-
-typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param;
-typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param;

 void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
  vp9_fdct8x8_c(in, out, stride);
@@ -96,7 +105,33 @@ void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
 }
-#endif
+
+void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_10_add_c(in, out, stride, 10);
+}
+
+void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_10_add_c(in, out, stride, 12);
+}
+
+#if HAVE_SSE2
+void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+}
+
+void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+}
+
+void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
+}
+
+void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vp9_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
+}
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 class FwdTrans8x8TestBase {
 public:
@@ -108,8 +143,8 @@ class FwdTrans8x8TestBase {

  void RunSignBiasCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_output_block, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]);
    int count_sign_block[64][2];
    const int count_test_block = 100000;

@@ -133,7 +168,7 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 1125;
+      const int max_diff = kSignBiasMaxDiff255;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
@@ -146,9 +181,10 @@ class FwdTrans8x8TestBase {
    memset(count_sign_block, 0, sizeof(count_sign_block));

    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-15, 15].
+      // Initialize a test block with input range [-mask_ / 16, mask_ / 16].
      for (int j = 0; j < 64; ++j)
-        test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
+        test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) -
+                              ((rnd.Rand16() & mask_) >> 4);
      ASM_REGISTER_STATE_CHECK(
          RunFwdTxfm(test_input_block, test_output_block, pitch_));

@@ -162,9 +198,9 @@ class FwdTrans8x8TestBase {

    for (int j = 0; j < 64; ++j) {
      const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]);
-      const int max_diff = 10000;
+      const int max_diff = kSignBiasMaxDiff15;
      EXPECT_LT(diff, max_diff << (bit_depth_ - 8))
-          << "Error: 4x4 FDCT/FHT has a sign bias > "
+          << "Error: 8x8 FDCT/FHT has a sign bias > "
          << 1. * max_diff / count_test_block * 100 << "%"
          << " for input range [-15, 15] at index " << j
          << " count0: " << count_sign_block[j][0]
@@ -178,17 +214,17 @@ class FwdTrans8x8TestBase {
    int max_error = 0;
    int total_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
+      // Initialize a test block with input range [-mask_, mask_].
      for (int j = 0; j < 64; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
@@ -255,14 +291,14 @@ class FwdTrans8x8TestBase {
    int total_error = 0;
    int total_coeff_error = 0;
    const int count_test_block = 100000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, test_input_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, ref_temp_block, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, 64);
+    DECLARE_ALIGNED(16, int16_t, test_input_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]);
+    DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]);
+    DECLARE_ALIGNED(16, uint8_t, dst[64]);
+    DECLARE_ALIGNED(16, uint8_t, src[64]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, 64);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, 64);
+    DECLARE_ALIGNED(16, uint16_t, dst16[64]);
+    DECLARE_ALIGNED(16, uint16_t, src16[64]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -344,13 +380,13 @@ class FwdTrans8x8TestBase {
  void RunInvAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, dst, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint8_t, src, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
 #if CONFIG_VP9_HIGHBITDEPTH
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, src16, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, uint16_t, dst16, kNumCoeffs);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
 #endif

    for (int i = 0; i < count_test_block; ++i) {
@@ -402,9 +438,9 @@ class FwdTrans8x8TestBase {
  void RunFwdAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 1000;
-    DECLARE_ALIGNED_ARRAY(16, int16_t, in, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff_r, kNumCoeffs);
-    DECLARE_ALIGNED_ARRAY(16, tran_low_t, coeff, kNumCoeffs);
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);

    for (int i = 0; i < count_test_block; ++i) {
      double out_r[kNumCoeffs];
@@ -427,6 +463,63 @@ class FwdTrans8x8TestBase {
      }
    }
  }
+
+void CompareInvReference(IdctFunc ref_txfm, int thresh) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 10000;
+    const int eob = 12;
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
+#endif
+    const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan;
+
+    for (int i = 0; i < count_test_block; ++i) {
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (j < eob) {
+          // Random values less than the threshold, either positive or negative
+          coeff[scan[j]] = rnd(thresh) * (1-2*(i%2));
+        } else {
+          coeff[scan[j]] = 0;
+        }
+        if (bit_depth_ == VPX_BITS_8) {
+          dst[j] = 0;
+          ref[j] = 0;
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          dst16[j] = 0;
+          ref16[j] = 0;
+#endif
+        }
+      }
+      if (bit_depth_ == VPX_BITS_8) {
+        ref_txfm(coeff, ref, pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16),
+                                            pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const uint32_t diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
+#else
+        const uint32_t diff = dst[j] - ref[j];
+#endif
+        const uint32_t error = diff * diff;
+        EXPECT_EQ(0u, error)
+            << "Error: 8x8 IDCT has error " << error
+            << " at index " << j;
+      }
+    }
+  }
  int pitch_;
  int tx_type_;
  FhtFunc fwd_txfm_ref;
@@ -526,26 +619,59 @@ TEST_P(FwdTrans8x8HT, ExtremalCheck) {
  RunExtremalCheck();
 }

+class InvTrans8x8DCT
+    : public FwdTrans8x8TestBase,
+      public ::testing::TestWithParam<Idct8x8Param> {
+ public:
+  virtual ~InvTrans8x8DCT() {}
+
+  virtual void SetUp() {
+    ref_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    thresh_ = GET_PARAM(2);
+    pitch_ = 8;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+  void RunFwdTxfm(int16_t *out, tran_low_t *dst, int stride) {}
+
+  IdctFunc ref_txfm_;
+  IdctFunc inv_txfm_;
+  int thresh_;
+};
+
+TEST_P(InvTrans8x8DCT, CompareReference) {
+  CompareInvReference(ref_txfm_, thresh_);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
+        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12),
-        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
+        make_tuple(&vp9_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_c, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
+        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10),
@@ -554,11 +680,12 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #else
+// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
+// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    C, FwdTrans8x8HT,
    ::testing::Values(
@@ -566,24 +693,31 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
-#endif
+#endif  // CONFIG_VP9_HIGHBITDEPTH

 #if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
+// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_neon, &vp9_idct8x8_64_add_neon, 0,
                   VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    DISABLED_NEON, FwdTrans8x8HT,
+    NEON, FwdTrans8x8HT,
    ::testing::Values(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
+// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSE2, FwdTrans8x8DCT,
    ::testing::Values(
@@ -596,14 +730,69 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8)));
-#endif
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct8x8_sse2, &vp9_idct8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_highbd_fdct8x8_c,
+                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct8x8_sse2,
+                   &idct8x8_64_add_10_sse2, 12, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fdct8x8_c,
+                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fdct8x8_sse2,
+                   &idct8x8_64_add_12_sse2, 12, VPX_BITS_12)));
+
+// TODO(jingning): re-enable after these handle the expanded range [0, 65535]
+// returned from Rand16().
+INSTANTIATE_TEST_CASE_P(
+    SSE2, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
+
+// Optimizations take effect at a threshold of 6201, so we use a value close to
+// that to test both branches.
+INSTANTIATE_TEST_CASE_P(
+    SSE2, InvTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&idct8x8_10_add_10_c,
+                   &idct8x8_10_add_10_sse2, 6225, VPX_BITS_10),
+        make_tuple(&idct8x8_10,
+                   &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
+        make_tuple(&idct8x8_10_add_12_c,
+                   &idct8x8_10_add_12_sse2, 6225, VPX_BITS_12),
+        make_tuple(&idct8x8_12,
+                   &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
    !CONFIG_EMULATE_HARDWARE
+// TODO(jingning): re-enable after this handles the expanded range [0, 65535]
+// returned from Rand16().
 INSTANTIATE_TEST_CASE_P(
    SSSE3, FwdTrans8x8DCT,
    ::testing::Values(
        make_tuple(&vp9_fdct8x8_ssse3, &vp9_idct8x8_64_add_ssse3, 0,
                   VPX_BITS_8)));
 #endif
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    MSA, FwdTrans8x8DCT,
+    ::testing::Values(
+        make_tuple(&vp9_fdct8x8_msa, &vp9_idct8x8_64_add_msa, 0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, FwdTrans8x8HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/i420_video_source.h
+++ b/test/i420_video_source.h
@@ -13,104 +13,22 @@
 #include <cstdlib>
 #include <string>

-#include "test/video_source.h"
+#include "test/yuv_video_source.h"

 namespace libvpx_test {

 // This class extends VideoSource to allow parsing of raw yv12
 // so that we can do actual file encodes.
-class I420VideoSource : public VideoSource {
+class I420VideoSource : public YUVVideoSource {
 public:
  I420VideoSource(const std::string &file_name,
                  unsigned int width, unsigned int height,
                  int rate_numerator, int rate_denominator,
                  unsigned int start, int limit)
-      : file_name_(file_name),
-        input_file_(NULL),
-        img_(NULL),
-        start_(start),
-        limit_(limit),
-        frame_(0),
-        width_(0),
-        height_(0),
-        framerate_numerator_(rate_numerator),
-        framerate_denominator_(rate_denominator) {
-    // This initializes raw_sz_, width_, height_ and allocates an img.
-    SetSize(width, height);
-  }
-
-  virtual ~I420VideoSource() {
-    vpx_img_free(img_);
-    if (input_file_)
-      fclose(input_file_);
-  }
-
-  virtual void Begin() {
-    if (input_file_)
-      fclose(input_file_);
-    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
-        << file_name_;
-    if (start_) {
-      fseek(input_file_, static_cast<unsigned>(raw_sz_) * start_, SEEK_SET);
-    }
-
-    frame_ = start_;
-    FillFrame();
-  }
-
-  virtual void Next() {
-    ++frame_;
-    FillFrame();
-  }
-
-  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
-
-  // Models a stream where Timebase = 1/FPS, so pts == frame.
-  virtual vpx_codec_pts_t pts() const { return frame_; }
-
-  virtual unsigned long duration() const { return 1; }
-
-  virtual vpx_rational_t timebase() const {
-    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
-    return t;
-  }
-
-  virtual unsigned int frame() const { return frame_; }
-
-  virtual unsigned int limit() const { return limit_; }
-
-  void SetSize(unsigned int width, unsigned int height) {
-    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 1);
-      ASSERT_TRUE(img_ != NULL);
-      width_ = width;
-      height_ = height;
-      raw_sz_ = width * height * 3 / 2;
-    }
-  }
-
-  virtual void FillFrame() {
-    ASSERT_TRUE(input_file_ != NULL);
-    // Read a frame from input_file.
-    if (fread(img_->img_data, raw_sz_, 1, input_file_) == 0) {
-      limit_ = frame_;
-    }
-  }
-
- protected:
-  std::string file_name_;
-  FILE *input_file_;
-  vpx_image_t *img_;
-  size_t raw_sz_;
-  unsigned int start_;
-  unsigned int limit_;
-  unsigned int frame_;
-  unsigned int width_;
-  unsigned int height_;
-  int framerate_numerator_;
-  int framerate_denominator_;
+      : YUVVideoSource(file_name, VPX_IMG_FMT_I420,
+                       width, height,
+                       rate_numerator, rate_denominator,
+                       start, limit) {}
 };

 }  // namespace libvpx_test
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -121,6 +121,8 @@ const DecodeParam kVP9InvalidFileTests[] = {
  {1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf"},
  {1, "invalid-vp91-2-mixedrefcsp-444to420.ivf"},
  {1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf"},
+  {1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf"},
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
@@ -151,6 +153,7 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
  {4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm"},
  {4, "invalid-"
      "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf"},
+  {4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf"},
  {2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf"},
  {4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf"},
 };
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -21,6 +21,7 @@
 #include "./vpx_config.h"
 #include "./vp9_rtcd.h"
 #include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_loopfilter.h"
 #include "vpx/vpx_integer.h"

 using libvpx_test::ACMRandom;
@@ -51,7 +52,7 @@ typedef void (*dual_loop_op_t)(uint8_t *s, int p, const uint8_t *blimit0,
                               const uint8_t *thresh1);
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-typedef std::tr1::tuple<loop_op_t, loop_op_t, int> loop8_param_t;
+typedef std::tr1::tuple<loop_op_t, loop_op_t, int, int> loop8_param_t;
 typedef std::tr1::tuple<dual_loop_op_t, dual_loop_op_t, int> dualloop8_param_t;

 #if HAVE_SSE2
@@ -106,6 +107,50 @@ void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_SSE2

+#if HAVE_NEON_ASM
+#if CONFIG_VP9_HIGHBITDEPTH
+// No neon high bitdepth functions.
+#else
+void wrapper_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit,
+                              const uint8_t *limit, const uint8_t *thresh,
+                              int count) {
+  vp9_lpf_vertical_16_neon(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count) {
+  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit,
+                                   const uint8_t *limit, const uint8_t *thresh,
+                                   int count) {
+  vp9_lpf_vertical_16_dual_neon(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_dual_c(uint8_t *s, int p, const uint8_t *blimit,
+                                const uint8_t *limit, const uint8_t *thresh,
+                                int count) {
+  vp9_lpf_vertical_16_dual_c(s, p, blimit, limit, thresh);
+}
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON_ASM
+
+#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+void wrapper_vertical_16_msa(uint8_t *s, int p, const uint8_t *blimit,
+                             const uint8_t *limit, const uint8_t *thresh,
+                             int count) {
+  vp9_lpf_vertical_16_msa(s, p, blimit, limit, thresh);
+}
+
+void wrapper_vertical_16_c(uint8_t *s, int p, const uint8_t *blimit,
+                           const uint8_t *limit, const uint8_t *thresh,
+                           int count) {
+  vp9_lpf_vertical_16_c(s, p, blimit, limit, thresh);
+}
+#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@@ -113,6 +158,7 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
    loopfilter_op_ = GET_PARAM(0);
    ref_loopfilter_op_ = GET_PARAM(1);
    bit_depth_ = GET_PARAM(2);
+    count_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

@@ -120,6 +166,7 @@ class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {

 protected:
  int bit_depth_;
+  int count_;
  int mask_;
  loop_op_t loopfilter_op_;
  loop_op_t ref_loopfilter_op_;
@@ -149,22 +196,22 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  int32_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -175,7 +222,6 @@ TEST_P(Loop8Test6Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs/32;
-    int count = 1;

    uint16_t tmp_s[kNumCoeffs];
    int j = 0;
@@ -207,13 +253,13 @@ TEST_P(Loop8Test6Param, OperationCheck) {
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

    for (int j = 0; j < kNumCoeffs; ++j) {
@@ -235,22 +281,35 @@ TEST_P(Loop8Test6Param, ValueCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8, uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8, uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8, uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
+
+  // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a
+  // function of sharpness_lvl and the loopfilter lvl as:
+  // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+  // ...
+  // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+  //        SIMD_WIDTH);
+  // This means that the largest value for mblim will occur when sharpness_lvl
+  // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER).
+  // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and
+  // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) =
+  // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4
+
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -261,19 +320,18 @@ TEST_P(Loop8Test6Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
    int32_t p = kNumCoeffs / 32;
-    int count = 1;
    for (int j = 0; j < kNumCoeffs; ++j) {
      s[j] = rnd.Rand16() & mask_;
      ref_s[j] = s[j];
    }
 #if CONFIG_VP9_HIGHBITDEPTH
-    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count, bd);
+    ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, count_, bd);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count, bd));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_, bd));
 #else
-    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count);
+    ref_loopfilter_op_(ref_s+8+p*8, p, blimit, limit, thresh, count_);
    ASM_REGISTER_STATE_CHECK(
-        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count));
+        loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, count_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int j = 0; j < kNumCoeffs; ++j) {
      err_count += ref_s[j] != s[j];
@@ -294,22 +352,22 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
  const int32_t bd = bit_depth_;
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t,  ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8,  uint8_t,  s[kNumCoeffs]);
+  DECLARE_ALIGNED(8,  uint8_t,  ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -319,12 +377,12 @@ TEST_P(Loop8Test9Param, OperationCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -396,22 +454,22 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = number_of_iterations;
 #if CONFIG_VP9_HIGHBITDEPTH
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(16, uint16_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(16, uint16_t, ref_s[kNumCoeffs]);
 #else
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t, s, kNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(8,  uint8_t, ref_s, kNumCoeffs);
+  DECLARE_ALIGNED(8,  uint8_t, s[kNumCoeffs]);
+  DECLARE_ALIGNED(8,  uint8_t, ref_s[kNumCoeffs]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  int err_count_total = 0;
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = rnd.Rand8();
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit0[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -421,12 +479,12 @@ TEST_P(Loop8Test9Param, ValueCheck) {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
    };
-    tmp = rnd.Rand8();
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t, limit1[16])  = {
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
        tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp
@@ -474,77 +532,81 @@ using std::tr1::make_tuple;
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
+    SSE2, Loop8Test6Param,
    ::testing::Values(
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 8),
+                   &vp9_highbd_lpf_horizontal_4_c, 8, 1),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 8),
+                   &vp9_highbd_lpf_vertical_4_c, 8, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 8),
+                   &vp9_highbd_lpf_horizontal_8_c, 8, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 8),
+                   &vp9_highbd_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 8, 2),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 8),
+                   &vp9_highbd_lpf_vertical_8_c, 8, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 8),
+                   &wrapper_vertical_16_c, 8, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 10),
+                   &vp9_highbd_lpf_horizontal_4_c, 10, 1),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 10),
+                   &vp9_highbd_lpf_vertical_4_c, 10, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 10),
+                   &vp9_highbd_lpf_horizontal_8_c, 10, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 10),
+                   &vp9_highbd_lpf_horizontal_16_c, 10, 1),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 10, 2),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 10),
+                   &vp9_highbd_lpf_vertical_8_c, 10, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 10),
+                   &wrapper_vertical_16_c, 10, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_4_sse2,
-                   &vp9_highbd_lpf_horizontal_4_c, 12),
+                   &vp9_highbd_lpf_horizontal_4_c, 12, 1),
        make_tuple(&vp9_highbd_lpf_vertical_4_sse2,
-                   &vp9_highbd_lpf_vertical_4_c, 12),
+                   &vp9_highbd_lpf_vertical_4_c, 12, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_8_sse2,
-                   &vp9_highbd_lpf_horizontal_8_c, 12),
+                   &vp9_highbd_lpf_horizontal_8_c, 12, 1),
        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
-                   &vp9_highbd_lpf_horizontal_16_c, 12),
+                   &vp9_highbd_lpf_horizontal_16_c, 12, 1),
+        make_tuple(&vp9_highbd_lpf_horizontal_16_sse2,
+                   &vp9_highbd_lpf_horizontal_16_c, 12, 2),
        make_tuple(&vp9_highbd_lpf_vertical_8_sse2,
-                   &vp9_highbd_lpf_vertical_8_c, 12),
+                   &vp9_highbd_lpf_vertical_8_c, 12, 1),
        make_tuple(&wrapper_vertical_16_sse2,
-                   &wrapper_vertical_16_c, 12)));
+                   &wrapper_vertical_16_c, 12, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 10, 1),
+        make_tuple(&wrapper_vertical_16_dual_sse2,
+                   &wrapper_vertical_16_dual_c, 12, 1)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_SINGLE, Loop8Test6Param,
+    SSE2, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8),
-        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8),
-        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8)));
+        make_tuple(&vp9_lpf_horizontal_8_sse2, &vp9_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_sse2, &vp9_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vp9_lpf_vertical_8_sse2, &vp9_lpf_vertical_8_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8, 1)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif

+#if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    AVX2, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_avx2, &vp9_lpf_horizontal_16_c, 8,
+                   2)));
+#endif
+
 #if HAVE_SSE2
 #if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 8),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 10),
-        make_tuple(&wrapper_vertical_16_dual_sse2,
-                   &wrapper_vertical_16_dual_c, 12)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2_C_COMPARE_DUAL, Loop8Test6Param,
-    ::testing::Values(
-        make_tuple(&wrapper_vertical_16_sse2, &wrapper_vertical_16_c, 8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_SSE2
-
-#if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    SSE_C_COMPARE_DUAL, Loop8Test9Param,
+    SSE2, Loop8Test9Param,
    ::testing::Values(
        make_tuple(&vp9_highbd_lpf_horizontal_4_dual_sse2,
                   &vp9_highbd_lpf_horizontal_4_dual_c, 8),
@@ -572,7 +634,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_highbd_lpf_vertical_8_dual_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
-    SSE_C_COMPARE_DUAL, Loop8Test9Param,
+    SSE2, Loop8Test9Param,
    ::testing::Values(
        make_tuple(&vp9_lpf_horizontal_4_dual_sse2,
                   &vp9_lpf_horizontal_4_dual_c, 8),
@@ -584,4 +646,71 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_lpf_vertical_8_dual_c, 8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif
+
+#if HAVE_NEON
+#if CONFIG_VP9_HIGHBITDEPTH
+// No neon high bitdepth functions.
+#else
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,
+    ::testing::Values(
+#if HAVE_NEON_ASM
+// Using #if inside the macro is unsupported on MSVS but the tests are not
+// currently built for MSVS with ARM and NEON.
+        make_tuple(&vp9_lpf_horizontal_16_neon,
+                   &vp9_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_neon,
+                   &vp9_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&wrapper_vertical_16_neon,
+                   &wrapper_vertical_16_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_dual_neon,
+                   &wrapper_vertical_16_dual_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_8_neon,
+                   &vp9_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vp9_lpf_vertical_8_neon,
+                   &vp9_lpf_vertical_8_c, 8, 1),
+#endif  // HAVE_NEON_ASM
+        make_tuple(&vp9_lpf_horizontal_4_neon,
+                   &vp9_lpf_horizontal_4_c, 8, 1),
+        make_tuple(&vp9_lpf_vertical_4_neon,
+                   &vp9_lpf_vertical_4_c, 8, 1)));
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test9Param,
+    ::testing::Values(
+#if HAVE_NEON_ASM
+        make_tuple(&vp9_lpf_horizontal_8_dual_neon,
+                   &vp9_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_8_dual_neon,
+                   &vp9_lpf_vertical_8_dual_c, 8),
+#endif  // HAVE_NEON_ASM
+        make_tuple(&vp9_lpf_horizontal_4_dual_neon,
+                   &vp9_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_4_dual_neon,
+                   &vp9_lpf_vertical_4_dual_c, 8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON
+
+#if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test6Param,
+    ::testing::Values(
+        make_tuple(&vp9_lpf_horizontal_8_msa, &vp9_lpf_horizontal_8_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 1),
+        make_tuple(&vp9_lpf_horizontal_16_msa, &vp9_lpf_horizontal_16_c, 8, 2),
+        make_tuple(&vp9_lpf_vertical_8_msa, &vp9_lpf_vertical_8_c, 8, 1),
+        make_tuple(&wrapper_vertical_16_msa, &wrapper_vertical_16_c, 8, 1)));
+
+INSTANTIATE_TEST_CASE_P(
+    MSA, Loop8Test9Param,
+    ::testing::Values(
+        make_tuple(&vp9_lpf_horizontal_4_dual_msa,
+                   &vp9_lpf_horizontal_4_dual_c, 8),
+        make_tuple(&vp9_lpf_horizontal_8_dual_msa,
+                   &vp9_lpf_horizontal_8_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_4_dual_msa,
+                   &vp9_lpf_vertical_4_dual_c, 8),
+        make_tuple(&vp9_lpf_vertical_8_dual_msa,
+                   &vp9_lpf_vertical_8_dual_c, 8)));
+#endif  // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH)
+
 }  // namespace
--- a/test/md5_helper.h
+++ b/test/md5_helper.h
@@ -42,6 +42,10 @@ class MD5 {
    }
  }

+  void Add(const uint8_t *data, size_t size) {
+    MD5Update(&md5_, data, static_cast<uint32_t>(size));
+  }
+
  const char *Get(void) {
    static const char hex[16] = {
      '0', '1', '2', '3', '4', '5', '6', '7',
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -74,16 +74,16 @@ TEST_P(PartialIDctTest, RunQuantCheck) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);

  const int count_test_block = 1000;
  const int block_size = size * size;

-  DECLARE_ALIGNED_ARRAY(16, int16_t, input_extreme_block, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, output_ref_block, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

  int max_error = 0;
  for (int i = 0; i < count_test_block; ++i) {
@@ -153,10 +153,10 @@ TEST_P(PartialIDctTest, ResultsMatch) {
      FAIL() << "Wrong Size!";
      break;
  }
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, tran_low_t, test_coef_block2, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst1, kMaxNumCoeffs);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, dst2, kMaxNumCoeffs);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
+  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
  const int count_test_block = 1000;
  const int max_coeff = 32766 / 4;
  const int block_size = size * size;
@@ -230,7 +230,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct4x4_1_add_c,
                   TX_4X4, 1)));

-#if HAVE_NEON_ASM
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, PartialIDctTest,
    ::testing::Values(
@@ -258,7 +258,7 @@ INSTANTIATE_TEST_CASE_P(
                   &vp9_idct4x4_16_add_c,
                   &vp9_idct4x4_1_add_neon,
                   TX_4X4, 1)));
-#endif
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -305,13 +305,38 @@ INSTANTIATE_TEST_CASE_P(
                   TX_8X8, 12)));
 #endif

-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
-    SSSE3, PartialIDctTest,
+    MSA, PartialIDctTest,
    ::testing::Values(
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_34_add_msa,
+                   TX_32X32, 34),
+        make_tuple(&vp9_fdct32x32_c,
+                   &vp9_idct32x32_1024_add_c,
+                   &vp9_idct32x32_1_add_msa,
+                   TX_32X32, 1),
        make_tuple(&vp9_fdct16x16_c,
                   &vp9_idct16x16_256_add_c,
-                   &vp9_idct16x16_10_add_ssse3,
-                   TX_16X16, 10)));
-#endif
+                   &vp9_idct16x16_10_add_msa,
+                   TX_16X16, 10),
+        make_tuple(&vp9_fdct16x16_c,
+                   &vp9_idct16x16_256_add_c,
+                   &vp9_idct16x16_1_add_msa,
+                   TX_16X16, 1),
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_12_add_msa,
+                   TX_8X8, 10),
+        make_tuple(&vp9_fdct8x8_c,
+                   &vp9_idct8x8_64_add_c,
+                   &vp9_idct8x8_1_add_msa,
+                   TX_8X8, 1),
+        make_tuple(&vp9_fdct4x4_c,
+                   &vp9_idct4x4_16_add_c,
+                   &vp9_idct4x4_1_add_msa,
+                   TX_4X4, 1)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
 }  // namespace
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -63,12 +63,12 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
-  (void)vpx_memset(flimits, 255, block_width);
+  (void)memset(flimits, 255, block_width);

  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
-  (void)vpx_memset(src_image, 10, input_size);
+  (void)memset(src_image, 10, input_size);
  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
@@ -78,7 +78,7 @@ TEST_P(VP8PostProcessingFilterTest, FilterOutputCheck) {
  }

  // Initialize pixels in the output to 99.
-  (void)vpx_memset(dst_image, 99, output_size);
+  (void)memset(dst_image, 99, output_size);

  ASM_REGISTER_STATE_CHECK(
      GetParam()(src_image_ptr, dst_image_ptr, input_stride,
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -0,0 +1,195 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+#include "vp8/common/blockd.h"
+#include "vp8/common/onyx.h"
+#include "vp8/encoder/block.h"
+#include "vp8/encoder/onyx_int.h"
+#include "vp8/encoder/quantize.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+const int kNumBlocks = 25;
+const int kNumBlockEntries = 16;
+
+typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);
+
+typedef std::tr1::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;
+
+using libvpx_test::ACMRandom;
+using std::tr1::make_tuple;
+
+// Create and populate a VP8_COMP instance which has a complete set of
+// quantization inputs as well as a second MACROBLOCKD for output.
+class QuantizeTestBase {
+ public:
+  virtual ~QuantizeTestBase() {
+    vp8_remove_compressor(&vp8_comp_);
+    vp8_comp_ = NULL;
+    vpx_free(macroblockd_dst_);
+    macroblockd_dst_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  void SetupCompressor() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
+
+    // The full configuration is necessary to generate the quantization tables.
+    VP8_CONFIG vp8_config;
+    memset(&vp8_config, 0, sizeof(vp8_config));
+
+    vp8_comp_ = vp8_create_compressor(&vp8_config);
+
+    // Set the tables based on a quantizer of 0.
+    vp8_set_quantizer(vp8_comp_, 0);
+
+    // Set up all the block/blockd pointers for the mb in vp8_comp_.
+    vp8cx_frame_init_quantizer(vp8_comp_);
+
+    // Copy macroblockd from the reference to get pre-set-up dequant values.
+    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
+        vpx_memalign(32, sizeof(*macroblockd_dst_)));
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
+    // Fix block pointers - currently they point to the blocks in the reference
+    // structure.
+    vp8_setup_block_dptrs(macroblockd_dst_);
+  }
+
+  void UpdateQuantizer(int q) {
+    vp8_set_quantizer(vp8_comp_, q);
+
+    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
+    vp8_setup_block_dptrs(macroblockd_dst_);
+  }
+
+  void FillCoeffConstant(int16_t c) {
+    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
+      vp8_comp_->mb.coeff[i] = c;
+    }
+  }
+
+  void FillCoeffRandom() {
+    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
+      vp8_comp_->mb.coeff[i] = rnd_.Rand8();
+    }
+  }
+
+  void CheckOutput() {
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
+                        sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks *
+                            kNumBlockEntries))
+        << "qcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
+                        sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks *
+                            kNumBlockEntries))
+        << "dqcoeff mismatch";
+    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
+                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
+        << "eobs mismatch";
+  }
+
+  VP8_COMP *vp8_comp_;
+  MACROBLOCKD *macroblockd_dst_;
+
+ private:
+  ACMRandom rnd_;
+};
+
+class QuantizeTest : public QuantizeTestBase,
+                     public ::testing::TestWithParam<VP8QuantizeParam> {
+ protected:
+  virtual void SetUp() {
+    SetupCompressor();
+    asm_quant_ = GET_PARAM(0);
+    c_quant_ = GET_PARAM(1);
+  }
+
+  void RunComparison() {
+    for (int i = 0; i < kNumBlocks; ++i) {
+      ASM_REGISTER_STATE_CHECK(
+          c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i]));
+      ASM_REGISTER_STATE_CHECK(
+          asm_quant_(&vp8_comp_->mb.block[i], &macroblockd_dst_->block[i]));
+    }
+
+    CheckOutput();
+  }
+
+ private:
+  VP8Quantize asm_quant_;
+  VP8Quantize c_quant_;
+};
+
+TEST_P(QuantizeTest, TestZeroInput) {
+  FillCoeffConstant(0);
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestLargeNegativeInput) {
+  FillCoeffConstant(0);
+  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
+  // like BUG=883 where the constant being compared was incorrectly initialized.
+  vp8_comp_->mb.coeff[0] = -8191;
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestRandomInput) {
+  FillCoeffRandom();
+  RunComparison();
+}
+
+TEST_P(QuantizeTest, TestMultipleQ) {
+  for (int q = 0; q < QINDEX_RANGE; ++q) {
+    UpdateQuantizer(q);
+    FillCoeffRandom();
+    RunComparison();
+  }
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
+        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_SSE4_1
+INSTANTIATE_TEST_CASE_P(
+    SSE4_1, QuantizeTest,
+    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
+                                 &vp8_regular_quantize_b_c)));
+#endif  // HAVE_SSE4_1
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
+                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
+                                                     &vp8_fast_quantize_b_c)));
+#endif  // HAVE_NEON
+}  // namespace
--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -144,6 +144,7 @@ class ResizeTest : public ::libvpx_test::EncoderTest,

 TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
+  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
@@ -153,9 +154,9 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
    const unsigned int expected_h = ScaleForFrameNumber(frame, kInitialHeight);

    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << "had unexpected width";
+        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << "had unexpected height";
+        << "Frame " << frame << " had unexpected height";
  }
 }

@@ -260,7 +261,116 @@ TEST_P(ResizeInternalTest, TestInternalResizeWorks) {
  }
 }

+vpx_img_fmt_t CspForFrameNumber(int frame) {
+  if (frame < 10)
+    return VPX_IMG_FMT_I420;
+  if (frame < 20)
+    return VPX_IMG_FMT_I444;
+  return VPX_IMG_FMT_I420;
+}
+
+class ResizeCspTest : public ResizeTest {
+ protected:
+#if WRITE_COMPRESSED_STREAM
+  ResizeCspTest()
+      : ResizeTest(),
+        frame0_psnr_(0.0),
+        outfile_(NULL),
+        out_frames_(0) {}
+#else
+  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
+#endif
+
+  virtual ~ResizeCspTest() {}
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+#if WRITE_COMPRESSED_STREAM
+    outfile_ = fopen("vp91-2-05-cspchape.ivf", "wb");
+#endif
+  }
+
+  virtual void EndPassHook() {
+#if WRITE_COMPRESSED_STREAM
+    if (outfile_) {
+      if (!fseek(outfile_, 0, SEEK_SET))
+        write_ivf_file_header(&cfg_, out_frames_, outfile_);
+      fclose(outfile_);
+      outfile_ = NULL;
+    }
+#endif
+  }
+
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  libvpx_test::Encoder *encoder) {
+    if (CspForFrameNumber(video->frame()) != VPX_IMG_FMT_I420 &&
+        cfg_.g_profile != 1) {
+      cfg_.g_profile = 1;
+      encoder->Config(&cfg_);
+    }
+    if (CspForFrameNumber(video->frame()) == VPX_IMG_FMT_I420 &&
+        cfg_.g_profile != 0) {
+      cfg_.g_profile = 0;
+      encoder->Config(&cfg_);
+    }
+  }
+
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (!frame0_psnr_)
+      frame0_psnr_ = pkt->data.psnr.psnr[0];
+    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
+  }
+
+#if WRITE_COMPRESSED_STREAM
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    ++out_frames_;
+
+    // Write initial file header if first frame.
+    if (pkt->data.frame.pts == 0)
+      write_ivf_file_header(&cfg_, 0, outfile_);
+
+    // Write frame header and data.
+    write_ivf_frame_header(pkt, outfile_);
+    (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_);
+  }
+#endif
+
+  double frame0_psnr_;
+#if WRITE_COMPRESSED_STREAM
+  FILE *outfile_;
+  unsigned int out_frames_;
+#endif
+};
+
+class ResizingCspVideoSource : public ::libvpx_test::DummyVideoSource {
+ public:
+  ResizingCspVideoSource() {
+    SetSize(kInitialWidth, kInitialHeight);
+    limit_ = 30;
+  }
+
+  virtual ~ResizingCspVideoSource() {}
+
+ protected:
+  virtual void Next() {
+    ++frame_;
+    SetImageFormat(CspForFrameNumber(frame_));
+    FillFrame();
+  }
+};
+
+TEST_P(ResizeCspTest, TestResizeCspWorks) {
+  ResizingCspVideoSource video;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48;
+  cfg_.g_lag_in_frames = 0;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
 VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES);
+VP9_INSTANTIATE_TEST_CASE(ResizeTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
 VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest,
                          ::testing::Values(::libvpx_test::kOnePassBest));
+VP9_INSTANTIATE_TEST_CASE(ResizeCspTest,
+                          ::testing::Values(::libvpx_test::kRealTime));
 }  // namespace
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -53,7 +53,7 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  cpi.common.mb_rows = 240 >> 4;
  cpi.common.mb_cols = 320 >> 4;
  const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols);
-  vpx_memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));
+  memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data));

  // Segment map
  cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
@@ -61,9 +61,9 @@ TEST(VP8RoiMapTest, ParameterCheck) {
  // Allocate memory for the source memory map.
  unsigned char *roi_map =
    reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1));
-  vpx_memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
-  vpx_memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
-  vpx_memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));
+  memset(&roi_map[mbs >> 2], 1, (mbs >> 2));
+  memset(&roi_map[mbs >> 1], 2, (mbs >> 2));
+  memset(&roi_map[mbs -(mbs >> 2)], 3, (mbs >> 2));

  // Do a test call with valid parameters.
  int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows,
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -63,6 +63,9 @@ class SvcTest : public ::testing::Test {
    vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
    VP9CodecFactory codec_factory;
    decoder_ = codec_factory.CreateDecoder(dec_cfg, 0);
+
+    tile_columns_ = 0;
+    tile_rows_ = 0;
  }

  virtual void TearDown() {
@@ -75,6 +78,8 @@ class SvcTest : public ::testing::Test {
        vpx_svc_init(&svc_, &codec_, vpx_codec_vp9_cx(), &codec_enc_);
    EXPECT_EQ(VPX_CODEC_OK, res);
    vpx_codec_control(&codec_, VP8E_SET_CPUUSED, 4);  // Make the test faster
+    vpx_codec_control(&codec_, VP9E_SET_TILE_COLUMNS, tile_columns_);
+    vpx_codec_control(&codec_, VP9E_SET_TILE_ROWS, tile_rows_);
    codec_initialized_ = true;
  }

@@ -108,7 +113,8 @@ class SvcTest : public ::testing::Test {
    codec_enc_.g_pass = VPX_RC_FIRST_PASS;
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -176,7 +182,8 @@ class SvcTest : public ::testing::Test {
    }
    InitializeEncoder();

-    libvpx_test::I420VideoSource video(test_file_name_, kWidth, kHeight,
+    libvpx_test::I420VideoSource video(test_file_name_,
+                                       codec_enc_.g_w, codec_enc_.g_h,
                                       codec_enc_.g_timebase.den,
                                       codec_enc_.g_timebase.num, 0, 30);
    video.Begin();
@@ -225,10 +232,9 @@ class SvcTest : public ::testing::Test {
    EXPECT_EQ(received_frames, n);
  }

-  void DropLayersAndMakeItVP9Comaptible(struct vpx_fixed_buf *const inputs,
-                                        const int num_super_frames,
-                                        const int remained_spatial_layers,
-                                        const bool is_multiple_frame_contexts) {
+  void DropEnhancementLayers(struct vpx_fixed_buf *const inputs,
+                             const int num_super_frames,
+                             const int remained_spatial_layers) {
    ASSERT_TRUE(inputs != NULL);
    ASSERT_GT(num_super_frames, 0);
    ASSERT_GT(remained_spatial_layers, 0);
@@ -250,45 +256,6 @@ class SvcTest : public ::testing::Test {
      if (frame_count == 0) {
        // There's no super frame but only a single frame.
        ASSERT_EQ(1, remained_spatial_layers);
-        if (is_multiple_frame_contexts) {
-          // Make a new super frame.
-          uint8_t marker = 0xc1;
-          unsigned int mask;
-          int mag;
-
-          // Choose the magnitude.
-          for (mag = 0, mask = 0xff; mag < 4; ++mag) {
-            if (inputs[i].sz < mask)
-              break;
-            mask <<= 8;
-            mask |= 0xff;
-          }
-          marker |= mag << 3;
-          int index_sz = 2 + (mag + 1) * 2;
-
-          inputs[i].buf = realloc(inputs[i].buf, inputs[i].sz + index_sz + 16);
-          ASSERT_TRUE(inputs[i].buf != NULL);
-          uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
-          frame_data[0] &= ~2;      // Set the show_frame flag to 0.
-          frame_data += inputs[i].sz;
-          // Add an one byte frame with show_existing_frame.
-          *frame_data++ = 0x88;
-
-          // Write the super frame index.
-          *frame_data++ = marker;
-
-          frame_sizes[0] = inputs[i].sz;
-          frame_sizes[1] = 1;
-          for (int j = 0; j < 2; ++j) {
-            unsigned int this_sz = frame_sizes[j];
-            for (int k = 0; k <= mag; k++) {
-              *frame_data++ = this_sz & 0xff;
-              this_sz >>= 8;
-            }
-          }
-          *frame_data++ = marker;
-          inputs[i].sz += index_sz + 1;
-        }
      } else {
        // Found a super frame.
        uint8_t *frame_data = static_cast<uint8_t*>(inputs[i].buf);
@@ -304,16 +271,13 @@ class SvcTest : public ::testing::Test {
        }
        ASSERT_LT(frame, frame_count) << "Couldn't find a visible frame. "
            << "remained_spatial_layers: " << remained_spatial_layers
-            << "    super_frame: " << i
-            << "    is_multiple_frame_context: " << is_multiple_frame_contexts;
-        if (frame == frame_count - 1 && !is_multiple_frame_contexts)
+            << "    super_frame: " << i;
+        if (frame == frame_count - 1)
          continue;

        frame_data += frame_sizes[frame];

        // We need to add one more frame for multiple frame contexts.
-        if (is_multiple_frame_contexts)
-          ++frame;
        uint8_t marker =
            static_cast<const uint8_t*>(inputs[i].buf)[inputs[i].sz - 1];
        const uint32_t mag = ((marker >> 3) & 0x3) + 1;
@@ -323,35 +287,14 @@ class SvcTest : public ::testing::Test {
        marker |= frame;

        // Copy existing frame sizes.
-        memmove(frame_data + (is_multiple_frame_contexts ? 2 : 1),
-                frame_start + inputs[i].sz - index_sz + 1, new_index_sz - 2);
-        if (is_multiple_frame_contexts) {
-          // Add a one byte frame with flag show_existing_frame.
-          *frame_data++ = 0x88 | (remained_spatial_layers - 1);
-        }
+        memmove(frame_data + 1, frame_start + inputs[i].sz - index_sz + 1,
+                new_index_sz - 2);
        // New marker.
        frame_data[0] = marker;
        frame_data += (mag * (frame + 1) + 1);

-        if (is_multiple_frame_contexts) {
-          // Write the frame size for the one byte frame.
-          frame_data -= mag;
-          *frame_data++ = 1;
-          for (uint32_t j = 1; j < mag; ++j) {
-            *frame_data++ = 0;
-          }
-        }
-
        *frame_data++ = marker;
        inputs[i].sz = frame_data - frame_start;
-
-        if (is_multiple_frame_contexts) {
-          // Change the show frame flag to 0 for all frames.
-          for (int j = 0; j < frame; ++j) {
-            frame_start[0] &= ~2;
-            frame_start += frame_sizes[j];
-          }
-        }
      }
    }
  }
@@ -374,6 +317,8 @@ class SvcTest : public ::testing::Test {
  std::string test_file_name_;
  bool codec_initialized_;
  Decoder *decoder_;
+  int tile_columns_;
+  int tile_rows_;
 };

 TEST_F(SvcTest, SvcInit) {
@@ -508,6 +453,7 @@ TEST_F(SvcTest, OnePassEncodeOneFrame) {

 TEST_F(SvcTest, OnePassEncodeThreeFrames) {
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
+  codec_enc_.g_lag_in_frames = 0;
  vpx_fixed_buf outputs[3];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]);
@@ -555,7 +501,7 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersDecodeBaseLayerOnly) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -573,13 +519,13 @@ TEST_F(SvcTest, TwoPassEncode5SpatialLayersDecode54321Layers) {
  Pass2EncodeNFrames(&stats_buf, 10, 5, &outputs[0]);

  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 4, false);
+  DropEnhancementLayers(&outputs[0], 10, 4);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 3, false);
+  DropEnhancementLayers(&outputs[0], 10, 3);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, false);
+  DropEnhancementLayers(&outputs[0], 10, 2);
  DecodeNFrames(&outputs[0], 10);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, false);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
@@ -616,9 +562,9 @@ TEST_F(SvcTest, TwoPassEncode3SNRLayersDecode321Layers) {
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 20, 3, &outputs[0]);
  DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 2, false);
+  DropEnhancementLayers(&outputs[0], 20, 2);
  DecodeNFrames(&outputs[0], 20);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 20, 1, false);
+  DropEnhancementLayers(&outputs[0], 20, 1);
  DecodeNFrames(&outputs[0], 20);

  FreeBitstreamBuffers(&outputs[0], 20);
@@ -649,7 +595,6 @@ TEST_F(SvcTest, TwoPassEncode2SpatialLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -667,7 +612,7 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
+  DropEnhancementLayers(&outputs[0], 10, 1);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -686,7 +631,6 @@ TEST_F(SvcTest, TwoPassEncode2SNRLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 2, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 2, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -707,32 +651,13 @@ TEST_F(SvcTest,
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 3, &outputs[0]);

-  vpx_fixed_buf outputs_new[10];
-  for (int i = 0; i < 10; ++i) {
-    outputs_new[i].buf = malloc(outputs[i].sz + 16);
-    ASSERT_TRUE(outputs_new[i].buf != NULL);
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 3, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 2, true);
-  DecodeNFrames(&outputs_new[0], 10);
-
-  for (int i = 0; i < 10; ++i) {
-    memcpy(outputs_new[i].buf, outputs[i].buf, outputs[i].sz);
-    outputs_new[i].sz = outputs[i].sz;
-  }
-  DropLayersAndMakeItVP9Comaptible(&outputs_new[0], 10, 1, true);
-  DecodeNFrames(&outputs_new[0], 10);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 2);
+  DecodeNFrames(&outputs[0], 10);
+  DropEnhancementLayers(&outputs[0], 10, 1);
+  DecodeNFrames(&outputs[0], 10);

  FreeBitstreamBuffers(&outputs[0], 10);
-  FreeBitstreamBuffers(&outputs_new[0], 10);
 }

 TEST_F(SvcTest, TwoPassEncode2TemporalLayers) {
@@ -769,7 +694,6 @@ TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithMultipleFrameContexts) {
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);
  DecodeNFrames(&outputs[0], 10);
  FreeBitstreamBuffers(&outputs[0], 10);
 }
@@ -814,7 +738,6 @@ TEST_F(SvcTest,
  vpx_fixed_buf outputs[10];
  memset(&outputs[0], 0, sizeof(outputs));
  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
-  DropLayersAndMakeItVP9Comaptible(&outputs[0], 10, 1, true);

  vpx_fixed_buf base_layer[5];
  for (int i = 0; i < 5; ++i)
@@ -824,4 +747,51 @@ TEST_F(SvcTest,
  FreeBitstreamBuffers(&outputs[0], 10);
 }

+TEST_F(SvcTest, TwoPassEncode2TemporalLayersWithTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1");
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
+TEST_F(SvcTest,
+       TwoPassEncode2TemporalLayersWithMultipleFrameContextsAndTiles) {
+  // First pass encode
+  std::string stats_buf;
+  vpx_svc_set_options(&svc_, "scale-factors=1/1");
+  svc_.temporal_layers = 2;
+  Pass1EncodeNFrames(10, 1, &stats_buf);
+
+  // Second pass encode
+  codec_enc_.g_pass = VPX_RC_LAST_PASS;
+  svc_.temporal_layers = 2;
+  codec_enc_.g_error_resilient = 0;
+  codec_enc_.g_w = 704;
+  codec_enc_.g_h = 144;
+  tile_columns_ = 1;
+  tile_rows_ = 1;
+  vpx_svc_set_options(&svc_, "auto-alt-refs=1 scale-factors=1/1 "
+                      "multi-frame-contexts=1");
+  vpx_fixed_buf outputs[10];
+  memset(&outputs[0], 0, sizeof(outputs));
+  Pass2EncodeNFrames(&stats_buf, 10, 1, &outputs[0]);
+  DecodeNFrames(&outputs[0], 10);
+  FreeBitstreamBuffers(&outputs[0], 10);
+}
+
 }  // namespace
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -7,13 +7,18 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
 LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
+LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv

+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

@@ -550,6 +555,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm
@@ -650,8 +657,34 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5
+ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes)
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
+endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v2.webm
@@ -666,10 +699,16 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.iv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf
@@ -684,8 +723,13 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s738
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
+# Encode / Decode test
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
 # BBB VP9 streams
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_426x240_tile_1x1_180kbps.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_640x360_tile_1x2_337kbps.webm
@@ -721,3 +765,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
 endif  # CONFIG_ENCODE_PERF_TESTS
+
+# sort and remove duplicates
+LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
--- a/test/test.mk
+++ b/test/test.mk
@@ -22,28 +22,36 @@ LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += i420_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += yuv_video_source.h

 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += resize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc

 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.h
-LIBVPX_TEST_SRCS-yes                   += encode_test_driver.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_test_driver.cc
 LIBVPX_TEST_SRCS-yes                   += encode_test_driver.h

+## IVF writing.
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../ivfenc.c ../ivfenc.h
+
 ## Y4m parsing.
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += y4m_test.cc ../y4menc.c ../y4menc.h

@@ -58,6 +66,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../tools_common.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.cc
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ../webmdec.h
 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += webm_video_source.h
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += decode_api_test.cc
@@ -89,6 +98,7 @@ ifneq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),)
 # These tests require both the encoder and decoder to be built.
 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes)
 LIBVPX_TEST_SRCS-yes                   += vp8_boolcoder_test.cc
+LIBVPX_TEST_SRCS-yes                   += vp8_fragments_test.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC)    += pp_filter_test.cc
@@ -97,6 +107,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc

 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += intrapred_test.cc
@@ -120,7 +131,7 @@ LIBVPX_TEST_SRCS-yes                   += partial_idct_test.cc
 LIBVPX_TEST_SRCS-yes                   += superframe_test.cc
 LIBVPX_TEST_SRCS-yes                   += tile_independence_test.cc
 LIBVPX_TEST_SRCS-yes                   += vp9_boolcoder_test.cc
-
+LIBVPX_TEST_SRCS-yes                   += vp9_encoder_parms_get_to_decoder.cc
 endif

 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += convolve_test.cc
@@ -134,20 +145,29 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += lpf_8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_avg_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9)         += vp9_intrapred_test.cc

 ifeq ($(CONFIG_VP9_ENCODER),yes)
 LIBVPX_TEST_SRCS-$(CONFIG_SPATIAL_SVC) += svc_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
+
 endif

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
 LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
 endif
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc

 endif # VP9

 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += sad_test.cc

+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) := test_intra_pred_speed.cc
+TEST_INTRA_PRED_SPEED_SRCS-$(CONFIG_VP9_DECODER) += ../md5_utils.h ../md5_utils.c
+
 endif # CONFIG_SHARED

 include $(SRC_PATH_BARE)/test/test-data.mk
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -0,0 +1,384 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+//  Test and time VP9 intra-predictor functions
+
+#include <stdio.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/md5_helper.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+#include "vpx_ports/vpx_timer.h"
+
+// -----------------------------------------------------------------------------
+
+namespace {
+
+typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
+                            const uint8_t *above, const uint8_t *left);
+
+const int kNumVp9IntraPredFuncs = 13;
+const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
+  "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", "H_PRED",
+  "D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", "D207_PRED", "D63_PRED",
+  "TM_PRED"
+};
+
+void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
+                   const char *const pred_func_names[], int num_funcs,
+                   const char *const signatures[], int block_size,
+                   int num_pixels_per_test) {
+  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+  const int kBPS = 32;
+  const int kTotalPixels = 32 * kBPS;
+  DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
+  DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
+  uint8_t *const above = above_mem + 16;
+  for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
+  for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
+  for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+
+  // some code assumes the top row has been extended:
+  // d45/d63 C-code, for instance, but not the assembly.
+  // TODO(jzern): this style of extension isn't strictly necessary.
+  ASSERT_LE(block_size, kBPS);
+  memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);
+
+  for (int k = 0; k < num_funcs; ++k) {
+    if (pred_funcs[k] == NULL) continue;
+    memcpy(src, ref_src, sizeof(src));
+    vpx_usec_timer timer;
+    vpx_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](src, kBPS, above, left);
+    }
+    libvpx_test::ClearSystemState();
+    vpx_usec_timer_mark(&timer);
+    const int elapsed_time =
+        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
+    libvpx_test::MD5 md5;
+    md5.Add(src, sizeof(src));
+    printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, pred_func_names[k],
+           elapsed_time, md5.Get());
+    EXPECT_STREQ(signatures[k], md5.Get());
+  }
+}
+
+void TestIntraPred4(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "4334156168b34ab599d9b5b30f522fe9",
+    "bc4649d5ba47c7ff178d92e475960fb0",
+    "8d316e5933326dcac24e1064794b5d12",
+    "a27270fed024eafd762c95de85f4da51",
+    "c33dff000d4256c2b8f3bf9e9bab14d2",
+    "44d8cddc2ad8f79b8ed3306051722b4f",
+    "eb54839b2bad6699d8946f01ec041cd0",
+    "ecb0d56ae5f677ea45127ce9d5c058e4",
+    "0b7936841f6813da818275944895b574",
+    "9117972ef64f91a58ff73e1731c81db2",
+    "c56d5e8c729e46825f46dd5d3b5d508a",
+    "c0889e2039bcf7bcb5d2f33cdca69adc",
+    "309a618577b27c648f9c5ee45252bc8f",
+  };
+  TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred8(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "7694ddeeefed887faf9d339d18850928",
+    "7d726b1213591b99f736be6dec65065b",
+    "19c5711281357a485591aaf9c96c0a67",
+    "ba6b66877a089e71cd938e3b8c40caac",
+    "802440c93317e0f8ba93fab02ef74265",
+    "9e09a47a15deb0b9d8372824f9805080",
+    "b7c2d8c662268c0c427da412d7b0311d",
+    "78339c1c60bb1d67d248ab8c4da08b7f",
+    "5c97d70f7d47de1882a6cd86c165c8a9",
+    "8182bf60688b42205acd95e59e967157",
+    "08323400005a297f16d7e57e7fe1eaac",
+    "95f7bfc262329a5849eda66d8f7c68ce",
+    "815b75c8e0d91cc1ae766dc5d3e445a3",
+  };
+  TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred16(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "b40dbb555d5d16a043dc361e6694fe53",
+    "fb08118cee3b6405d64c1fd68be878c6",
+    "6c190f341475c837cc38c2e566b64875",
+    "db5c34ccbe2c7f595d9b08b0dc2c698c",
+    "a62cbfd153a1f0b9fed13e62b8408a7a",
+    "143df5b4c89335e281103f610f5052e4",
+    "d87feb124107cdf2cfb147655aa0bb3c",
+    "7841fae7d4d47b519322e6a03eeed9dc",
+    "f6ebed3f71cbcf8d6d0516ce87e11093",
+    "3cc480297dbfeed01a1c2d78dd03d0c5",
+    "b9f69fa6532b372c545397dcb78ef311",
+    "a8fe1c70432f09d0c20c67bdb6432c4d",
+    "b8a41aa968ec108af447af4217cba91b",
+  };
+  TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
+}
+
+void TestIntraPred32(VpxPredFunc const *pred_funcs) {
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "558541656d84f9ae7896db655826febe",
+    "b3587a1f9a01495fa38c8cd3c8e2a1bf",
+    "4c6501e64f25aacc55a2a16c7e8f0255",
+    "b3b01379ba08916ef6b1b35f7d9ad51c",
+    "0f1eb38b6cbddb3d496199ef9f329071",
+    "911c06efb9ed1c3b4c104b232b55812f",
+    "9225beb0ddfa7a1d24eaa1be430a6654",
+    "0a6d584a44f8db9aa7ade2e2fdb9fc9e",
+    "b01c9076525216925f3456f034fb6eee",
+    "d267e20ad9e5cd2915d1a47254d3d149",
+    "ed012a4a5da71f36c2393023184a0e59",
+    "f162b51ed618d28b936974cff4391da5",
+    "9e1370c6d42e08d357d9612c93a71cfc",
+  };
+  TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
+}
+
+}  // namespace
+
+// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
+// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
+#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \
+                        d45, d135, d117, d153, d207, d63, tm)               \
+  TEST(arch, test_func) {                                                   \
+    static const VpxPredFunc vp9_intra_pred[] = {                           \
+        dc,   dc_left, dc_top, dc_128, v,   h, d45,                         \
+        d135, d117,    d153,   d207,   d63, tm};                            \
+    test_func(vp9_intra_pred);                                              \
+  }
+
+// -----------------------------------------------------------------------------
+// 4x4
+
+INTRA_PRED_TEST(C, TestIntraPred4, vp9_dc_predictor_4x4_c,
+                vp9_dc_left_predictor_4x4_c, vp9_dc_top_predictor_4x4_c,
+                vp9_dc_128_predictor_4x4_c, vp9_v_predictor_4x4_c,
+                vp9_h_predictor_4x4_c, vp9_d45_predictor_4x4_c,
+                vp9_d135_predictor_4x4_c, vp9_d117_predictor_4x4_c,
+                vp9_d153_predictor_4x4_c, vp9_d207_predictor_4x4_c,
+                vp9_d63_predictor_4x4_c, vp9_tm_predictor_4x4_c)
+
+#if HAVE_SSE
+INTRA_PRED_TEST(SSE, TestIntraPred4, vp9_dc_predictor_4x4_sse,
+                vp9_dc_left_predictor_4x4_sse, vp9_dc_top_predictor_4x4_sse,
+                vp9_dc_128_predictor_4x4_sse, vp9_v_predictor_4x4_sse, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_4x4_sse)
+#endif  // HAVE_SSE
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
+                vp9_h_predictor_4x4_ssse3, vp9_d45_predictor_4x4_ssse3, NULL,
+                NULL, vp9_d153_predictor_4x4_ssse3,
+                vp9_d207_predictor_4x4_ssse3, vp9_d63_predictor_4x4_ssse3, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, vp9_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, vp9_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vp9_tm_predictor_4x4_dspr2)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred4, vp9_dc_predictor_4x4_neon,
+                vp9_dc_left_predictor_4x4_neon, vp9_dc_top_predictor_4x4_neon,
+                vp9_dc_128_predictor_4x4_neon, vp9_v_predictor_4x4_neon,
+                vp9_h_predictor_4x4_neon, vp9_d45_predictor_4x4_neon,
+                vp9_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+                vp9_tm_predictor_4x4_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vp9_dc_predictor_4x4_msa,
+                vp9_dc_left_predictor_4x4_msa, vp9_dc_top_predictor_4x4_msa,
+                vp9_dc_128_predictor_4x4_msa, vp9_v_predictor_4x4_msa,
+                vp9_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vp9_tm_predictor_4x4_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 8x8
+
+INTRA_PRED_TEST(C, TestIntraPred8, vp9_dc_predictor_8x8_c,
+                vp9_dc_left_predictor_8x8_c, vp9_dc_top_predictor_8x8_c,
+                vp9_dc_128_predictor_8x8_c, vp9_v_predictor_8x8_c,
+                vp9_h_predictor_8x8_c, vp9_d45_predictor_8x8_c,
+                vp9_d135_predictor_8x8_c, vp9_d117_predictor_8x8_c,
+                vp9_d153_predictor_8x8_c, vp9_d207_predictor_8x8_c,
+                vp9_d63_predictor_8x8_c, vp9_tm_predictor_8x8_c)
+
+#if HAVE_SSE
+INTRA_PRED_TEST(SSE, TestIntraPred8, vp9_dc_predictor_8x8_sse,
+                vp9_dc_left_predictor_8x8_sse, vp9_dc_top_predictor_8x8_sse,
+                vp9_dc_128_predictor_8x8_sse, vp9_v_predictor_8x8_sse, NULL,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // HAVE_SSE
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL,
+                vp9_h_predictor_8x8_ssse3, vp9_d45_predictor_8x8_ssse3, NULL,
+                NULL, vp9_d153_predictor_8x8_ssse3,
+                vp9_d207_predictor_8x8_ssse3, vp9_d63_predictor_8x8_ssse3, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, vp9_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, vp9_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vp9_tm_predictor_8x8_c)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred8, vp9_dc_predictor_8x8_neon,
+                vp9_dc_left_predictor_8x8_neon, vp9_dc_top_predictor_8x8_neon,
+                vp9_dc_128_predictor_8x8_neon, vp9_v_predictor_8x8_neon,
+                vp9_h_predictor_8x8_neon, vp9_d45_predictor_8x8_neon, NULL,
+                NULL, NULL, NULL, NULL, vp9_tm_predictor_8x8_neon)
+
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred8, vp9_dc_predictor_8x8_msa,
+                vp9_dc_left_predictor_8x8_msa, vp9_dc_top_predictor_8x8_msa,
+                vp9_dc_128_predictor_8x8_msa, vp9_v_predictor_8x8_msa,
+                vp9_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vp9_tm_predictor_8x8_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 16x16
+
+INTRA_PRED_TEST(C, TestIntraPred16, vp9_dc_predictor_16x16_c,
+                vp9_dc_left_predictor_16x16_c, vp9_dc_top_predictor_16x16_c,
+                vp9_dc_128_predictor_16x16_c, vp9_v_predictor_16x16_c,
+                vp9_h_predictor_16x16_c, vp9_d45_predictor_16x16_c,
+                vp9_d135_predictor_16x16_c, vp9_d117_predictor_16x16_c,
+                vp9_d153_predictor_16x16_c, vp9_d207_predictor_16x16_c,
+                vp9_d63_predictor_16x16_c, vp9_tm_predictor_16x16_c)
+
+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred16, vp9_dc_predictor_16x16_sse2,
+                vp9_dc_left_predictor_16x16_sse2,
+                vp9_dc_top_predictor_16x16_sse2,
+                vp9_dc_128_predictor_16x16_sse2, vp9_v_predictor_16x16_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                vp9_tm_predictor_16x16_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL,
+                vp9_h_predictor_16x16_ssse3, vp9_d45_predictor_16x16_ssse3,
+                NULL, NULL, vp9_d153_predictor_16x16_ssse3,
+                vp9_d207_predictor_16x16_ssse3, vp9_d63_predictor_16x16_ssse3,
+                NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, vp9_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, vp9_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred16, vp9_dc_predictor_16x16_neon,
+                vp9_dc_left_predictor_16x16_neon,
+                vp9_dc_top_predictor_16x16_neon,
+                vp9_dc_128_predictor_16x16_neon, vp9_v_predictor_16x16_neon,
+                vp9_h_predictor_16x16_neon, vp9_d45_predictor_16x16_neon, NULL,
+                NULL, NULL, NULL, NULL, vp9_tm_predictor_16x16_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred16, vp9_dc_predictor_16x16_msa,
+                vp9_dc_left_predictor_16x16_msa, vp9_dc_top_predictor_16x16_msa,
+                vp9_dc_128_predictor_16x16_msa, vp9_v_predictor_16x16_msa,
+                vp9_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vp9_tm_predictor_16x16_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 32x32
+
+INTRA_PRED_TEST(C, TestIntraPred32, vp9_dc_predictor_32x32_c,
+                vp9_dc_left_predictor_32x32_c, vp9_dc_top_predictor_32x32_c,
+                vp9_dc_128_predictor_32x32_c, vp9_v_predictor_32x32_c,
+                vp9_h_predictor_32x32_c, vp9_d45_predictor_32x32_c,
+                vp9_d135_predictor_32x32_c, vp9_d117_predictor_32x32_c,
+                vp9_d153_predictor_32x32_c, vp9_d207_predictor_32x32_c,
+                vp9_d63_predictor_32x32_c, vp9_tm_predictor_32x32_c)
+
+#if HAVE_SSE2
+#if ARCH_X86_64
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
+                vp9_dc_left_predictor_32x32_sse2,
+                vp9_dc_top_predictor_32x32_sse2,
+                vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                vp9_tm_predictor_32x32_sse2)
+#else
+INTRA_PRED_TEST(SSE2, TestIntraPred32, vp9_dc_predictor_32x32_sse2,
+                vp9_dc_left_predictor_32x32_sse2,
+                vp9_dc_top_predictor_32x32_sse2,
+                vp9_dc_128_predictor_32x32_sse2, vp9_v_predictor_32x32_sse2,
+                NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
+#endif  // ARCH_X86_64
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL,
+                vp9_h_predictor_32x32_ssse3, vp9_d45_predictor_32x32_ssse3,
+                NULL, NULL, vp9_d153_predictor_32x32_ssse3,
+                vp9_d207_predictor_32x32_ssse3, vp9_d63_predictor_32x32_ssse3,
+                NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred32, vp9_dc_predictor_32x32_neon,
+                vp9_dc_left_predictor_32x32_neon,
+                vp9_dc_top_predictor_32x32_neon,
+                vp9_dc_128_predictor_32x32_neon, vp9_v_predictor_32x32_neon,
+                vp9_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                vp9_tm_predictor_32x32_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred32, vp9_dc_predictor_32x32_msa,
+                vp9_dc_left_predictor_32x32_msa, vp9_dc_top_predictor_32x32_msa,
+                vp9_dc_128_predictor_32x32_msa, vp9_v_predictor_32x32_msa,
+                vp9_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL,
+                NULL, vp9_tm_predictor_32x32_msa)
+#endif  // HAVE_MSA
+
+#include "test/test_libvpx.cc"
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -15,10 +15,12 @@
 extern "C" {
 #if CONFIG_VP8
 extern void vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
 extern void vp9_rtcd();
-#endif
+#endif  // CONFIG_VP9
+extern void vpx_dsp_rtcd();
+extern void vpx_scale_rtcd();
 }
 #include "third_party/googletest/src/include/gtest/gtest.h"

@@ -36,21 +38,21 @@ int main(int argc, char **argv) {
 #if ARCH_X86 || ARCH_X86_64
  const int simd_caps = x86_simd_caps();
  if (!(simd_caps & HAS_MMX))
-    append_negative_gtest_filter(":MMX/*");
+    append_negative_gtest_filter(":MMX.*:MMX/*");
  if (!(simd_caps & HAS_SSE))
-    append_negative_gtest_filter(":SSE/*");
+    append_negative_gtest_filter(":SSE.*:SSE/*");
  if (!(simd_caps & HAS_SSE2))
-    append_negative_gtest_filter(":SSE2/*");
+    append_negative_gtest_filter(":SSE2.*:SSE2/*");
  if (!(simd_caps & HAS_SSE3))
-    append_negative_gtest_filter(":SSE3/*");
+    append_negative_gtest_filter(":SSE3.*:SSE3/*");
  if (!(simd_caps & HAS_SSSE3))
-    append_negative_gtest_filter(":SSSE3/*");
+    append_negative_gtest_filter(":SSSE3.*:SSSE3/*");
  if (!(simd_caps & HAS_SSE4_1))
-    append_negative_gtest_filter(":SSE4_1/*");
+    append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*");
  if (!(simd_caps & HAS_AVX))
-    append_negative_gtest_filter(":AVX/*");
+    append_negative_gtest_filter(":AVX.*:AVX/*");
  if (!(simd_caps & HAS_AVX2))
-    append_negative_gtest_filter(":AVX2/*");
+    append_negative_gtest_filter(":AVX2.*:AVX2/*");
 #endif

 #if !CONFIG_SHARED
@@ -59,11 +61,13 @@ int main(int argc, char **argv) {

 #if CONFIG_VP8
  vp8_rtcd();
-#endif
+#endif  // CONFIG_VP8
 #if CONFIG_VP9
  vp9_rtcd();
-#endif
-#endif
+#endif  // CONFIG_VP9
+  vpx_dsp_rtcd();
+  vpx_scale_rtcd();
+#endif  // !CONFIG_SHARED

  return RUN_ALL_TESTS();
 }
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -12,6 +12,7 @@
 #include <cstdlib>
 #include <string>
 #include "third_party/googletest/src/include/gtest/gtest.h"
+#include "../tools_common.h"
 #include "./vpx_config.h"
 #include "test/codec_factory.h"
 #include "test/decode_test_driver.h"
@@ -26,10 +27,24 @@

 namespace {

+enum DecodeMode {
+  kSerialMode,
+  kFrameParallelMode
+};
+
+const int kDecodeMode = 0;
+const int kThreads = 1;
+const int kFileName = 2;
+
+typedef std::tr1::tuple<int, int, const char*> DecodeParam;
+
 class TestVectorTest : public ::libvpx_test::DecoderTest,
-    public ::libvpx_test::CodecTestWithParam<const char*> {
+    public ::libvpx_test::CodecTestWithParam<DecodeParam> {
 protected:
-  TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) {}
+  TestVectorTest()
+      : DecoderTest(GET_PARAM(0)),
+        md5_file_(NULL) {
+  }

  virtual ~TestVectorTest() {
    if (md5_file_)
@@ -71,8 +86,25 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 // checksums match the correct md5 data, then the test is passed. Otherwise,
 // the test failed.
 TEST_P(TestVectorTest, MD5Match) {
-  const std::string filename = GET_PARAM(1);
+  const DecodeParam input = GET_PARAM(1);
+  const std::string filename = std::tr1::get<kFileName>(input);
+  const int threads = std::tr1::get<kThreads>(input);
+  const int mode = std::tr1::get<kDecodeMode>(input);
  libvpx_test::CompressedVideoSource *video = NULL;
+  vpx_codec_flags_t flags = 0;
+  vpx_codec_dec_cfg_t cfg = {0};
+  char str[256];
+
+  if (mode == kFrameParallelMode) {
+    flags |= VPX_CODEC_USE_FRAME_THREADING;
+  }
+
+  cfg.threads = threads;
+
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
+           "file: %s  mode: %s threads: %d",
+           filename.c_str(), mode == 0 ? "Serial" : "Parallel", threads);
+  SCOPED_TRACE(str);

  // Open compressed video file.
  if (filename.substr(filename.length() - 3, 3) == "ivf") {
@@ -92,18 +124,50 @@ TEST_P(TestVectorTest, MD5Match) {
  const std::string md5_filename = filename + ".md5";
  OpenMD5File(md5_filename);

+  // Set decode config and flags.
+  set_cfg(cfg);
+  set_flags(flags);
+
  // Decode frame, and check the md5 matching.
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video, cfg));
  delete video;
 }

-VP8_INSTANTIATE_TEST_CASE(TestVectorTest,
-                          ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
-                                              libvpx_test::kVP8TestVectors +
-                                              libvpx_test::kNumVP8TestVectors));
-VP9_INSTANTIATE_TEST_CASE(TestVectorTest,
-                          ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
-                                              libvpx_test::kVP9TestVectors +
-                                              libvpx_test::kNumVP9TestVectors));
+// Test VP8 decode in serial mode with single thread.
+// NOTE: VP8 only support serial mode.
+#if CONFIG_VP8_DECODER
+VP8_INSTANTIATE_TEST_CASE(
+    TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
+        ::testing::Values(1),  // Single thread.
+        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
+                            libvpx_test::kVP8TestVectors +
+                                libvpx_test::kNumVP8TestVectors)));
+#endif

+// Test VP9 decode in serial mode with single thread.
+#if CONFIG_VP9_DECODER
+VP9_INSTANTIATE_TEST_CASE(
+    TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
+        ::testing::Values(1),  // Single thread.
+        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                            libvpx_test::kVP9TestVectors +
+                                libvpx_test::kNumVP9TestVectors)));
+
+// Test VP9 decode in frame parallel mode with different number of threads.
+INSTANTIATE_TEST_CASE_P(
+    VP9MultiThreadedFrameParallel, TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
+        ::testing::Combine(
+            ::testing::Values(1),        // Frame Parallel mode.
+            ::testing::Range(2, 9),      // With 2 ~ 8 threads.
+            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
+                                libvpx_test::kVP9TestVectors +
+                                    libvpx_test::kNumVP9TestVectors))));
+#endif
 }  // namespace
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -165,7 +165,10 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm",
  "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf",
  "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf",
+#if !CONFIG_SIZE_LIMIT || \
+    (DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120)
  "vp90-2-13-largescaling.webm",
+#endif
  "vp90-2-14-resize-fp-tiles-1-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm",
  "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm",
@@ -184,6 +187,14 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm",
  "vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm",
  "vp91-2-04-yuv444.webm",
+  "vp91-2-04-yuv422.webm", "vp91-2-04-yuv440.webm",
+#if CONFIG_VP9_HIGHBITDEPTH
+  "vp92-2-20-10bit-yuv420.webm", "vp92-2-20-12bit-yuv420.webm",
+  "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm",
+  "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm",
+  "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm",
+#endif  // CONFIG_VP9_HIGHBITDEPTH`
+  "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm",
 };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
 #endif  // CONFIG_VP9_DECODER
--- a/test/tools_common.sh
+++ b/test/tools_common.sh
@@ -106,22 +106,24 @@ check_git_hashes() {
  fi
 }

+# $1 is the name of an environment variable containing a directory name to
+# test.
+test_env_var_dir() {
+  local dir=$(eval echo "\${$1}")
+  if [ ! -d "${dir}" ]; then
+    elog "'${dir}': No such directory"
+    elog "The $1 environment variable must be set to a valid directory."
+    return 1
+  fi
+}
+
 # This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and
 # LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that
 # the variables are set and that they all evaluate to directory paths.
 verify_vpx_test_environment() {
-  if [ ! -d "${LIBVPX_BIN_PATH}" ]; then
-    echo "The LIBVPX_BIN_PATH environment variable must be set."
-    return 1
-  fi
-  if [ ! -d "${LIBVPX_CONFIG_PATH}" ]; then
-    echo "The LIBVPX_CONFIG_PATH environment variable must be set."
-    return 1
-  fi
-  if [ ! -d "${LIBVPX_TEST_DATA_PATH}" ]; then
-    echo "The LIBVPX_TEST_DATA_PATH environment variable must be set."
-    return 1
-  fi
+  test_env_var_dir "LIBVPX_BIN_PATH" \
+    && test_env_var_dir "LIBVPX_CONFIG_PATH" \
+    && test_env_var_dir "LIBVPX_TEST_DATA_PATH"
 }

 # Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which
@@ -261,6 +263,9 @@ run_tests() {
    return
  fi

+  # Don't bother with the environment tests if everything else was disabled.
+  [ -z "${tests_to_filter}" ] && return
+
  # Combine environment and actual tests.
  local tests_to_run="${env_tests} ${tests_to_filter}"

@@ -378,8 +383,7 @@ else
  VPX_TEST_TEMP_ROOT=/tmp
 fi

-VPX_TEST_RAND=$(awk 'BEGIN { srand(); printf "%d\n",(rand() * 32768)}')
-VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_${VPX_TEST_RAND}"
+VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_$$"

 if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \
   [ ! -d "${VPX_TEST_OUTPUT_DIR}" ]; then
@@ -397,11 +401,16 @@ VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf"
 VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf"

 VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm"
+VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm"
+VP9_LT_50_FRAMES_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm"

 YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv"
 YUV_RAW_INPUT_WIDTH=352
 YUV_RAW_INPUT_HEIGHT=288

+Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m"
+Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m"
+
 # Setup a trap function to clean up after tests complete.
 trap cleanup EXIT

@@ -417,13 +426,13 @@ vlog "$(basename "${0%.*}") test configuration:
  VPX_TEST_LIST_TESTS=${VPX_TEST_LIST_TESTS}
  VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}
  VPX_TEST_PREFIX=${VPX_TEST_PREFIX}
-  VPX_TEST_RAND=${VPX_TEST_RAND}
  VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS}
  VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT}
  VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT}
  VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT}
  YUV_RAW_INPUT=${YUV_RAW_INPUT}
  YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH}
-  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}"
+  YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT}
+  Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}"

 fi  # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard.
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -134,8 +134,13 @@ class VideoSource {

 class DummyVideoSource : public VideoSource {
 public:
-  DummyVideoSource() : img_(NULL), limit_(100), width_(0), height_(0) {
-    SetSize(80, 64);
+  DummyVideoSource()
+      : img_(NULL),
+        limit_(100),
+        width_(80),
+        height_(64),
+        format_(VPX_IMG_FMT_I420) {
+    ReallocImage();
  }

  virtual ~DummyVideoSource() { vpx_img_free(img_); }
@@ -174,23 +179,35 @@ class DummyVideoSource : public VideoSource {

  void SetSize(unsigned int width, unsigned int height) {
    if (width != width_ || height != height_) {
-      vpx_img_free(img_);
-      raw_sz_ = ((width + 31)&~31) * height * 3 / 2;
-      img_ = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, width, height, 32);
      width_ = width;
      height_ = height;
+      ReallocImage();
+    }
+  }
+
+  void SetImageFormat(vpx_img_fmt_t format) {
+    if (format_ != format) {
+      format_ = format;
+      ReallocImage();
    }
  }

 protected:
  virtual void FillFrame() { if (img_) memset(img_->img_data, 0, raw_sz_); }

+  void ReallocImage() {
+    vpx_img_free(img_);
+    img_ = vpx_img_alloc(NULL, format_, width_, height_, 32);
+    raw_sz_ = ((img_->w + 31) & ~31) * img_->h * img_->bps / 8;
+  }
+
  vpx_image_t *img_;
  size_t       raw_sz_;
  unsigned int limit_;
  unsigned int frame_;
  unsigned int width_;
  unsigned int height_;
+  vpx_img_fmt_t format_;
 };


--- a/test/vp8_denoiser_sse2_test.cc
+++ b/test/vp8_denoiser_sse2_test.cc
@@ -28,19 +28,18 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 16 * 16;
-class VP8DenoiserTest
-    : public ::testing::TestWithParam<int> {
+class VP8DenoiserTest : public ::testing::TestWithParam<int> {
 public:
  virtual ~VP8DenoiserTest() {}

  virtual void SetUp() {
-    increase_denoising = GetParam();
+    increase_denoising_ = GetParam();
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int increase_denoising;
+  int increase_denoising_;
 };

 TEST_P(VP8DenoiserTest, BitexactCheck) {
@@ -53,18 +52,18 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
  // mc_avg_block is the denoised reference block,
  // avg_block_c is the denoised result from C code,
  // avg_block_sse2 is the denoised result from SSE2 code.
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_c, kNumPixels);
+  DECLARE_ALIGNED(16, uint8_t, sig_block_c[kNumPixels]);
  // Since in VP8 denoiser, the source signal will be changed,
  // we need another copy of the source signal as the input of sse2 code.
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block_sse2, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
+  DECLARE_ALIGNED(16, uint8_t, sig_block_sse2[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
-    uint8_t motion_magnitude_random
-              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    const int motion_magnitude_ran =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
@@ -72,20 +71,20 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      sig_block_sse2[j] = sig_block_c[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
-             (rnd.Rand8()%20);
+      temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0 ? -1 : 1) *
+             (rnd.Rand8() % 20);
      // Clip.
-      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
    }

    // Test denosiser on Y component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -94,14 +93,14 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
      }
    }

-    // Test denosiser on UV component.
-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(mc_avg_block, stride,
-                               avg_block_c, stride, sig_block_c, stride,
-                               motion_magnitude_random, increase_denoising));
+    // Test denoiser on UV component.
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c(
+        mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride,
+        motion_magnitude_ran, increase_denoising_));

-    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(mc_avg_block, stride,
-                               avg_block_sse2, stride, sig_block_sse2, stride,
-                               motion_magnitude_random, increase_denoising));
+    ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2(
+        mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride,
+        motion_magnitude_ran, increase_denoising_));

    // Check bitexactness.
    for (int h = 0; h < 16; ++h) {
@@ -113,7 +112,5 @@ TEST_P(VP8DenoiserTest, BitexactCheck) {
 }

 // Test for all block size.
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP8DenoiserTest,
-    ::testing::Values(0, 1));
+INSTANTIATE_TEST_CASE_P(SSE2, VP8DenoiserTest, ::testing::Values(0, 1));
 }  // namespace
--- a/test/vp8_fragments_test.cc
+++ b/test/vp8_fragments_test.cc
@@ -0,0 +1,37 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/video_source.h"
+
+namespace {
+
+class VP8FramgmentsTest
+    : public ::libvpx_test::EncoderTest,
+      public ::testing::Test {
+ protected:
+  VP8FramgmentsTest() : EncoderTest(&::libvpx_test::kVP8) {}
+  virtual ~VP8FramgmentsTest() {}
+
+  virtual void SetUp() {
+    const unsigned long init_flags =  // NOLINT(runtime/int)
+        VPX_CODEC_USE_OUTPUT_PARTITION;
+    InitializeConfig();
+    SetMode(::libvpx_test::kRealTime);
+    set_init_flags(init_flags);
+  }
+};
+
+TEST_F(VP8FramgmentsTest, TestFragmentsEncodeDecode) {
+  ::libvpx_test::RandomVideoSource video;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+}  // namespace
--- a/test/vp9_arf_freq_test.cc
+++ b/test/vp9_arf_freq_test.cc
@@ -0,0 +1,230 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/encoder/vp9_ratectrl.h"
+
+namespace {
+
+const unsigned int kFrames = 100;
+const int kBitrate = 500;
+
+#define ARF_NOT_SEEN   1000001
+#define ARF_SEEN_ONCE  1000000
+
+typedef struct {
+  const char *filename;
+  unsigned int width;
+  unsigned int height;
+  unsigned int framerate_num;
+  unsigned int framerate_den;
+  unsigned int input_bit_depth;
+  vpx_img_fmt fmt;
+  vpx_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+typedef struct {
+  libvpx_test::TestMode mode;
+  int cpu_used;
+} TestEncodeParam;
+
+const TestVideoParam kTestVectors[] = {
+  // artificially increase framerate to trigger default check
+  {"hantro_collage_w352h288.yuv", 352, 288, 5000, 1,
+    8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"hantro_collage_w352h288.yuv", 352, 288, 30, 1,
+    8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"rush_hour_444.y4m", 352, 288, 30, 1,
+    8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
+#if CONFIG_VP9_HIGHBITDEPTH
+  // Add list of profile 2/3 test videos here ...
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+
+const TestEncodeParam kEncodeVectors[] = {
+  {::libvpx_test::kOnePassGood, 2},
+  {::libvpx_test::kOnePassGood, 5},
+  {::libvpx_test::kTwoPassGood, 1},
+  {::libvpx_test::kTwoPassGood, 2},
+  {::libvpx_test::kTwoPassGood, 5},
+  {::libvpx_test::kRealTime, 5},
+};
+
+const int kMinArfVectors[] = {
+  // NOTE: 0 refers to the default built-in logic in:
+  //       vp9_rc_get_default_min_gf_interval(...)
+  0, 4, 8, 12, 15
+};
+
+int is_extension_y4m(const char *filename) {
+  const char *dot = strrchr(filename, '.');
+  if (!dot || dot == filename)
+    return 0;
+  else
+    return !strcmp(dot, ".y4m");
+}
+
+class ArfFreqTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith3Params<TestVideoParam, \
+                                                 TestEncodeParam, int> {
+ protected:
+  ArfFreqTest()
+      : EncoderTest(GET_PARAM(0)),
+        test_video_param_(GET_PARAM(1)),
+        test_encode_param_(GET_PARAM(2)),
+        min_arf_requested_(GET_PARAM(3)) {
+  }
+
+  virtual ~ArfFreqTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(test_encode_param_.mode);
+    if (test_encode_param_.mode != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 25;
+      cfg_.rc_end_usage = VPX_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = VPX_CBR;
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+    dec_cfg_.threads = 4;
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    min_arf_ = ARF_NOT_SEEN;
+    run_of_visible_frames_ = 0;
+  }
+
+  int GetNumFramesInPkt(const vpx_codec_cx_pkt_t *pkt) {
+    const uint8_t *buffer = reinterpret_cast<uint8_t*>(pkt->data.frame.buf);
+    const uint8_t marker = buffer[pkt->data.frame.sz - 1];
+    const int mag = ((marker >> 3) & 3) + 1;
+    int frames = (marker & 0x7) + 1;
+    const unsigned int index_sz = 2 + mag  * frames;
+    // Check for superframe or not.
+    // Assume superframe has only one visible frame, the rest being
+    // invisible. If superframe index is not found, then there is only
+    // one frame.
+    if (!((marker & 0xe0) == 0xc0 &&
+          pkt->data.frame.sz >= index_sz &&
+          buffer[pkt->data.frame.sz - index_sz] == marker)) {
+      frames = 1;
+    }
+    return frames;
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    if (pkt->kind != VPX_CODEC_CX_FRAME_PKT)
+      return;
+    const int frames = GetNumFramesInPkt(pkt);
+    if (frames == 1) {
+      run_of_visible_frames_++;
+    } else if (frames == 2) {
+      if (min_arf_ == ARF_NOT_SEEN) {
+        min_arf_ = ARF_SEEN_ONCE;
+      } else if (min_arf_ == ARF_SEEN_ONCE ||
+                 run_of_visible_frames_ < min_arf_) {
+        min_arf_ = run_of_visible_frames_;
+      }
+      run_of_visible_frames_ = 1;
+    } else {
+      min_arf_ = 0;
+      run_of_visible_frames_ = 1;
+    }
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 0) {
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 4);
+      encoder->Control(VP8E_SET_CPUUSED, test_encode_param_.cpu_used);
+      encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_arf_requested_);
+      if (test_encode_param_.mode != ::libvpx_test::kRealTime) {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      }
+    }
+  }
+
+  int GetMinArfDistance() const {
+    return min_arf_;
+  }
+
+  int GetMinArfDistanceRequested() const {
+    if (min_arf_requested_)
+      return min_arf_requested_;
+    else
+      return vp9_rc_get_default_min_gf_interval(
+          test_video_param_.width, test_video_param_.height,
+          (double)test_video_param_.framerate_num /
+          test_video_param_.framerate_den);
+  }
+
+  TestVideoParam test_video_param_;
+  TestEncodeParam test_encode_param_;
+
+ private:
+  int min_arf_requested_;
+  int min_arf_;
+  int run_of_visible_frames_;
+};
+
+TEST_P(ArfFreqTest, MinArfFreqTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  if (cfg_.g_bit_depth > 8)
+    init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
+
+  libvpx_test::VideoSource *video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video = new libvpx_test::Y4mVideoSource(test_video_param_.filename,
+                                            0, kFrames);
+  } else {
+    video = new libvpx_test::YUVVideoSource(test_video_param_.filename,
+                                            test_video_param_.fmt,
+                                            test_video_param_.width,
+                                            test_video_param_.height,
+                                            test_video_param_.framerate_num,
+                                            test_video_param_.framerate_den,
+                                            0, kFrames);
+  }
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  const int min_arf_dist = GetMinArfDistance();
+  const int min_arf_dist_requested = GetMinArfDistanceRequested();
+  if (min_arf_dist != ARF_NOT_SEEN && min_arf_dist != ARF_SEEN_ONCE) {
+    EXPECT_GE(min_arf_dist, min_arf_dist_requested);
+  }
+  delete(video);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    ArfFreqTest,
+    ::testing::ValuesIn(kTestVectors),
+    ::testing::ValuesIn(kEncodeVectors),
+    ::testing::ValuesIn(kMinArfVectors));
+}  // namespace
--- a/test/vp9_avg_test.cc
+++ b/test/vp9_avg_test.cc
@@ -57,7 +57,7 @@ class AverageTestBase : public ::testing::Test {
  }

  // Sum Pixels
-  unsigned int ReferenceAverage(const uint8_t* source, int pitch ) {
+  unsigned int ReferenceAverage8x8(const uint8_t* source, int pitch ) {
    unsigned int average = 0;
    for (int h = 0; h < 8; ++h)
      for (int w = 0; w < 8; ++w)
@@ -65,6 +65,14 @@ class AverageTestBase : public ::testing::Test {
    return ((average + 32) >> 6);
  }

+  unsigned int ReferenceAverage4x4(const uint8_t* source, int pitch ) {
+    unsigned int average = 0;
+    for (int h = 0; h < 4; ++h)
+      for (int w = 0; w < 4; ++w)
+        average += source[h * source_stride_ + w];
+    return ((average + 8) >> 4);
+  }
+
  void FillConstant(uint8_t fill_constant) {
    for (int i = 0; i < width_ * height_; ++i) {
        source_data_[i] = fill_constant;
@@ -85,7 +93,7 @@ class AverageTestBase : public ::testing::Test {
 };
 typedef unsigned int (*AverageFunction)(const uint8_t* s, int pitch);

-typedef std::tr1::tuple<int, int, int, AverageFunction> AvgFunc;
+typedef std::tr1::tuple<int, int, int, int, AverageFunction> AvgFunc;

 class AverageTest
    : public AverageTestBase,
@@ -95,18 +103,97 @@ class AverageTest

 protected:
  void CheckAverages() {
-    unsigned int expected = ReferenceAverage(source_data_+ GET_PARAM(2),
-                                             source_stride_);
+    unsigned int expected = 0;
+    if (GET_PARAM(3) == 8) {
+      expected = ReferenceAverage8x8(source_data_+ GET_PARAM(2),
+                                     source_stride_);
+    } else  if (GET_PARAM(3) == 4) {
+      expected = ReferenceAverage4x4(source_data_+ GET_PARAM(2),
+                                     source_stride_);
+    }

-    ASM_REGISTER_STATE_CHECK(GET_PARAM(3)(source_data_+ GET_PARAM(2),
+    ASM_REGISTER_STATE_CHECK(GET_PARAM(4)(source_data_+ GET_PARAM(2),
                                          source_stride_));
-    unsigned int actual = GET_PARAM(3)(source_data_+ GET_PARAM(2),
+    unsigned int actual = GET_PARAM(4)(source_data_+ GET_PARAM(2),
                                       source_stride_);

    EXPECT_EQ(expected, actual);
  }
 };

+typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref,
+                              const int ref_stride, const int height);
+
+typedef std::tr1::tuple<int, IntProRowFunc, IntProRowFunc> IntProRowParam;
+
+class IntProRowTest
+    : public AverageTestBase,
+      public ::testing::WithParamInterface<IntProRowParam> {
+ public:
+  IntProRowTest()
+    : AverageTestBase(16, GET_PARAM(0)),
+      hbuf_asm_(NULL),
+      hbuf_c_(NULL) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+
+ protected:
+  virtual void SetUp() {
+    hbuf_asm_ = reinterpret_cast<int16_t*>(
+        vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16));
+    hbuf_c_ = reinterpret_cast<int16_t*>(
+        vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16));
+  }
+
+  virtual void TearDown() {
+    vpx_free(hbuf_c_);
+    hbuf_c_ = NULL;
+    vpx_free(hbuf_asm_);
+    hbuf_asm_ = NULL;
+  }
+
+  void RunComparison() {
+    ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_));
+    ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_));
+    EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16))
+        << "Output mismatch";
+  }
+
+ private:
+  IntProRowFunc asm_func_;
+  IntProRowFunc c_func_;
+  int16_t *hbuf_asm_;
+  int16_t *hbuf_c_;
+};
+
+typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width);
+
+typedef std::tr1::tuple<int, IntProColFunc, IntProColFunc> IntProColParam;
+
+class IntProColTest
+    : public AverageTestBase,
+      public ::testing::WithParamInterface<IntProColParam> {
+ public:
+  IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) {
+    asm_func_ = GET_PARAM(1);
+    c_func_ = GET_PARAM(2);
+  }
+
+ protected:
+  void RunComparison() {
+    ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_));
+    ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_));
+    EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch";
+  }
+
+ private:
+  IntProColFunc asm_func_;
+  IntProColFunc c_func_;
+  int16_t sum_asm_;
+  int16_t sum_c_;
+};
+

 uint8_t* AverageTestBase::source_data_ = NULL;

@@ -129,22 +216,88 @@ TEST_P(AverageTest, Random) {
  }
 }

+TEST_P(IntProRowTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProRowTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MinValue) {
+  FillConstant(0);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, MaxValue) {
+  FillConstant(255);
+  RunComparison();
+}
+
+TEST_P(IntProColTest, Random) {
+  FillRandom();
+  RunComparison();
+}
+
 using std::tr1::make_tuple;

 INSTANTIATE_TEST_CASE_P(
    C, AverageTest,
    ::testing::Values(
-        make_tuple(16, 16, 1, &vp9_avg_8x8_c)));
-
+        make_tuple(16, 16, 1, 8, &vp9_avg_8x8_c),
+        make_tuple(16, 16, 1, 4, &vp9_avg_4x4_c)));

 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, AverageTest,
    ::testing::Values(
-        make_tuple(16, 16, 0, &vp9_avg_8x8_sse2),
-        make_tuple(16, 16, 5, &vp9_avg_8x8_sse2),
-        make_tuple(32, 32, 15, &vp9_avg_8x8_sse2)));
+        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_sse2),
+        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_sse2),
+        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_sse2),
+        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_sse2),
+        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_sse2)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProRowTest, ::testing::Values(
+        make_tuple(16, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
+        make_tuple(32, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c),
+        make_tuple(64, &vp9_int_pro_row_sse2, &vp9_int_pro_row_c)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, IntProColTest, ::testing::Values(
+        make_tuple(16, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
+        make_tuple(32, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c),
+        make_tuple(64, &vp9_int_pro_col_sse2, &vp9_int_pro_col_c)));
+#endif
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_neon),
+        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_neon),
+        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_neon)));

 #endif

+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, AverageTest,
+    ::testing::Values(
+        make_tuple(16, 16, 0, 8, &vp9_avg_8x8_msa),
+        make_tuple(16, 16, 5, 8, &vp9_avg_8x8_msa),
+        make_tuple(32, 32, 15, 8, &vp9_avg_8x8_msa),
+        make_tuple(16, 16, 0, 4, &vp9_avg_4x4_msa),
+        make_tuple(16, 16, 5, 4, &vp9_avg_4x4_msa),
+        make_tuple(32, 32, 15, 4, &vp9_avg_4x4_msa)));
+#endif
+
 }  // namespace
--- a/test/vp9_denoiser_sse2_test.cc
+++ b/test/vp9_denoiser_sse2_test.cc
@@ -29,19 +29,18 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 64 * 64;
-class VP9DenoiserTest
-    : public ::testing::TestWithParam<int> {
+class VP9DenoiserTest : public ::testing::TestWithParam<BLOCK_SIZE> {
 public:
  virtual ~VP9DenoiserTest() {}

  virtual void SetUp() {
-    bs = (BLOCK_SIZE)GetParam();
+    bs_ = GetParam();
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  BLOCK_SIZE bs;
+  BLOCK_SIZE bs_;
 };

 TEST_P(VP9DenoiserTest, BitexactCheck) {
@@ -53,15 +52,15 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
  // mc_avg_block is the denoised reference block,
  // avg_block_c is the denoised result from C code,
  // avg_block_sse2 is the denoised result from SSE2 code.
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, sig_block, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, mc_avg_block, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_c, kNumPixels);
-  DECLARE_ALIGNED_ARRAY(16, uint8_t, avg_block_sse2, kNumPixels);
+  DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
+  DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
-    uint8_t motion_magnitude_random
-              = rnd.Rand8() % (uint8_t)(MOTION_MAGNITUDE_THRESHOLD * 1.2);
+    const int motion_magnitude_random =
+        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
@@ -69,23 +68,23 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
      sig_block[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
-      temp = sig_block[j] + (rnd.Rand8() % 2 == 0? -1 : 1) *
-             (rnd.Rand8()%20);
+      temp = sig_block[j] + ((rnd.Rand8() % 2 == 0) ? -1 : 1) *
+             (rnd.Rand8() % 20);
      // Clip.
-      mc_avg_block[j] = (temp < 0? 0 : (temp > 255? 255 : temp));
+      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
    }

-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(sig_block, 64,
-                             mc_avg_block, 64, avg_block_c, 64,
-                             0, bs, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(
+        sig_block, 64, mc_avg_block, 64, avg_block_c,
+        64, 0, bs_, motion_magnitude_random));

-    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(sig_block, 64,
-                             mc_avg_block, 64, avg_block_sse2, 64,
-                             0, bs, motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
+        sig_block, 64, mc_avg_block, 64, avg_block_sse2,
+        64, 0, bs_, motion_magnitude_random));

    // Test bitexactness.
-    for (int h = 0; h < (4 << b_height_log2_lookup[bs]); ++h) {
-      for (int w = 0; w < (4 << b_width_log2_lookup[bs]); ++w) {
+    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
+      for (int w = 0; w < (4 << b_width_log2_lookup[bs_]); ++w) {
        EXPECT_EQ(avg_block_c[h * 64 + w], avg_block_sse2[h * 64 + w]);
      }
    }
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -0,0 +1,193 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "vp9/decoder/vp9_decoder.h"
+
+typedef vpx_codec_stream_info_t vp9_stream_info_t;
+struct vpx_codec_alg_priv {
+  vpx_codec_priv_t        base;
+  vpx_codec_dec_cfg_t     cfg;
+  vp9_stream_info_t       si;
+  struct VP9Decoder      *pbi;
+  int                     postproc_cfg_set;
+  vp8_postproc_cfg_t      postproc_cfg;
+  vpx_decrypt_cb          decrypt_cb;
+  void                   *decrypt_state;
+  vpx_image_t             img;
+  int                     img_avail;
+  int                     flushed;
+  int                     invert_tile_order;
+  int                     frame_parallel_decode;
+
+  // External frame buffer info to save for VP9 common.
+  void *ext_priv;  // Private data associated with the external frame buffers.
+  vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb;
+  vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb;
+};
+
+static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) {
+  return (vpx_codec_alg_priv_t *)ctx->priv;
+}
+
+namespace {
+
+const unsigned int kFramerate = 50;
+const int kCpuUsed = 2;
+
+struct EncodePerfTestVideo {
+  const char *name;
+  uint32_t width;
+  uint32_t height;
+  uint32_t bitrate;
+  int frames;
+};
+
+const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = {
+  {"niklas_1280_720_30.y4m", 1280, 720, 600, 10},
+};
+
+struct EncodeParameters {
+  int32_t tile_rows;
+  int32_t tile_cols;
+  int32_t lossless;
+  int32_t error_resilient;
+  int32_t frame_parallel;
+  vpx_color_space_t cs;
+  // TODO(JBB): quantizers / bitrate
+};
+
+const EncodeParameters kVP9EncodeParameterSet[] = {
+    {0, 0, 0, 1, 0, VPX_CS_BT_601},
+    {0, 0, 0, 0, 0, VPX_CS_BT_709},
+    {0, 0, 1, 0, 0, VPX_CS_BT_2020},
+    {0, 2, 0, 0, 1, VPX_CS_UNKNOWN},
+    // TODO(JBB): Test profiles (requires more work).
+};
+
+int is_extension_y4m(const char *filename) {
+  const char *dot = strrchr(filename, '.');
+  if (!dot || dot == filename)
+    return 0;
+  else
+    return !strcmp(dot, ".y4m");
+}
+
+class Vp9EncoderParmsGetToDecoder
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<EncodeParameters, \
+                                                 EncodePerfTestVideo> {
+ protected:
+  Vp9EncoderParmsGetToDecoder()
+      : EncoderTest(GET_PARAM(0)),
+        encode_parms(GET_PARAM(1)) {
+  }
+
+  virtual ~Vp9EncoderParmsGetToDecoder() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(::libvpx_test::kTwoPassGood);
+    cfg_.g_lag_in_frames = 25;
+    cfg_.g_error_resilient = encode_parms.error_resilient;
+    dec_cfg_.threads = 4;
+    test_video_ = GET_PARAM(2);
+    cfg_.rc_target_bitrate = test_video_.bitrate;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs);
+      encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless);
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING,
+                       encode_parms.frame_parallel);
+      encoder->Control(VP9E_SET_TILE_ROWS, encode_parms.tile_rows);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, encode_parms.tile_cols);
+      encoder->Control(VP8E_SET_CPUUSED, kCpuUsed);
+      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+    }
+  }
+
+  virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec,
+                                  const libvpx_test::VideoSource& video,
+                                  libvpx_test::Decoder *decoder) {
+    vpx_codec_ctx_t* vp9_decoder = decoder->GetDecoder();
+    vpx_codec_alg_priv_t* priv =
+        (vpx_codec_alg_priv_t*) get_alg_priv(vp9_decoder);
+
+    VP9Decoder* pbi = priv->pbi;
+    VP9_COMMON* common = &pbi->common;
+
+    if (encode_parms.lossless) {
+      EXPECT_EQ(common->base_qindex, 0);
+      EXPECT_EQ(common->y_dc_delta_q, 0);
+      EXPECT_EQ(common->uv_dc_delta_q, 0);
+      EXPECT_EQ(common->uv_ac_delta_q, 0);
+      EXPECT_EQ(common->tx_mode, ONLY_4X4);
+    }
+    EXPECT_EQ(common->error_resilient_mode, encode_parms.error_resilient);
+    if (encode_parms.error_resilient) {
+      EXPECT_EQ(common->frame_parallel_decoding_mode, 1);
+      EXPECT_EQ(common->use_prev_frame_mvs, 0);
+    } else {
+      EXPECT_EQ(common->frame_parallel_decoding_mode,
+                encode_parms.frame_parallel);
+    }
+    EXPECT_EQ(common->color_space, encode_parms.cs);
+    EXPECT_EQ(common->log2_tile_cols, encode_parms.tile_cols);
+    EXPECT_EQ(common->log2_tile_rows, encode_parms.tile_rows);
+
+    EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError();
+    return VPX_CODEC_OK == res_dec;
+  }
+
+  EncodePerfTestVideo test_video_;
+
+ private:
+  EncodeParameters encode_parms;
+};
+
+// TODO(hkuang): This test conflicts with frame parallel decode. So disable it
+// for now until fix.
+TEST_P(Vp9EncoderParmsGetToDecoder, DISABLED_BitstreamParms) {
+  init_flags_ = VPX_CODEC_USE_PSNR;
+
+  libvpx_test::VideoSource *video;
+  if (is_extension_y4m(test_video_.name)) {
+    video = new libvpx_test::Y4mVideoSource(test_video_.name,
+                                            0, test_video_.frames);
+  } else {
+    video = new libvpx_test::YUVVideoSource(test_video_.name,
+                                            VPX_IMG_FMT_I420,
+                                            test_video_.width,
+                                            test_video_.height,
+                                            kFramerate, 1, 0,
+                                            test_video_.frames);
+  }
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  delete(video);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    Vp9EncoderParmsGetToDecoder,
+    ::testing::ValuesIn(kVP9EncodeParameterSet),
+    ::testing::ValuesIn(kVP9EncodePerfTestVectors));
+
+}  // namespace
--- a/test/vp9_end_to_end_test.cc
+++ b/test/vp9_end_to_end_test.cc
@@ -0,0 +1,189 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/y4m_video_source.h"
+#include "test/yuv_video_source.h"
+#include "test/util.h"
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+namespace {
+
+const unsigned int kWidth  = 160;
+const unsigned int kHeight = 90;
+const unsigned int kFramerate = 50;
+const unsigned int kFrames = 10;
+const int kBitrate = 500;
+// List of psnr thresholds for speed settings 0-7 and 5 encoding modes
+const double kPsnrThreshold[][5] = {
+  { 36.0, 37.0, 37.0, 37.0, 37.0 },
+  { 35.0, 36.0, 36.0, 36.0, 36.0 },
+  { 34.0, 35.0, 35.0, 35.0, 35.0 },
+  { 33.0, 34.0, 34.0, 34.0, 34.0 },
+  { 32.0, 33.0, 33.0, 33.0, 33.0 },
+  { 31.0, 32.0, 32.0, 32.0, 32.0 },
+  { 30.0, 31.0, 31.0, 31.0, 31.0 },
+  { 29.0, 30.0, 30.0, 30.0, 30.0 },
+};
+
+typedef struct {
+  const char *filename;
+  unsigned int input_bit_depth;
+  vpx_img_fmt fmt;
+  vpx_bit_depth_t bit_depth;
+  unsigned int profile;
+} TestVideoParam;
+
+const TestVideoParam kTestVectors[] = {
+  {"park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0},
+  {"park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1},
+  {"park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1},
+  {"park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1},
+#if CONFIG_VP9_HIGHBITDEPTH
+  {"park_joy_90p_10_420.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2},
+  {"park_joy_90p_10_422.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3},
+  {"park_joy_90p_10_444.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3},
+  {"park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3},
+  {"park_joy_90p_12_420.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2},
+  {"park_joy_90p_12_422.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3},
+  {"park_joy_90p_12_444.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3},
+  {"park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3},
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+};
+
+// Encoding modes tested
+const libvpx_test::TestMode kEncodingModeVectors[] = {
+  ::libvpx_test::kTwoPassGood,
+  ::libvpx_test::kOnePassGood,
+  ::libvpx_test::kRealTime,
+};
+
+// Speed settings tested
+const int kCpuUsedVectors[] = {1, 2, 3, 5, 6};
+
+int is_extension_y4m(const char *filename) {
+  const char *dot = strrchr(filename, '.');
+  if (!dot || dot == filename)
+    return 0;
+  else
+    return !strcmp(dot, ".y4m");
+}
+
+class EndToEndTestLarge
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode, \
+                                                 TestVideoParam, int> {
+ protected:
+  EndToEndTestLarge()
+      : EncoderTest(GET_PARAM(0)),
+        test_video_param_(GET_PARAM(2)),
+        cpu_used_(GET_PARAM(3)),
+        psnr_(0.0),
+        nframes_(0),
+        encoding_mode_(GET_PARAM(1)) {
+  }
+
+  virtual ~EndToEndTestLarge() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+    if (encoding_mode_ != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 5;
+      cfg_.rc_end_usage = VPX_VBR;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = VPX_CBR;
+      cfg_.rc_buf_sz = 1000;
+      cfg_.rc_buf_initial_sz = 500;
+      cfg_.rc_buf_optimal_sz = 600;
+    }
+    dec_cfg_.threads = 4;
+  }
+
+  virtual void BeginPassHook(unsigned int) {
+    psnr_ = 0.0;
+    nframes_ = 0;
+  }
+
+  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
+    psnr_ += pkt->data.psnr.psnr[0];
+    nframes_++;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1);
+      encoder->Control(VP9E_SET_TILE_COLUMNS, 4);
+      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
+      if (encoding_mode_ != ::libvpx_test::kRealTime) {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      }
+    }
+  }
+
+  double GetAveragePsnr() const {
+    if (nframes_)
+      return psnr_ / nframes_;
+    return 0.0;
+  }
+
+  double GetPsnrThreshold() {
+    return kPsnrThreshold[cpu_used_][encoding_mode_];
+  }
+
+  TestVideoParam test_video_param_;
+  int cpu_used_;
+
+ private:
+  double psnr_;
+  unsigned int nframes_;
+  libvpx_test::TestMode encoding_mode_;
+};
+
+TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) {
+  cfg_.rc_target_bitrate = kBitrate;
+  cfg_.g_error_resilient = 0;
+  cfg_.g_profile = test_video_param_.profile;
+  cfg_.g_input_bit_depth = test_video_param_.input_bit_depth;
+  cfg_.g_bit_depth = test_video_param_.bit_depth;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  if (cfg_.g_bit_depth > 8)
+    init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH;
+
+  libvpx_test::VideoSource *video;
+  if (is_extension_y4m(test_video_param_.filename)) {
+    video = new libvpx_test::Y4mVideoSource(test_video_param_.filename,
+                                            0, kFrames);
+  } else {
+    video = new libvpx_test::YUVVideoSource(test_video_param_.filename,
+                                            test_video_param_.fmt,
+                                            kWidth, kHeight,
+                                            kFramerate, 1, 0, kFrames);
+  }
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video));
+  const double psnr = GetAveragePsnr();
+  EXPECT_GT(psnr, GetPsnrThreshold());
+  delete(video);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    EndToEndTestLarge,
+    ::testing::ValuesIn(kEncodingModeVectors),
+    ::testing::ValuesIn(kTestVectors),
+    ::testing::ValuesIn(kCpuUsedVectors));
+
+}  // namespace
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cmath>
+#include <cstdlib>
+#include <string>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int kNumIterations = 1000;
+
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
+                                  const tran_low_t *dqcoeff,
+                                  intptr_t block_size,
+                                  int64_t *ssz, int bps);
+
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
+                        ErrorBlockParam;
+
+class ErrorBlockTest
+  : public ::testing::TestWithParam<ErrorBlockParam> {
+ public:
+  virtual ~ErrorBlockTest() {}
+  virtual void SetUp() {
+    error_block_op_     = GET_PARAM(0);
+    ref_error_block_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
+};
+
+TEST_P(ErrorBlockTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      coeff[j]   = rnd(2 << 20) - (1 << 20);
+      dqcoeff[j] = rnd(2 << 20) - (1 << 20);
+    }
+    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
+                                  bit_depth_);
+    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
+                                                   &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(ErrorBlockTest, ExtremeValues) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  intptr_t block_size;
+  int64_t ssz;
+  int64_t ret;
+  int64_t ref_ssz;
+  int64_t ref_ret;
+  int max_val = ((1 << 20) - 1);
+  for (int i = 0; i < kNumIterations; ++i) {
+    int err_count = 0;
+    int k = (i / 9) % 5;
+
+    // Change the maximum coeff value, to test different bit boundaries
+    if ( k == 4 && (i % 9) == 0 ) {
+      max_val >>= 1;
+    }
+    block_size = 16 << (i % 9);  // All block sizes from 4x4, 8x4 ..64x64
+    for (int j = 0; j < block_size; j++) {
+      if (k < 4) {  // Test at maximum values
+        coeff[j]   = k % 2 ? max_val : -max_val;
+        dqcoeff[j] = (k >> 1) % 2 ? max_val : -max_val;
+      } else {
+        coeff[j]   = rnd(2 << 14) - (1 << 14);
+        dqcoeff[j] = rnd(2 << 14) - (1 << 14);
+      }
+    }
+    ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz,
+                                  bit_depth_);
+    ASM_REGISTER_STATE_CHECK(ret = error_block_op_(coeff, dqcoeff, block_size,
+                                                   &ssz, bit_depth_));
+    err_count += (ref_ret != ret) | (ref_ssz != ssz);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Error Block Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, ErrorBlockTest,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_block_error_sse2,
+                   &vp9_highbd_block_error_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_block_error_sse2,
+                   &vp9_highbd_block_error_c, VPX_BITS_12),
+        make_tuple(&vp9_highbd_block_error_sse2,
+                   &vp9_highbd_block_error_c, VPX_BITS_8)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}  // namespace
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -0,0 +1,137 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+#include <vector>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/y4m_video_source.h"
+
+namespace {
+class VP9EncoderThreadTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  VP9EncoderThreadTest()
+      : EncoderTest(GET_PARAM(0)),
+        encoder_initialized_(false),
+        tiles_(2),
+        encoding_mode_(GET_PARAM(1)),
+        set_cpu_used_(GET_PARAM(2)) {
+    init_flags_ = VPX_CODEC_USE_PSNR;
+    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+    cfg.w = 1280;
+    cfg.h = 720;
+    decoder_ = codec_->CreateDecoder(cfg, 0);
+
+    md5_.clear();
+  }
+  virtual ~VP9EncoderThreadTest() {
+    delete decoder_;
+  }
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(encoding_mode_);
+
+    if (encoding_mode_ != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 3;
+      cfg_.rc_end_usage = VPX_VBR;
+      cfg_.rc_2pass_vbr_minsection_pct = 5;
+      cfg_.rc_2pass_vbr_minsection_pct = 2000;
+    } else {
+      cfg_.g_lag_in_frames = 0;
+      cfg_.rc_end_usage = VPX_CBR;
+      cfg_.g_error_resilient = 1;
+    }
+    cfg_.rc_max_quantizer = 56;
+    cfg_.rc_min_quantizer = 0;
+  }
+
+  virtual void BeginPassHook(unsigned int /*pass*/) {
+    encoder_initialized_ = false;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (!encoder_initialized_) {
+      // Encode 4 column tiles.
+      encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_);
+      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+      if (encoding_mode_ != ::libvpx_test::kRealTime) {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
+        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
+        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
+        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+      } else {
+        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
+        encoder->Control(VP9E_SET_AQ_MODE, 3);
+      }
+      encoder_initialized_ = true;
+    }
+  }
+
+  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
+    const vpx_codec_err_t res = decoder_->DecodeFrame(
+        reinterpret_cast<uint8_t*>(pkt->data.frame.buf), pkt->data.frame.sz);
+    if (res != VPX_CODEC_OK) {
+      abort_ = true;
+      ASSERT_EQ(VPX_CODEC_OK, res);
+    }
+    const vpx_image_t *img = decoder_->GetDxData().Next();
+
+    if (img) {
+      ::libvpx_test::MD5 md5_res;
+      md5_res.Add(img);
+      md5_.push_back(md5_res.Get());
+    }
+  }
+
+  bool encoder_initialized_;
+  int tiles_;
+  ::libvpx_test::TestMode encoding_mode_;
+  int set_cpu_used_;
+  ::libvpx_test::Decoder *decoder_;
+  std::vector<std::string> md5_;
+};
+
+TEST_P(VP9EncoderThreadTest, EncoderResultTest) {
+  std::vector<std::string> single_thr_md5, multi_thr_md5;
+
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
+
+  cfg_.rc_target_bitrate = 1000;
+
+  // Encode using single thread.
+  cfg_.g_threads = 1;
+  init_flags_ = VPX_CODEC_USE_PSNR;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  single_thr_md5 = md5_;
+  md5_.clear();
+
+  // Encode using multiple threads.
+  cfg_.g_threads = 4;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  multi_thr_md5 = md5_;
+  md5_.clear();
+
+  // Compare to check if two vectors are equal.
+  ASSERT_EQ(single_thr_md5, multi_thr_md5);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    VP9EncoderThreadTest,
+    ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
+                      ::libvpx_test::kRealTime),
+    ::testing::Range(1, 9));
+}  // namespace
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@@ -0,0 +1,220 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+using std::string;
+
+#if CONFIG_WEBM_IO
+
+struct PauseFileList {
+  const char *name;
+  // md5 sum for decoded frames which does not include skipped frames.
+  const char *expected_md5;
+  const int pause_frame_num;
+};
+
+// Decodes |filename| with |num_threads|. Pause at the specified frame_num,
+// seek to next key frame and then continue decoding until the end. Return
+// the md5 of the decoded frames which does not include skipped frames.
+string DecodeFileWithPause(const string &filename, int num_threads,
+                           int pause_num) {
+  libvpx_test::WebMVideoSource video(filename);
+  video.Init();
+  int in_frames = 0;
+  int out_frames = 0;
+
+  vpx_codec_dec_cfg_t cfg = {0};
+  cfg.threads = num_threads;
+  vpx_codec_flags_t flags = 0;
+  flags |= VPX_CODEC_USE_FRAME_THREADING;
+  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
+
+  libvpx_test::MD5 md5;
+  video.Begin();
+
+  do {
+    ++in_frames;
+    const vpx_codec_err_t res =
+        decoder.DecodeFrame(video.cxdata(), video.frame_size());
+    if (res != VPX_CODEC_OK) {
+      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+      break;
+    }
+
+    // Pause at specified frame number.
+    if (in_frames == pause_num) {
+      // Flush the decoder and then seek to next key frame.
+      decoder.DecodeFrame(NULL, 0);
+      video.SeekToNextKeyFrame();
+    } else {
+      video.Next();
+    }
+
+    // Flush the decoder at the end of the video.
+    if (!video.cxdata())
+      decoder.DecodeFrame(NULL, 0);
+
+    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next())) {
+      ++out_frames;
+      md5.Add(img);
+    }
+  } while (video.cxdata() != NULL);
+
+  EXPECT_EQ(in_frames, out_frames) <<
+      "Input frame count does not match output frame count";
+
+  return string(md5.Get());
+}
+
+void DecodeFilesWithPause(const PauseFileList files[]) {
+  for (const PauseFileList *iter = files; iter->name != NULL; ++iter) {
+    SCOPED_TRACE(iter->name);
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_EQ(iter->expected_md5,
+                DecodeFileWithPause(iter->name, t, iter->pause_frame_num))
+          << "threads = " << t;
+    }
+  }
+}
+
+TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
+  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
+  // one key frame for every ten frames.
+  static const PauseFileList files[] = {
+    { "vp90-2-07-frame_parallel-1.webm",
+      "6ea7c3875d67252e7caf2bc6e75b36b1", 6 },
+    { "vp90-2-07-frame_parallel-1.webm",
+      "4bb634160c7356a8d7d4299b6dc83a45", 12 },
+    { "vp90-2-07-frame_parallel-1.webm",
+      "89772591e6ef461f9fa754f916c78ed8", 26 },
+    { NULL, NULL, 0 },
+  };
+  DecodeFilesWithPause(files);
+}
+
+struct FileList {
+  const char *name;
+  // md5 sum for decoded frames which does not include corrupted frames.
+  const char *expected_md5;
+  // Expected number of decoded frames which does not include corrupted frames.
+  const int expected_frame_count;
+};
+
+// Decodes |filename| with |num_threads|. Return the md5 of the decoded
+// frames which does not include corrupted frames.
+string DecodeFile(const string &filename, int num_threads,
+                  int expected_frame_count) {
+  libvpx_test::WebMVideoSource video(filename);
+  video.Init();
+
+  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+  cfg.threads = num_threads;
+  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
+  libvpx_test::VP9Decoder decoder(cfg, flags, 0);
+
+  libvpx_test::MD5 md5;
+  video.Begin();
+
+  int out_frames = 0;
+  do {
+    const vpx_codec_err_t res =
+        decoder.DecodeFrame(video.cxdata(), video.frame_size());
+    // TODO(hkuang): frame parallel mode should return an error on corruption.
+    if (res != VPX_CODEC_OK) {
+      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+      break;
+    }
+
+    video.Next();
+
+    // Flush the decoder at the end of the video.
+    if (!video.cxdata())
+      decoder.DecodeFrame(NULL, 0);
+
+    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next())) {
+      ++out_frames;
+      md5.Add(img);
+    }
+  } while (video.cxdata() != NULL);
+
+  EXPECT_EQ(expected_frame_count, out_frames) <<
+      "Input frame count does not match expected output frame count";
+
+  return string(md5.Get());
+}
+
+void DecodeFiles(const FileList files[]) {
+  for (const FileList *iter = files; iter->name != NULL; ++iter) {
+    SCOPED_TRACE(iter->name);
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_EQ(iter->expected_md5,
+                DecodeFile(iter->name, t, iter->expected_frame_count))
+          << "threads = " << t;
+    }
+  }
+}
+
+TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
+  static const FileList files[] = {
+    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 11th frame has corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-1.webm",
+      "0549d0f45f60deaef8eb708e6c0eb6cb", 30 },
+    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 1st and 31st frames have
+    // corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-2.webm",
+      "6a1f3cf6f9e7a364212fadb9580d525e", 20 },
+    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 5th and 13th frames have
+    // corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-3.webm",
+      "8256544308de926b0681e04685b98677", 27 },
+    { NULL, NULL, 0 },
+  };
+  DecodeFiles(files);
+}
+
+TEST(VP9MultiThreadedFrameParallel, ValidFileTest) {
+  static const FileList files[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+    { "vp92-2-20-10bit-yuv420.webm",
+      "a16b99df180c584e8db2ffeda987d293", 10 },
+#endif
+    { NULL, NULL, 0 },
+  };
+  DecodeFiles(files);
+}
+#endif  // CONFIG_WEBM_IO
+}  // namespace
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -120,10 +120,10 @@ class VP9IntraPredTest

 TEST_P(VP9IntraPredTest, IntraPredTests) {
  // max block size is 32
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, left_col, 2*32);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, above_data, 2*32+32);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, dst, 3 * 32 * 32);
-  DECLARE_ALIGNED_ARRAY(16, uint16_t, ref_dst, 3 * 32 * 32);
+  DECLARE_ALIGNED(16, uint16_t, left_col[2*32]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[2*32+32]);
+  DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]);
+  DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]);
  RunTest(left_col, above_data, dst, ref_dst);
 }

--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -0,0 +1,351 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "./vpx_config.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_scan.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_integer.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
+const int number_of_iterations = 100;
+
+typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
+                             int skip_block, const int16_t *zbin,
+                             const int16_t *round, const int16_t *quant,
+                             const int16_t *quant_shift,
+                             tran_low_t *qcoeff, tran_low_t *dqcoeff,
+                             const int16_t *dequant,
+                             uint16_t *eob, const int16_t *scan,
+                             const int16_t *iscan);
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
+    QuantizeParam;
+
+class VP9QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~VP9QuantizeTest() {}
+  virtual void SetUp() {
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+
+class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
+ public:
+  virtual ~VP9Quantize32Test() {}
+  virtual void SetUp() {
+    quantize_op_   = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_  = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
+};
+
+TEST_P(VP9QuantizeTest, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    const int skip_block = i == 0;
+    const TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    const int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16()&mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(VP9Quantize32Test, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    const int skip_block = i == 0;
+    const TX_SIZE sz = TX_32X32;
+    const TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    const int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16()&mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(VP9QuantizeTest, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    // Two random entries
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(VP9Quantize32Test, EOBCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // Two random entries
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16()&mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16()&mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr,
+                     ref_eob_ptr, scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(coeff_ptr, count, skip_block,
+                                          zbin_ptr, round_ptr, quant_ptr,
+                                          quant_shift_ptr, qcoeff_ptr,
+                                          dqcoeff_ptr, dequant_ptr, eob_ptr,
+                                          scan_order->scan, scan_order->iscan));
+
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j]  != qcoeff_ptr[j]) |
+          (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+using std::tr1::make_tuple;
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9QuantizeTest,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_quantize_b_sse2,
+                   &vp9_highbd_quantize_b_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_quantize_b_sse2,
+                   &vp9_highbd_quantize_b_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_quantize_b_sse2,
+                   &vp9_highbd_quantize_b_c, VPX_BITS_12)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9Quantize32Test,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_8),
+        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_10),
+        make_tuple(&vp9_highbd_quantize_b_32x32_sse2,
+                   &vp9_highbd_quantize_b_32x32_c, VPX_BITS_12)));
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+}  // namespace
--- a/test/vp9_skip_loopfilter_test.cc
+++ b/test/vp9_skip_loopfilter_test.cc
@@ -0,0 +1,180 @@
+/*
+ *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#include "test/webm_video_source.h"
+
+namespace {
+
+const char kVp9TestFile[] = "vp90-2-08-tile_1x8_frame_parallel.webm";
+const char kVp9Md5File[] = "vp90-2-08-tile_1x8_frame_parallel.webm.md5";
+
+// Class for testing shutting off the loop filter.
+class SkipLoopFilterTest {
+ public:
+  SkipLoopFilterTest()
+      : video_(NULL),
+        decoder_(NULL),
+        md5_file_(NULL) {}
+
+  ~SkipLoopFilterTest() {
+    if (md5_file_ != NULL)
+      fclose(md5_file_);
+    delete decoder_;
+    delete video_;
+  }
+
+  // If |threads| > 0 then set the decoder with that number of threads.
+  void Init(int num_threads) {
+    expected_md5_[0] = '\0';
+    junk_[0] = '\0';
+    video_ = new libvpx_test::WebMVideoSource(kVp9TestFile);
+    ASSERT_TRUE(video_ != NULL);
+    video_->Init();
+    video_->Begin();
+
+    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+    if (num_threads > 0)
+      cfg.threads = num_threads;
+    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
+    ASSERT_TRUE(decoder_ != NULL);
+
+    OpenMd5File(kVp9Md5File);
+  }
+
+  // Set the VP9 skipLoopFilter control value.
+  void SetSkipLoopFilter(int value, vpx_codec_err_t expected_value) {
+    decoder_->Control(VP9_SET_SKIP_LOOP_FILTER, value, expected_value);
+  }
+
+  vpx_codec_err_t DecodeOneFrame() {
+    const vpx_codec_err_t res =
+        decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+    if (res == VPX_CODEC_OK) {
+      ReadMd5();
+      video_->Next();
+    }
+    return res;
+  }
+
+  vpx_codec_err_t DecodeRemainingFrames() {
+    for (; video_->cxdata() != NULL; video_->Next()) {
+      const vpx_codec_err_t res =
+          decoder_->DecodeFrame(video_->cxdata(), video_->frame_size());
+      if (res != VPX_CODEC_OK)
+        return res;
+      ReadMd5();
+    }
+    return VPX_CODEC_OK;
+  }
+
+  // Checks if MD5 matches or doesn't.
+  void CheckMd5(bool matches) {
+    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
+    const vpx_image_t *img = dec_iter.Next();
+    CheckMd5Vpx(*img, matches);
+  }
+
+ private:
+  // TODO(fgalligan): Move the MD5 testing code into another class.
+  void OpenMd5File(const std::string &md5_file_name) {
+    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name);
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+        << md5_file_name;
+  }
+
+  // Reads the next line of the MD5 file.
+  void ReadMd5() {
+    ASSERT_TRUE(md5_file_ != NULL);
+    const int res = fscanf(md5_file_, "%s  %s", expected_md5_, junk_);
+    ASSERT_NE(EOF, res) << "Read md5 data failed";
+    expected_md5_[32] = '\0';
+  }
+
+  // Checks if the last read MD5 matches |img| or doesn't.
+  void CheckMd5Vpx(const vpx_image_t &img, bool matches) {
+    ::libvpx_test::MD5 md5_res;
+    md5_res.Add(&img);
+    const char *const actual_md5 = md5_res.Get();
+
+    // Check MD5.
+    if (matches)
+      ASSERT_STREQ(expected_md5_, actual_md5) << "MD5 checksums don't match";
+    else
+      ASSERT_STRNE(expected_md5_, actual_md5) << "MD5 checksums match";
+  }
+
+  libvpx_test::WebMVideoSource *video_;
+  libvpx_test::VP9Decoder *decoder_;
+  FILE *md5_file_;
+  char expected_md5_[33];
+  char junk_[128];
+};
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilter) {
+  const int non_zero_value = 1;
+  const int num_threads = 0;
+  SkipLoopFilterTest skip_loop_filter;
+  skip_loop_filter.Init(num_threads);
+  skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+  skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilterSingleThread) {
+  const int non_zero_value = 1;
+  const int num_threads = 1;
+  SkipLoopFilterTest skip_loop_filter;
+  skip_loop_filter.Init(num_threads);
+  skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+  skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, ShutOffLoopFilter8Threads) {
+  const int non_zero_value = 1;
+  const int num_threads = 8;
+  SkipLoopFilterTest skip_loop_filter;
+  skip_loop_filter.Init(num_threads);
+  skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+  skip_loop_filter.CheckMd5(false);
+}
+
+TEST(SkipLoopFilterTest, WithLoopFilter) {
+  const int non_zero_value = 1;
+  const int num_threads = 0;
+  SkipLoopFilterTest skip_loop_filter;
+  skip_loop_filter.Init(num_threads);
+  skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK);
+  skip_loop_filter.SetSkipLoopFilter(0, VPX_CODEC_OK);
+  ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+  skip_loop_filter.CheckMd5(true);
+}
+
+TEST(SkipLoopFilterTest, ToggleLoopFilter) {
+  const int num_threads = 0;
+  SkipLoopFilterTest skip_loop_filter;
+  skip_loop_filter.Init(num_threads);
+
+  for (int i = 0; i < 10; ++i) {
+    skip_loop_filter.SetSkipLoopFilter(i % 2, VPX_CODEC_OK);
+    ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeOneFrame());
+  }
+  ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames());
+  skip_loop_filter.CheckMd5(false);
+}
+
+}  // namespace
--- a/test/vp9_thread_test.cc
+++ b/test/vp9_thread_test.cc
@@ -207,7 +207,7 @@ int Reset(VP9Worker *const /*worker*/) { return 1; }
 int Sync(VP9Worker *const worker) { return !worker->had_error; }

 void Execute(VP9Worker *const worker) {
-  worker->had_error |= worker->hook(worker->data1, worker->data2);
+  worker->had_error |= !worker->hook(worker->data1, worker->data2);
 }

 void Launch(VP9Worker *const worker) { Execute(worker); }
--- a/test/vpx_scale_test.cc
+++ b/test/vpx_scale_test.cc
@@ -33,10 +33,10 @@ class VpxScaleBase {
  void ResetImage(int width, int height) {
    width_ = width;
    height_ = height;
-    vpx_memset(&img_, 0, sizeof(img_));
+    memset(&img_, 0, sizeof(img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(img_.buffer_alloc, kBufFiller, img_.frame_size);
+    memset(img_.buffer_alloc, kBufFiller, img_.frame_size);
    FillPlane(img_.y_buffer, img_.y_crop_width, img_.y_crop_height,
              img_.y_stride);
    FillPlane(img_.u_buffer, img_.uv_crop_width, img_.uv_crop_height,
@@ -44,15 +44,15 @@ class VpxScaleBase {
    FillPlane(img_.v_buffer, img_.uv_crop_width, img_.uv_crop_height,
              img_.uv_stride);

-    vpx_memset(&ref_img_, 0, sizeof(ref_img_));
+    memset(&ref_img_, 0, sizeof(ref_img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&ref_img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(ref_img_.buffer_alloc, kBufFiller, ref_img_.frame_size);
+    memset(ref_img_.buffer_alloc, kBufFiller, ref_img_.frame_size);

-    vpx_memset(&cpy_img_, 0, sizeof(cpy_img_));
+    memset(&cpy_img_, 0, sizeof(cpy_img_));
    ASSERT_EQ(0, vp8_yv12_alloc_frame_buffer(&cpy_img_, width_, height_,
                                             VP8BORDERINPIXELS));
-    vpx_memset(cpy_img_.buffer_alloc, kBufFiller, cpy_img_.frame_size);
+    memset(cpy_img_.buffer_alloc, kBufFiller, cpy_img_.frame_size);
    ReferenceCopyFrame();
  }

@@ -87,8 +87,8 @@ class VpxScaleBase {

    // Fill the border pixels from the nearest image pixel.
    for (int y = 0; y < crop_height; ++y) {
-      vpx_memset(left, left[padding], padding);
-      vpx_memset(right, right[-1], right_extend);
+      memset(left, left[padding], padding);
+      memset(right, right[-1], right_extend);
      left += stride;
      right += stride;
    }
@@ -101,13 +101,13 @@ class VpxScaleBase {

    // The first row was already extended to the left and right. Copy it up.
    for (int y = 0; y < padding; ++y) {
-      vpx_memcpy(top, left, extend_width);
+      memcpy(top, left, extend_width);
      top += stride;
    }

    uint8_t *bottom = left + (crop_height * stride);
    for (int y = 0; y <  bottom_extend; ++y) {
-      vpx_memcpy(bottom, left + (crop_height - 1) * stride, extend_width);
+      memcpy(bottom, left + (crop_height - 1) * stride, extend_width);
      bottom += stride;
    }
  }
--- a/test/vpxdec.sh
+++ b/test/vpxdec.sh
@@ -16,7 +16,9 @@

 # Environment check: Make sure input is available.
 vpxdec_verify_environment() {
-  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ]; then
+  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ] || \
+    [ ! -e "${VP9_FPM_WEBM_FILE}" ] || \
+    [ ! -e "${VP9_LT_50_FRAMES_WEBM_FILE}" ] ; then
    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
@@ -78,8 +80,37 @@ vpxdec_vp9_webm() {
  fi
 }

+vpxdec_vp9_webm_frame_parallel() {
+  if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    for threads in 2 3 4 5 6 7 8; do
+      vpxdec "${VP9_FPM_WEBM_FILE}" --summary --noblit --threads=$threads \
+        --frame-parallel
+    done
+  fi
+}
+
+vpxdec_vp9_webm_less_than_50_frames() {
+  # ensure that reaching eof in webm_guess_framerate doesn't result in invalid
+  # frames in actual webm_read_frame calls.
+  if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly decoder="$(vpx_tool_path vpxdec)"
+    local readonly expected=10
+    local readonly num_frames=$(${VPX_TEST_PREFIX} "${decoder}" \
+      "${VP9_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \
+      | awk '/^[0-9]+ decoded frames/ { print $1 }')
+    if [ "$num_frames" -ne "$expected" ]; then
+      elog "Output frames ($num_frames) != expected ($expected)"
+      return 1
+    fi
+  fi
+}
+
 vpxdec_tests="vpxdec_vp8_ivf
              vpxdec_vp8_ivf_pipe_input
-              vpxdec_vp9_webm"
+              vpxdec_vp9_webm
+              vpxdec_vp9_webm_frame_parallel
+              vpxdec_vp9_webm_less_than_50_frames"

 run_tests vpxdec_verify_environment "${vpxdec_tests}"
--- a/test/vpxenc.sh
+++ b/test/vpxenc.sh
@@ -23,6 +23,13 @@ vpxenc_verify_environment() {
    elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then
+    if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then
+      elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in"
+      elog "LIBVPX_TEST_DATA_PATH."
+      return 1
+    fi
+  fi
  if [ -z "$(vpx_tool_path vpxenc)" ]; then
    elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
    return 1
@@ -49,6 +56,14 @@ yuv_input_hantro_collage() {
       --height="${YUV_RAW_INPUT_HEIGHT}""
 }

+y4m_input_non_square_par() {
+  echo ""${Y4M_NOSQ_PAR_INPUT}""
+}
+
+y4m_input_720p() {
+  echo ""${Y4M_720P_INPUT}""
+}
+
 # Echo default vpxenc real time encoding params. $1 is the codec, which defaults
 # to vp8 if unspecified.
 vpxenc_rt_params() {
@@ -57,7 +72,7 @@ vpxenc_rt_params() {
    --buf-initial-sz=500
    --buf-optimal-sz=600
    --buf-sz=1000
-    --cpu-used=-5
+    --cpu-used=-6
    --end-usage=cbr
    --error-resilient=1
    --kf-max-dist=90000
@@ -247,6 +262,63 @@ vpxenc_vp9_webm_rt() {
  fi
 }

+vpxenc_vp9_webm_rt_multithread_tiled() {
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm"
+    local readonly tilethread_min=2
+    local readonly tilethread_max=4
+    local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})"
+    local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
+
+    for threads in ${num_threads}; do
+      for tile_cols in ${num_tile_cols}; do
+        vpxenc $(y4m_input_720p) \
+          $(vpxenc_rt_params vp9) \
+          --threads=${threads} \
+          --tile-columns=${tile_cols} \
+          --output="${output}"
+      done
+    done
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+
+    rm "${output}"
+  fi
+}
+
+vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() {
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm"
+    local readonly tilethread_min=2
+    local readonly tilethread_max=4
+    local readonly num_threads="$(seq ${tilethread_min} ${tilethread_max})"
+    local readonly num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})"
+
+    for threads in ${num_threads}; do
+      for tile_cols in ${num_tile_cols}; do
+        vpxenc $(y4m_input_720p) \
+          $(vpxenc_rt_params vp9) \
+          --threads=${threads} \
+          --tile-columns=${tile_cols} \
+          --frame-parallel=1 \
+          --output="${output}"
+      done
+    done
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+
+    rm "${output}"
+  fi
+}
+
 vpxenc_vp9_webm_2pass() {
  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
     [ "$(webm_io_available)" = "yes" ]; then
@@ -320,6 +392,23 @@ vpxenc_vp9_webm_lag10_frames20() {
  fi
 }

+# TODO(fgalligan): Test that DisplayWidth is different than video width.
+vpxenc_vp9_webm_non_square_par() {
+  if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \
+     [ "$(webm_io_available)" = "yes" ]; then
+    local readonly output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm"
+    vpxenc $(y4m_input_non_square_par) \
+      --codec=vp9 \
+      --limit="${TEST_FRAMES}" \
+      --output="${output}"
+
+    if [ ! -e "${output}" ]; then
+      elog "Output file does not exist."
+      return 1
+    fi
+  fi
+}
+
 vpxenc_tests="vpxenc_vp8_ivf
              vpxenc_vp8_webm
              vpxenc_vp8_webm_rt
@@ -329,9 +418,12 @@ vpxenc_tests="vpxenc_vp8_ivf
              vpxenc_vp9_ivf
              vpxenc_vp9_webm
              vpxenc_vp9_webm_rt
+              vpxenc_vp9_webm_rt_multithread_tiled
+              vpxenc_vp9_webm_rt_multithread_tiled_frameparallel
              vpxenc_vp9_webm_2pass
              vpxenc_vp9_ivf_lossless
              vpxenc_vp9_ivf_minq0_maxq0
-              vpxenc_vp9_webm_lag10_frames20"
+              vpxenc_vp9_webm_lag10_frames20
+              vpxenc_vp9_webm_non_square_par"

 run_tests vpxenc_verify_environment "${vpxenc_tests}"
--- a/test/webm_video_source.h
+++ b/test/webm_video_source.h
@@ -69,6 +69,18 @@ class WebMVideoSource : public CompressedVideoSource {
    }
  }

+  void SeekToNextKeyFrame() {
+    ASSERT_TRUE(vpx_ctx_->file != NULL);
+    do {
+      const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_, &buf_sz_);
+      ASSERT_GE(status, 0) << "webm_read_frame failed";
+      ++frame_;
+      if (status == 1) {
+        end_of_file_ = true;
+      }
+    } while (!webm_ctx_->is_key_frame && !end_of_file_);
+  }
+
  virtual const uint8_t *cxdata() const {
    return end_of_file_ ? NULL : buf_;
  }
--- a/test/yuv_video_source.h
+++ b/test/yuv_video_source.h
@@ -0,0 +1,151 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef TEST_YUV_VIDEO_SOURCE_H_
+#define TEST_YUV_VIDEO_SOURCE_H_
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+
+#include "test/video_source.h"
+#include "vpx/vpx_image.h"
+
+namespace libvpx_test {
+
+// This class extends VideoSource to allow parsing of raw YUV
+// formats of various color sampling and bit-depths so that we can
+// do actual file encodes.
+class YUVVideoSource : public VideoSource {
+ public:
+  YUVVideoSource(const std::string &file_name, vpx_img_fmt format,
+                 unsigned int width, unsigned int height,
+                 int rate_numerator, int rate_denominator,
+                 unsigned int start, int limit)
+      : file_name_(file_name),
+        input_file_(NULL),
+        img_(NULL),
+        start_(start),
+        limit_(limit),
+        frame_(0),
+        width_(0),
+        height_(0),
+        format_(VPX_IMG_FMT_NONE),
+        framerate_numerator_(rate_numerator),
+        framerate_denominator_(rate_denominator) {
+    // This initializes format_, raw_size_, width_, height_ and allocates img.
+    SetSize(width, height, format);
+  }
+
+  virtual ~YUVVideoSource() {
+    vpx_img_free(img_);
+    if (input_file_)
+      fclose(input_file_);
+  }
+
+  virtual void Begin() {
+    if (input_file_)
+      fclose(input_file_);
+    input_file_ = OpenTestDataFile(file_name_);
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;
+    if (start_)
+      fseek(input_file_, static_cast<unsigned>(raw_size_) * start_, SEEK_SET);
+
+    frame_ = start_;
+    FillFrame();
+  }
+
+  virtual void Next() {
+    ++frame_;
+    FillFrame();
+  }
+
+  virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL;  }
+
+  // Models a stream where Timebase = 1/FPS, so pts == frame.
+  virtual vpx_codec_pts_t pts() const { return frame_; }
+
+  virtual unsigned long duration() const { return 1; }
+
+  virtual vpx_rational_t timebase() const {
+    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
+    return t;
+  }
+
+  virtual unsigned int frame() const { return frame_; }
+
+  virtual unsigned int limit() const { return limit_; }
+
+  virtual void SetSize(unsigned int width, unsigned int height,
+                       vpx_img_fmt format) {
+    if (width != width_ || height != height_ || format != format_) {
+      vpx_img_free(img_);
+      img_ = vpx_img_alloc(NULL, format, width, height, 1);
+      ASSERT_TRUE(img_ != NULL);
+      width_ = width;
+      height_ = height;
+      format_ = format;
+      switch (format) {
+        case VPX_IMG_FMT_I420:
+          raw_size_ = width * height * 3 / 2;
+          break;
+        case VPX_IMG_FMT_I422:
+          raw_size_ = width * height * 2;
+          break;
+        case VPX_IMG_FMT_I440:
+          raw_size_ = width * height * 2;
+          break;
+        case VPX_IMG_FMT_I444:
+          raw_size_ = width * height * 3;
+          break;
+        case VPX_IMG_FMT_I42016:
+          raw_size_ = width * height * 3;
+          break;
+        case VPX_IMG_FMT_I42216:
+          raw_size_ = width * height * 4;
+          break;
+        case VPX_IMG_FMT_I44016:
+          raw_size_ = width * height * 4;
+          break;
+        case VPX_IMG_FMT_I44416:
+          raw_size_ = width * height * 6;
+          break;
+        default:
+          ASSERT_TRUE(0);
+      }
+    }
+  }
+
+  virtual void FillFrame() {
+    ASSERT_TRUE(input_file_ != NULL);
+    // Read a frame from input_file.
+    if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) {
+      limit_ = frame_;
+    }
+  }
+
+ protected:
+  std::string file_name_;
+  FILE *input_file_;
+  vpx_image_t *img_;
+  size_t raw_size_;
+  unsigned int start_;
+  unsigned int limit_;
+  unsigned int frame_;
+  unsigned int width_;
+  unsigned int height_;
+  vpx_img_fmt format_;
+  int framerate_numerator_;
+  int framerate_denominator_;
+};
+
+}  // namespace libvpx_test
+
+#endif  // TEST_YUV_VIDEO_SOURCE_H_
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
 Name: libyuv
 URL: http://code.google.com/p/libyuv/
-Version: 1060
+Version: 1305
 License: BSD
 License File: LICENSE

@@ -13,4 +13,4 @@ which down-samples the original input video (f.g. 1280x720) a number of times
 in order to encode multiple resolution bit streams.

 Local Modifications:
-cherry-pick 'Issue 24479004: Fix building with MSVC for arm'
+cherry pick r1311 'disable nv12 avx2 for vs9/10 that dont support avx2 instructions.'
--- a/Show More
+++ b/Show More