Release v1.6.1 Long Tailed Duck

Change-Id: If27447472417c7ed34238295427ddb9da0561725
Merge "Add mips dspr2 partial idct tests"
2017-01-12 12:27:27 -08:00 · 2017-01-09 19:49:02 +00:00 · 2017-01-09 19:47:47 +00:00 · 2017-01-09 19:47:00 +00:00 · 2017-01-09 19:46:18 +00:00 · 2017-01-09 19:45:54 +00:00
365 changed files with 31081 additions and 24514 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,10 +1,11 @@
 ---
 Language:        Cpp
 # BasedOnStyle:  Google
-# Generated with clang-format 3.7.1
+# Generated with clang-format 3.8.1
 AccessModifierOffset: -1
-AlignAfterOpenBracket: true
+AlignAfterOpenBracket: Align
 AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
 AlignEscapedNewlinesLeft: true
 AlignOperands:   true
 AlignTrailingComments: true
@@ -15,10 +16,23 @@ AllowShortFunctionsOnASingleLine: All
 AllowShortIfStatementsOnASingleLine: true
 AllowShortLoopsOnASingleLine: true
 AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: true
 BinPackArguments: true
 BinPackParameters: true
+BraceWrapping:
+  AfterClass:      false
+  AfterControlStatement: false
+  AfterEnum:       false
+  AfterFunction:   false
+  AfterNamespace:  false
+  AfterObjCDeclaration: false
+  AfterStruct:     false
+  AfterUnion:      false
+  BeforeCatch:     false
+  BeforeElse:      false
+  IndentBraces:    false
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Attach
 BreakBeforeTernaryOperators: true
@@ -33,6 +47,13 @@ DerivePointerAlignment: false
 DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
 ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
+IncludeCategories:
+  - Regex:           '^<.*\.h>'
+    Priority:        1
+  - Regex:           '^<.*'
+    Priority:        2
+  - Regex:           '.*'
+    Priority:        3
 IndentCaseLabels: true
 IndentWidth:     2
 IndentWrappedFunctionNames: false
@@ -51,6 +72,8 @@ PenaltyBreakString: 1000
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Right
+ReflowComments:  true
+SortIncludes:    false
 SpaceAfterCStyleCast: false
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
--- a/.gitignore
+++ b/.gitignore
@@ -37,9 +37,9 @@
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
+/examples/vp9cx_set_ref
 /examples/vp9_lossless_encoder
-/examples/vp9_spatial_scalable_encoder
-/examples/vpx_temporal_scalable_patterns
+/examples/vp9_spatial_svc_encoder
 /examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
@@ -50,6 +50,9 @@
 /samples.dox
 /test_intra_pred_speed
 /test_libvpx
+/tools.dox
+/tools/*.dox
+/tools/tiny_ssim
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
--- a/15
+++ b/15
@@ -7,6 +7,8 @@ Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
 Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
+Aleksey Vasenev <margtu-fivt@ya.ru>
+Alexander Potapenko <glider@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
@@ -27,6 +29,7 @@ Christian Duvivier <cduvivier@google.com>
 Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
+Deepa K G <deepa.kg@ittiam.com>
 Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
@@ -37,6 +40,7 @@ Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
+Gabriel Marin <gmx@chromium.org>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
 Geza Lore <gezalore@gmail.com>
 Ghislain MARY <ghislainmary2@gmail.com>
@@ -48,6 +52,7 @@ Hangyu Kuang <hkuang@google.com>
 Hanno Böck <hanno@hboeck.de>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
+Ivan Krasin <krasin@chromium.org>
 Ivan Maltz <ivanmaltz@google.com>
 Jacek Caban <cjacek@gmail.com>
 Jacky Chen <jackychen@google.com>
@@ -61,6 +66,7 @@ Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
+Jerome Jiang <jianj@google.com>
 Jia Jia <jia.jia@linaro.org>
 Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
@@ -75,6 +81,7 @@ Joshua Litt <joshualitt@google.com>
 Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
+Kaustubh Raste <kaustubh.raste@imgtec.com>
 KO Myung-Hun <komh@chollian.net>
 Lawrence Velázquez <larryv@macports.org>
 Linfeng Zhang <linfengz@google.com>
@@ -91,8 +98,11 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
+Min Chen <chenm003@gmail.com>
 Minghai Shang <minghai@google.com>
+Min Ye <yeemmi@google.com>
 Morton Jonuschat <yabawock@gmail.com>
+Nathan E. Egge <negge@mozilla.com>
 Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
@@ -101,16 +111,19 @@ Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
 Pengchong Jin <pengchong@google.com>
+Peter Boström <pbos@google.com>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
 Ralph Giles <giles@xiph.org>
+Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
 Rob Bradford <rob@linux.intel.com>
 Ronald S. Bultje <rsbultje@gmail.com>
 Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
+Sarah Parker <sarahparker@google.com>
 Sasi Inguva <isasi@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
@@ -130,6 +143,8 @@ Thijs Vermeir <thijsvermeir@gmail.com>
 Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
+Tristan Matthews <le.businessman@gmail.com>
+Urvang Joshi <urvang@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
 Yaowu Xu <yaowu@google.com>
 Yi Luo <luoyi@google.com>
--- a/16
+++ b/16
@@ -1,3 +1,19 @@
+2017-01-09 v1.6.1 "Long Tailed Duck"
+  This release improves upon the VP9 encoder and speeds up the encoding and
+  decoding processes.
+
+  - Upgrading:
+    This release is ABI compatible with 1.6.0.
+
+  - Enhancements:
+    Faster VP9 encoding and decoding.
+    High bit depth builds now provide similar speed for 8 bit encode and decode
+    for x86 targets. Other platforms and higher bit depth improvements are in
+    progress.
+
+  - Bug Fixes:
+    A variety of fuzzing issues.
+
 2016-07-20 v1.6.0 "Khaki Campbell Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
--- a/3
+++ b/3
@@ -1,4 +1,4 @@
-README - 20 July 2016
+README - 9 January 2017

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,6 +47,7 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

+    arm64-android-gcc
    arm64-darwin-gcc
    arm64-linux-gcc
    armv7-android-gcc
--- a/args.c
+++ b/args.c
@@ -13,6 +13,7 @@
 #include <limits.h>
 #include "args.h"

+#include "vpx/vpx_integer.h"
 #include "vpx_ports/msvc.h"

 #if defined(__GNUC__) && __GNUC__
@@ -118,13 +119,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
 }

 unsigned int arg_parse_uint(const struct arg *arg) {
-  long int rawval;
+  uint32_t rawval;
  char *endptr;

-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = (uint32_t)strtoul(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
-    if (rawval >= 0 && rawval <= UINT_MAX) return (unsigned int)rawval;
+    if (rawval <= UINT_MAX) return rawval;

    die("Option %s: Value %ld out of range for unsigned int\n", arg->name,
        rawval);
@@ -135,10 +136,10 @@ unsigned int arg_parse_uint(const struct arg *arg) {
 }

 int arg_parse_int(const struct arg *arg) {
-  long int rawval;
+  int32_t rawval;
  char *endptr;

-  rawval = strtol(arg->val, &endptr, 10);
+  rawval = (int32_t)strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -41,10 +41,32 @@
 # Running ndk-build will build libvpx and include it in your project.
 #

+# Alternatively, building the examples and unit tests can be accomplished in the
+# following way:
+#
+# Create a standalone toolchain from the NDK:
+# https://developer.android.com/ndk/guides/standalone_toolchain.html
+#
+# For example - to test on arm64 devices with clang:
+# $NDK/build/tools/make_standalone_toolchain.py \
+#   --arch arm64 --install-dir=/tmp/my-android-toolchain
+# export PATH=/tmp/my-android-toolchain/bin:$PATH
+# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \
+#   --target=arm64-android-gcc
+#
+# Push the resulting binaries to a device and run them:
+# adb push test_libvpx /data/tmp/test_libvpx
+# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\*
+#
+# Make sure to push the test data as well and set LIBVPX_TEST_DATA
+
 CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
 ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
+ifneq ($(V),1)
+  qexec := @
+endif

 # Use the makefiles generated by upstream configure to determine which files to
 # build. Also set any architecture-specific flags.
@@ -52,7 +74,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
-  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
+  include $(CONFIG_DIR)libs-arm64-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
@@ -82,10 +104,10 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

-.PRECIOUS: %.asm.s
-$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
-	@mkdir -p $(dir $@)
-	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
+.PRECIOUS: %.asm.S
+$(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
+	$(qexec)mkdir -p $(dir $@)
+	$(qexec)$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

 # For building *_rtcd.h, which have rules in libs.mk
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
@@ -113,7 +135,7 @@ endif

 # Pull out assembly files, splitting NEON from the rest.  This is
 # done to specify that the NEON assembly files use NEON assembler flags.
-# x86 assembly matches %.asm, arm matches %.asm.s
+# x86 assembly matches %.asm, arm matches %.asm.S

 # x86:

@@ -121,31 +143,44 @@ CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE))
 LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file))

 # arm:
-CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
+CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.S, $(CODEC_SRCS_UNIQUE))
 CODEC_SRCS_ASM_ARM = $(foreach v, \
                     $(CODEC_SRCS_ASM_ARM_ALL), \
                     $(if $(findstring neon,$(v)),,$(v)))
-CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.s, \
-                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \
+                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
                         $(CODEC_SRCS_ASM_ARM))
 LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)

 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
+  ASM_INCLUDES := vpx_dsp/arm/idct_neon.asm.S
  CODEC_SRCS_ASM_NEON = $(foreach v, \
                        $(CODEC_SRCS_ASM_ARM_ALL),\
                        $(if $(findstring neon,$(v)),$(v),))
-  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.s, \
-                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
+  CODEC_SRCS_ASM_NEON := $(filter-out $(addprefix %, $(ASM_INCLUDES)), \
+                         $(CODEC_SRCS_ASM_NEON))
+  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \
+                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
                                $(CODEC_SRCS_ASM_NEON))
-  LOCAL_SRC_FILES += $(patsubst %.s, \
-                     %.s.neon, \
+  LOCAL_SRC_FILES += $(patsubst %.S, \
+                     %.S.neon, \
                     $(CODEC_SRCS_ASM_NEON_ADS2GAS))
+
+  NEON_ASM_TARGETS = $(patsubst %.S, \
+                     $(ASM_CNV_PATH)/libvpx/%.S, \
+                     $(CODEC_SRCS_ASM_NEON))
+# add a dependency to the full path to the ads2gas output to ensure the
+# includes are converted first.
+ifneq ($(strip $(NEON_ASM_TARGETS)),)
+$(NEON_ASM_TARGETS): $(addprefix $(ASM_CNV_PATH)/libvpx/, $(ASM_INCLUDES))
+endif
 endif

 LOCAL_CFLAGS += \
    -DHAVE_CONFIG_H=vpx_config.h \
    -I$(LIBVPX_PATH) \
-    -I$(ASM_CNV_PATH)
+    -I$(ASM_CNV_PATH) \
+    -I$(ASM_CNV_PATH)/libvpx

 LOCAL_MODULE := libvpx

@@ -166,7 +201,8 @@ endif
 $$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
 $$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h

-ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
+rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a
+ifneq ($$(findstring $(TARGET_ARCH_ABI),$$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
 $$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
 endef
@@ -176,16 +212,17 @@ $(eval $(call rtcd_dep_template))
 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
-	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	@$(RM) -r $(ASM_CNV_PATH)
-	@$(RM) $(CLEAN-OBJS)
+	$(qexec)$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	$(qexec)$(RM) -r $(ASM_CNV_PATH)
+	$(qexec)$(RM) $(CLEAN-OBJS)

 ifeq ($(ENABLE_SHARED),1)
+  LOCAL_CFLAGS += -fPIC
  include $(BUILD_SHARED_LIBRARY)
 else
  include $(BUILD_STATIC_LIBRARY)
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
-$(call import-module,cpufeatures)
+$(call import-module,android/cpufeatures)
 endif
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -90,7 +90,7 @@ all:

 .PHONY: clean
 clean::
-	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
+	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.S.o=.asm.S)
 	rm -f $(CLEAN-OBJS)

 .PHONY: clean
@@ -180,13 +180,13 @@ $(BUILD_PFX)%.asm.o: %.asm
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

-$(BUILD_PFX)%.s.d: %.s
+$(BUILD_PFX)%.S.d: %.S
 	$(if $(quiet),@echo "    [DEP] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \
            --build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@

-$(BUILD_PFX)%.s.o: %.s
+$(BUILD_PFX)%.S.o: %.S
 	$(if $(quiet),@echo "    [AS] $@")
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<
@@ -198,8 +198,8 @@ $(BUILD_PFX)%.c.S: %.c
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

-.PRECIOUS: %.asm.s
-$(BUILD_PFX)%.asm.s: %.asm
+.PRECIOUS: %.asm.S
+$(BUILD_PFX)%.asm.S: %.asm
 	$(if $(quiet),@echo "    [ASM CONVERSION] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(ASM_CONVERSION) <$< >$@
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -138,14 +138,6 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

-    # RN to .req
-    if (s/RN\s+([Rr]\d+|lr)/.req $1/)
-    {
-        print;
-        print "$comment_sub$comment\n" if defined $comment;
-        next;
-    }
-
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -120,18 +120,6 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

-    # Build a hash of all the register - alias pairs.
-    if (s/(.*)RN(.*)/$1 .req $2/g)
-    {
-        $register_aliases{trim($1)} = trim($2);
-        next;
-    }
-
-    while (($key, $value) = each(%register_aliases))
-    {
-        s/\b$key\b/$value/g;
-    }
-
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/;
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -635,7 +635,7 @@ setup_gnu_toolchain() {
  AS=${AS:-${CROSS}as}
  STRIP=${STRIP:-${CROSS}strip}
  NM=${NM:-${CROSS}nm}
-  AS_SFX=.s
+  AS_SFX=.S
  EXE_SFX=
 }

@@ -926,7 +926,7 @@ EOF
          ;;
        vs*)
          asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
-          AS_SFX=.s
+          AS_SFX=.S
          msvs_arch_dir=arm-msvs
          disable_feature multithread
          disable_feature unit_tests
@@ -936,6 +936,7 @@ EOF
            # only "AppContainerApplication" which requires an AppxManifest.
            # Therefore disable the examples, just build the library.
            disable_feature examples
+            disable_feature tools
          fi
          ;;
        rvct)
@@ -978,47 +979,50 @@ EOF
          ;;

        android*)
-          if [ -z "${sdk_path}" ]; then
-            die "Must specify --sdk-path for Android builds."
-          fi
+          if [ -n "${sdk_path}" ]; then
+            SDK_PATH=${sdk_path}
+            COMPILER_LOCATION=`find "${SDK_PATH}" \
+              -name "arm-linux-androideabi-gcc*" -print -quit`
+            TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
+            CC=${TOOLCHAIN_PATH}gcc
+            CXX=${TOOLCHAIN_PATH}g++
+            AR=${TOOLCHAIN_PATH}ar
+            LD=${TOOLCHAIN_PATH}gcc
+            AS=${TOOLCHAIN_PATH}as
+            STRIP=${TOOLCHAIN_PATH}strip
+            NM=${TOOLCHAIN_PATH}nm

-          SDK_PATH=${sdk_path}
-          COMPILER_LOCATION=`find "${SDK_PATH}" \
-                             -name "arm-linux-androideabi-gcc*" -print -quit`
-          TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
-          CC=${TOOLCHAIN_PATH}gcc
-          CXX=${TOOLCHAIN_PATH}g++
-          AR=${TOOLCHAIN_PATH}ar
-          LD=${TOOLCHAIN_PATH}gcc
-          AS=${TOOLCHAIN_PATH}as
-          STRIP=${TOOLCHAIN_PATH}strip
-          NM=${TOOLCHAIN_PATH}nm
-
-          if [ -z "${alt_libc}" ]; then
-            alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
-              awk '{n = split($0,a,"/"); \
+            if [ -z "${alt_libc}" ]; then
+              alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
+                awk '{n = split($0,a,"/"); \
                split(a[n-1],b,"-"); \
                print $0 " " b[2]}' | \
                sort -g -k 2 | \
                awk '{ print $1 }' | tail -1`
-          fi
+            fi

-          if [ -d "${alt_libc}" ]; then
-            add_cflags "--sysroot=${alt_libc}"
-            add_ldflags "--sysroot=${alt_libc}"
-          fi
+            if [ -d "${alt_libc}" ]; then
+              add_cflags "--sysroot=${alt_libc}"
+              add_ldflags "--sysroot=${alt_libc}"
+            fi

-          # linker flag that routes around a CPU bug in some
-          # Cortex-A8 implementations (NDK Dev Guide)
-          add_ldflags "-Wl,--fix-cortex-a8"
+            # linker flag that routes around a CPU bug in some
+            # Cortex-A8 implementations (NDK Dev Guide)
+            add_ldflags "-Wl,--fix-cortex-a8"

-          enable_feature pic
-          soft_enable realtime_only
-          if [ ${tgt_isa} = "armv7" ]; then
-            soft_enable runtime_cpu_detect
-          fi
-          if enabled runtime_cpu_detect; then
-            add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
+            enable_feature pic
+            soft_enable realtime_only
+            if [ ${tgt_isa} = "armv7" ]; then
+              soft_enable runtime_cpu_detect
+            fi
+            if enabled runtime_cpu_detect; then
+              add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
+            fi
+          else
+            echo "Assuming standalone build with NDK toolchain."
+            echo "See build/make/Android.mk for details."
+            check_add_ldflags -static
+            soft_enable unit_tests
          fi
          ;;

@@ -1031,7 +1035,7 @@ EOF
          STRIP="$(${XCRUN_FIND} strip)"
          NM="$(${XCRUN_FIND} nm)"
          RANLIB="$(${XCRUN_FIND} ranlib)"
-          AS_SFX=.s
+          AS_SFX=.S
          LD="${CXX:-$(${XCRUN_FIND} ld)}"

          # ASFLAGS is written here instead of using check_add_asflags
@@ -1392,6 +1396,7 @@ EOF
      *-win*-vs*)
        ;;
      *-android-gcc)
+        # bionic includes basic pthread functionality, obviating -lpthread.
        ;;
      *)
        check_header pthread.h && add_extralibs -lpthread
--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -82,7 +82,7 @@ generate_filter() {
                       | sed -e "s,$src_path_bare,," \
                             -e 's/^[\./]\+//g' -e 's,[:/ ],_,g')

-                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ]) && $asm_use_custom_step; then
+                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $asm_use_custom_step; then
                    # Avoid object file name collisions, i.e. vpx_config.c and
                    # vpx_config.asm produce the same object file without
                    # this additional suffix.
@@ -203,7 +203,7 @@ for opt in "$@"; do
            # The paths in file_list are fixed outside of the loop.
            file_list[${#file_list[@]}]="$opt"
            case "$opt" in
-                 *.asm|*.s) uses_asm=true
+                 *.asm|*.[Ss]) uses_asm=true
                 ;;
            esac
        ;;
@@ -452,7 +452,7 @@ generate_vcxproj() {
    done

    open_tag ItemGroup
-    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s"
+    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s;S"
    close_tag ItemGroup
    open_tag ItemGroup
    generate_filter "Header Files"   "h;hm;inl;inc;xsd"
--- a/28
+++ b/28
@@ -22,6 +22,7 @@ show_help(){
 Advanced options:
  ${toggle_libs}                  libraries
  ${toggle_examples}              examples
+  ${toggle_tools}                 tools
  ${toggle_docs}                  documentation
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
@@ -97,6 +98,7 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
+all_platforms="${all_platforms} arm64-android-gcc"
 all_platforms="${all_platforms} arm64-darwin-gcc"
 all_platforms="${all_platforms} arm64-linux-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
@@ -154,7 +156,7 @@ all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
 # note that these should be in dependency order for now.
-all_targets="libs examples docs"
+all_targets="libs examples tools docs"

 # all targets available are enabled, by default.
 for t in ${all_targets}; do
@@ -330,6 +332,7 @@ CMDLINE_SELECT="

    libs
    examples
+    tools
    docs
    libc
    as
@@ -475,7 +478,7 @@ EOF
    #
    # Write makefiles for all enabled targets
    #
-    for tgt in libs examples docs solution; do
+    for tgt in libs examples tools docs solution; do
        tgt_fn="$tgt-$toolchain.mk"

        if enabled $tgt; then
@@ -574,17 +577,22 @@ process_toolchain() {
        check_add_cflags -Wimplicit-function-declaration
        check_add_cflags -Wuninitialized
        check_add_cflags -Wunused
-        case ${CC} in
-          *clang*)
-              # libvpx and/or clang have issues with aliasing:
-              # https://code.google.com/p/webm/issues/detail?id=603
-              # work around them until they are fixed
-              check_add_cflags -fno-strict-aliasing
-          ;;
-        esac
+        # -Wextra has some tricky cases. Rather than fix them all now, get the
+        # flag for as many files as possible and fix the remaining issues
+        # piecemeal.
+        # https://bugs.chromium.org/p/webm/issues/detail?id=1069
+        check_add_cflags -Wextra
+        # check_add_cflags also adds to cxxflags. gtest does not do well with
+        # -Wundef so add it explicitly to CFLAGS only.
+        check_cflags -Wundef && add_cflags_only -Wundef
        if enabled mips || [ -z "${INLINE}" ]; then
          enabled extra_warnings || check_add_cflags -Wno-unused-function
        fi
+        if ! enabled vp9_highbitdepth; then
+          # Avoid this warning for third_party C++ sources. Some reorganization
+          # would be needed to apply this only to test/*.cc.
+          check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
+        fi
    fi

    if enabled icc; then
--- a/examples.mk
+++ b/examples.mk
@@ -76,6 +76,7 @@ vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += y4menc.c y4menc.h
 ifeq ($(CONFIG_LIBYUV),yes)
  vpxdec.SRCS                 += $(LIBYUV_SRCS)
+  $(BUILD_PFX)third_party/libyuv/%.cc.o: CXXFLAGS += -Wno-unused-parameter
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
  vpxdec.SRCS                 += $(LIBWEBM_COMMON_SRCS)
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -92,8 +92,8 @@ int main(int argc, char **argv) {
  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing.", argv[2]);

-  n = strtol(argv[3], &nptr, 0);
-  m = strtol(nptr + 1, NULL, 0);
+  n = (int)strtol(argv[3], &nptr, 0);
+  m = (int)strtol(nptr + 1, NULL, 0);
  is_range = (*nptr == '-');
  if (!n || !m || (*nptr != '-' && *nptr != '/'))
    die("Couldn't parse pattern %s.\n", argv[3]);
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -174,8 +174,8 @@ int main(int argc, char **argv) {
  }
  assert(encoder != NULL);
  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[2], NULL, 0);
-  info.frame_height = strtol(argv[3], NULL, 0);
+  info.frame_width = (int)strtol(argv[2], NULL, 0);
+  info.frame_height = (int)strtol(argv[3], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -150,7 +150,7 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = { 0 };
+  VpxVideoInfo info = { 0, 0, 0, { 0, 0 } };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;
@@ -175,14 +175,14 @@ int main(int argc, char **argv) {
  infile_arg = argv[4];
  outfile_arg = argv[5];
  keyframe_interval_arg = argv[6];
-  max_frames = strtol(argv[8], NULL, 0);
+  max_frames = (int)strtol(argv[8], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(width_arg, NULL, 0);
-  info.frame_height = strtol(height_arg, NULL, 0);
+  info.frame_width = (int)strtol(width_arg, NULL, 0);
+  info.frame_height = (int)strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

@@ -196,7 +196,7 @@ int main(int argc, char **argv) {
    die("Failed to allocate image.");
  }

-  keyframe_interval = strtol(keyframe_interval_arg, NULL, 0);
+  keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0);
  if (keyframe_interval < 0) die("Invalid keyframe interval value.");

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
@@ -209,7 +209,7 @@ int main(int argc, char **argv) {
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = strtol(argv[7], NULL, 0);
+  cfg.g_error_resilient = (vpx_codec_er_flags_t)strtoul(argv[7], NULL, 0);

  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
  if (!writer) die("Failed to open %s for writing.", outfile_arg);
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -209,13 +209,13 @@ int main(int argc, char **argv) {

  if (argc != 7) die("Invalid number of arguments.");

-  max_frames = strtol(argv[6], NULL, 0);
+  max_frames = (int)strtol(argv[6], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder) die("Unsupported codec.");

-  w = strtol(width_arg, NULL, 0);
-  h = strtol(height_arg, NULL, 0);
+  w = (int)strtol(width_arg, NULL, 0);
+  h = (int)strtol(height_arg, NULL, 0);

  if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
    die("Invalid frame size: %dx%d", w, h);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -240,9 +240,9 @@ static void set_temporal_layer_pattern(int num_temporal_layers,
      cfg->ts_layer_id[1] = 2;
      cfg->ts_layer_id[2] = 1;
      cfg->ts_layer_id[3] = 2;
-      // Use 40/20/40 bit allocation as example.
-      cfg->ts_target_bitrate[0] = 0.4f * bitrate;
-      cfg->ts_target_bitrate[1] = 0.6f * bitrate;
+      // Use 45/20/35 bit allocation as example.
+      cfg->ts_target_bitrate[0] = 0.45f * bitrate;
+      cfg->ts_target_bitrate[1] = 0.65f * bitrate;
      cfg->ts_target_bitrate[2] = bitrate;

      /* 0=L, 1=GF, 2=ARF */
@@ -340,8 +340,7 @@ int main(int argc, char **argv) {
  unsigned int num_temporal_layers[NUM_ENCODERS] = { 3, 3, 3 };

  if (argc != (7 + 3 * NUM_ENCODERS))
-    die(
-        "Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
+    die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
        "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output "
        "psnr?> \n",
        argv[0]);
@@ -461,7 +460,7 @@ int main(int argc, char **argv) {

  // Set the number of threads per encode/spatial layer.
  // (1, 1, 1) means no encoder threading.
-  cfg[0].g_threads = 2;
+  cfg[0].g_threads = 1;
  cfg[1].g_threads = 1;
  cfg[2].g_threads = 1;

@@ -508,9 +507,11 @@ int main(int argc, char **argv) {

  /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
  /* Enable denoising for the highest-resolution encoder. */
-  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 4))
+  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
    die_codec(&codec[0], "Failed to set noise_sensitivity");
-  for (i = 1; i < NUM_ENCODERS; i++) {
+  if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1))
+    die_codec(&codec[1], "Failed to set noise_sensitivity");
+  for (i = 2; i < NUM_ENCODERS; i++) {
    if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
      die_codec(&codec[i], "Failed to set noise_sensitivity");
  }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -51,6 +51,7 @@

 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
+#include "vp8/common/common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -93,18 +94,22 @@ static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,

 int main(int argc, char **argv) {
  FILE *infile = NULL;
-  vpx_codec_ctx_t codec = { 0 };
-  vpx_codec_enc_cfg_t cfg = { 0 };
+  vpx_codec_ctx_t codec;
+  vpx_codec_enc_cfg_t cfg;
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = { 0 };
+  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  int update_frame_num = 0;
  const int fps = 30;       // TODO(dkovalev) add command line argument
  const int bitrate = 200;  // kbit/s TODO(dkovalev) add command line argument

+  vp8_zero(codec);
+  vp8_zero(cfg);
+  vp8_zero(info);
+
  exec_name = argv[0];

  if (argc != 6) die("Invalid number of arguments");
@@ -117,8 +122,8 @@ int main(int argc, char **argv) {
  if (!update_frame_num) die("Couldn't parse frame number '%s'\n", argv[5]);

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  info.frame_width = (int)strtol(argv[1], NULL, 0);
+  info.frame_height = (int)strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -14,6 +14,7 @@

 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"
+#include "vp9/common/vp9_common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -62,11 +63,13 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = { 0 };
+  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;

+  vp9_zero(info);
+
  exec_name = argv[0];

  if (argc < 5) die("Invalid number of arguments");
@@ -75,8 +78,8 @@ int main(int argc, char **argv) {
  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(argv[1], NULL, 0);
-  info.frame_height = strtol(argv[2], NULL, 0);
+  info.frame_width = (int)strtol(argv[1], NULL, 0);
+  info.frame_height = (int)strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -84,6 +84,8 @@ static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "speed configuration");
 static const arg_def_t aqmode_arg =
    ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
+static const arg_def_t bitrates_arg =
+    ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -124,6 +126,7 @@ static const arg_def_t *svc_args[] = { &frames_arg,
 #endif
                                       &speed_arg,
                                       &rc_end_usage_arg,
+                                       &bitrates_arg,
                                       NULL };

 static const uint32_t default_frames_to_skip = 0;
@@ -250,6 +253,9 @@ static void parse_command_line(int argc, const char **argv_,
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
               string_options, arg.val);
+    } else if (arg_match(&arg, &bitrates_arg, argi)) {
+      snprintf(string_options, sizeof(string_options), "%s bitrates=%s",
+               string_options, arg.val);
    } else if (arg_match(&arg, &passes_arg, argi)) {
      passes = arg_parse_uint(&arg);
      if (passes < 1 || passes > 2) {
@@ -417,7 +423,6 @@ static void set_rate_control_stats(struct RateControlStats *rc,
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
-      const int tlayer0 = sl * cfg->ts_number_layers;
      if (cfg->ts_number_layers == 1)
        rc->layer_framerate[layer] = framerate;
      else
@@ -428,8 +433,8 @@ static void set_rate_control_stats(struct RateControlStats *rc,
                      cfg->layer_target_bitrate[layer - 1]) /
            (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
      } else {
-        rc->layer_pfb[tlayer0] = 1000.0 * cfg->layer_target_bitrate[tlayer0] /
-                                 rc->layer_framerate[tlayer0];
+        rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
+                               rc->layer_framerate[layer];
      }
      rc->layer_input_frames[layer] = 0;
      rc->layer_enc_frames[layer] = 0;
@@ -449,12 +454,13 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
                                          vpx_codec_enc_cfg_t *cfg,
                                          int frame_cnt) {
  unsigned int sl, tl;
-  int tot_num_frames = 0;
  double perc_fluctuation = 0.0;
+  int tot_num_frames = 0;
  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
         cfg->ss_number_layers, cfg->ts_number_layers);
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
+    tot_num_frames = 0;
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
      const int num_dropped =
@@ -462,7 +468,7 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
              ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
              : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
                 1);
-      if (!sl) tot_num_frames += rc->layer_input_frames[layer];
+      tot_num_frames += rc->layer_input_frames[layer];
      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
                                          rc->layer_encoding_bitrate[layer] /
                                          tot_num_frames;
@@ -497,8 +503,7 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
  if (frame_cnt != tot_num_frames)
-    die(
-        "Error: Number of input frames not equal to output encoded frames != "
+    die("Error: Number of input frames not equal to output encoded frames != "
        "%d tot_num_frames = %d\n",
        frame_cnt, tot_num_frames);
 }
@@ -621,7 +626,7 @@ int main(int argc, const char **argv) {
  struct RateControlStats rc;
  vpx_svc_layer_id_t layer_id;
  vpx_svc_ref_frame_config_t ref_frame_config;
-  int sl, tl;
+  unsigned int sl, tl;
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
  double framerate = 30.0;
@@ -674,7 +679,7 @@ int main(int argc, const char **argv) {
  }
 #if OUTPUT_RC_STATS
  // For now, just write temporal layer streams.
-  // TODO(wonkap): do spatial by re-writing superframe.
+  // TODO(marpan): do spatial by re-writing superframe.
  if (svc_ctx.output_rc_stat) {
    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
      char file_name[PATH_MAX];
@@ -696,6 +701,8 @@ int main(int argc, const char **argv) {
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+  if (svc_ctx.speed >= 5)
+    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);

  // Encode frames
  while (!end_of_stream) {
@@ -731,7 +738,7 @@ int main(int argc, const char **argv) {
                        &ref_frame_config);
      // Keep track of input frames, to account for frame drops in rate control
      // stats/metrics.
-      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
+      for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) {
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
                                layer_id.temporal_layer_id];
      }
@@ -763,7 +770,7 @@ int main(int argc, const char **argv) {
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
 #if OUTPUT_RC_STATS
-            // TODO(marpan/wonkap): Put this (to line728) in separate function.
+            // TODO(marpan): Put this (to line728) in separate function.
            if (svc_ctx.output_rc_stat) {
              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
              parse_superframe_index(cx_pkt->data.frame.buf,
@@ -794,7 +801,7 @@ int main(int argc, const char **argv) {
                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
                  // Keep count of rate control stats per layer, for non-key
                  // frames.
-                  if (tl == layer_id.temporal_layer_id &&
+                  if (tl == (unsigned int)layer_id.temporal_layer_id &&
                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
                    rc.layer_avg_rate_mismatch[layer] +=
@@ -808,7 +815,7 @@ int main(int argc, const char **argv) {
              // Update for short-time encoding bitrate states, for moving
              // window of size rc->window, shifted by rc->window / 2.
              // Ignore first window segment, due to key frame.
-              if (frame_cnt > rc.window_size) {
+              if (frame_cnt > (unsigned int)rc.window_size) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
@@ -824,13 +831,14 @@ int main(int argc, const char **argv) {
              }

              // Second shifted window.
-              if (frame_cnt > rc.window_size + rc.window_size / 2) {
+              if (frame_cnt >
+                  (unsigned int)(rc.window_size + rc.window_size / 2)) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
                }

-                if (frame_cnt > 2 * rc.window_size &&
+                if (frame_cnt > (unsigned int)(2 * rc.window_size) &&
                    frame_cnt % rc.window_size == 0) {
                  rc.window_count += 1;
                  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
@@ -843,10 +851,11 @@ int main(int argc, const char **argv) {
            }
 #endif
          }
-
+          /*
          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
+          */
          if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
            si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
          ++frames_received;
--- a/examples/vp9cx_set_ref.c
+++ b/examples/vp9cx_set_ref.c
@@ -53,6 +53,7 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vpx_encoder.h"
+#include "vp9/common/vp9_common.h"

 #include "./tools_common.h"
 #include "./video_writer.h"
@@ -190,8 +191,7 @@ static void find_mismatch(const vpx_image_t *const img1,
 }

 static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
-                           vpx_codec_enc_cfg_t *cfg, unsigned int frame_out,
-                           int *mismatch_seen) {
+                           unsigned int frame_out, int *mismatch_seen) {
  vpx_image_t enc_img, dec_img;
  struct vp9_ref_frame ref_enc, ref_dec;

@@ -225,11 +225,10 @@ static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
  vpx_img_free(&dec_img);
 }

-static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_codec_enc_cfg_t *cfg,
-                        vpx_image_t *img, unsigned int frame_in,
-                        VpxVideoWriter *writer, int test_decode,
-                        vpx_codec_ctx_t *dcodec, unsigned int *frame_out,
-                        int *mismatch_seen) {
+static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,
+                        unsigned int frame_in, VpxVideoWriter *writer,
+                        int test_decode, vpx_codec_ctx_t *dcodec,
+                        unsigned int *frame_out, int *mismatch_seen) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
@@ -270,7 +269,7 @@ static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_codec_enc_cfg_t *cfg,

  // Mismatch checking
  if (got_data && test_decode) {
-    testing_decode(ecodec, dcodec, cfg, *frame_out, mismatch_seen);
+    testing_decode(ecodec, dcodec, *frame_out, mismatch_seen);
  }

  return got_pkts;
@@ -279,12 +278,12 @@ static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_codec_enc_cfg_t *cfg,
 int main(int argc, char **argv) {
  FILE *infile = NULL;
  // Encoder
-  vpx_codec_ctx_t ecodec = { 0 };
-  vpx_codec_enc_cfg_t cfg = { 0 };
+  vpx_codec_ctx_t ecodec;
+  vpx_codec_enc_cfg_t cfg;
  unsigned int frame_in = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = { 0 };
+  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;

@@ -305,7 +304,13 @@ int main(int argc, char **argv) {
  const char *height_arg = NULL;
  const char *infile_arg = NULL;
  const char *outfile_arg = NULL;
+  const char *update_frame_num_arg = NULL;
  unsigned int limit = 0;
+
+  vp9_zero(ecodec);
+  vp9_zero(cfg);
+  vp9_zero(info);
+
  exec_name = argv[0];

  if (argc < 6) die("Invalid number of arguments");
@@ -314,25 +319,28 @@ int main(int argc, char **argv) {
  height_arg = argv[2];
  infile_arg = argv[3];
  outfile_arg = argv[4];
+  update_frame_num_arg = argv[5];

  encoder = get_vpx_encoder_by_name("vp9");
  if (!encoder) die("Unsupported codec.");

-  update_frame_num = atoi(argv[5]);
+  update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
  // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
  // allocated while calling vpx_codec_encode(), thus, setting reference for
  // 1st frame isn't supported.
-  if (update_frame_num <= 1) die("Couldn't parse frame number '%s'\n", argv[5]);
+  if (update_frame_num <= 1) {
+    die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
+  }

  if (argc > 6) {
-    limit = atoi(argv[6]);
+    limit = (unsigned int)strtoul(argv[6], NULL, 0);
    if (update_frame_num > limit)
      die("Update frame number couldn't larger than limit\n");
  }

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = strtol(width_arg, NULL, 0);
-  info.frame_height = strtol(height_arg, NULL, 0);
+  info.frame_width = (int)strtol(width_arg, NULL, 0);
+  info.frame_height = (int)strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

@@ -397,7 +405,7 @@ int main(int argc, char **argv) {
      }
    }

-    encode_frame(&ecodec, &cfg, &raw, frame_in, writer, test_decode, &dcodec,
+    encode_frame(&ecodec, &raw, frame_in, writer, test_decode, &dcodec,
                 &frame_out, &mismatch_seen);
    frame_in++;
    if (mismatch_seen) break;
@@ -405,8 +413,8 @@ int main(int argc, char **argv) {

  // Flush encoder.
  if (!mismatch_seen)
-    while (encode_frame(&ecodec, &cfg, NULL, frame_in, writer, test_decode,
-                        &dcodec, &frame_out, &mismatch_seen)) {
+    while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec,
+                        &frame_out, &mismatch_seen)) {
    }

  printf("\n");
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -514,7 +514,7 @@ int main(int argc, char **argv) {
  FILE *infile = NULL;
  struct RateControlMetrics rc;
  int64_t cx_time = 0;
-  const int min_args_base = 11;
+  const int min_args_base = 12;
 #if CONFIG_VP9_HIGHBITDEPTH
  vpx_bit_depth_t bit_depth = VPX_BITS_8;
  int input_bit_depth = 8;
@@ -530,15 +530,13 @@ int main(int argc, char **argv) {
  // Check usage and arguments.
  if (argc < min_args) {
 #if CONFIG_VP9_HIGHBITDEPTH
-    die(
-        "Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
+    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
        argv[0]);
 #else
-    die(
-        "Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
+    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> \n",
        argv[0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -549,15 +547,15 @@ int main(int argc, char **argv) {

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

-  width = strtol(argv[4], NULL, 0);
-  height = strtol(argv[5], NULL, 0);
+  width = (unsigned int)strtoul(argv[4], NULL, 0);
+  height = (unsigned int)strtoul(argv[5], NULL, 0);
  if (width < 16 || width % 2 || height < 16 || height % 2) {
    die("Invalid resolution: %d x %d", width, height);
  }

-  layering_mode = strtol(argv[10], NULL, 0);
+  layering_mode = (int)strtol(argv[11], NULL, 0);
  if (layering_mode < 0 || layering_mode > 13) {
-    die("Invalid layering mode (0..12) %s", argv[10]);
+    die("Invalid layering mode (0..12) %s", argv[11]);
  }

  if (argc != min_args + mode_to_num_layers[layering_mode]) {
@@ -611,25 +609,25 @@ int main(int argc, char **argv) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH

  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
-  cfg.g_timebase.num = strtol(argv[6], NULL, 0);
-  cfg.g_timebase.den = strtol(argv[7], NULL, 0);
+  cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0);
+  cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0);

-  speed = strtol(argv[8], NULL, 0);
+  speed = (int)strtol(argv[8], NULL, 0);
  if (speed < 0) {
    die("Invalid speed setting: must be positive");
  }

  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
-    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
+    rc.layer_target_bitrate[i - 12] = (int)strtol(argv[i], NULL, 0);
    if (strncmp(encoder->name, "vp8", 3) == 0)
-      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+      cfg.ts_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
    else if (strncmp(encoder->name, "vp9", 3) == 0)
-      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
+      cfg.layer_target_bitrate[i - 12] = rc.layer_target_bitrate[i - 12];
  }

  // Real time parameters.
-  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
+  cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
@@ -644,7 +642,7 @@ int main(int argc, char **argv) {
  cfg.rc_resize_allowed = 0;

  // Use 1 thread as default.
-  cfg.g_threads = 1;
+  cfg.g_threads = (unsigned int)strtoul(argv[10], NULL, 0);

  // Enable error resilient mode.
  cfg.g_error_resilient = 1;
@@ -704,11 +702,14 @@ int main(int argc, char **argv) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
+    vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
    vpx_svc_extra_cfg_t svc_params;
    memset(&svc_params, 0, sizeof(svc_params));
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
+    vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
+    vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
--- a/libs.mk
+++ b/libs.mk
@@ -12,7 +12,7 @@
 # ARM assembly files are written in RVCT-style. We use some make magic to
 # filter those files to allow GCC compilation
 ifeq ($(ARCH_ARM),yes)
-  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.s,.asm)
+  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
 else
  ASM:=.asm
 endif
@@ -106,9 +106,6 @@ ifeq ($(CONFIG_VP9_DECODER),yes)
  CODEC_DOC_SECTIONS += vp9 vp9_decoder
 endif

-VP9_PREFIX=vp9/
-$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
-
 ifeq ($(CONFIG_ENCODERS),yes)
  CODEC_DOC_SECTIONS += encoder
 endif
@@ -116,6 +113,12 @@ ifeq ($(CONFIG_DECODERS),yes)
  CODEC_DOC_SECTIONS += decoder
 endif

+# Suppress -Wextra warnings in third party code.
+$(BUILD_PFX)third_party/googletest/%.cc.o: CXXFLAGS += -Wno-missing-field-initializers
+# Suppress -Wextra warnings in first party code pending investigation.
+# https://bugs.chromium.org/p/webm/issues/detail?id=1069
+$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
+$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered

 ifeq ($(CONFIG_MSVS),yes)
 CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
@@ -230,7 +233,7 @@ LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

 SO_VERSION_MAJOR := 4
-SO_VERSION_MINOR := 0
+SO_VERSION_MINOR := 1
 SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
 LIBVPX_SO               := libvpx.$(SO_VERSION_MAJOR).dylib
@@ -363,7 +366,7 @@ endif
 #
 # Add assembler dependencies for configuration.
 #
-$(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
+$(filter %.S.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm


@@ -388,7 +391,7 @@ LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
 LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
-libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)
+libvpx_test_data_url=https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx/$(1)

 TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
 TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
@@ -402,7 +405,7 @@ CLEAN-OBJS += libvpx_test_srcs.txt
 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
 	@echo "    [DOWNLOAD] $@"
 	$(qexec)trap 'rm -f $@' INT TERM &&\
-            curl -L -o $@ $(call libvpx_test_data_url,$(@F))
+            curl --retry 1 -L -o $@ $(call libvpx_test_data_url,$(@F))

 testdata:: $(LIBVPX_TEST_DATA)
 	$(qexec)[ -x "$$(which sha1sum)" ] && sha1sum=sha1sum;\
--- a/test/alt_ref_aq_segment_test.cc
+++ b/test/alt_ref_aq_segment_test.cc
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "test/codec_factory.h"
+#include "test/encode_test_driver.h"
+#include "test/i420_video_source.h"
+#include "test/util.h"
+
+namespace {
+
+class AltRefAqSegmentTest
+    : public ::libvpx_test::EncoderTest,
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+ protected:
+  AltRefAqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
+  virtual ~AltRefAqSegmentTest() {}
+
+  virtual void SetUp() {
+    InitializeConfig();
+    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
+    aq_mode_ = 0;
+    alt_ref_aq_mode_ = 0;
+  }
+
+  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder *encoder) {
+    if (video->frame() == 1) {
+      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(VP9E_SET_ALT_REF_AQ, alt_ref_aq_mode_);
+      encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);
+      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100);
+    }
+  }
+
+  int set_cpu_used_;
+  int aq_mode_;
+  int alt_ref_aq_mode_;
+};
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=0, AQ=0/no_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ0) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 0;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=0, AQ=1/variance_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ1) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 1;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=0, AQ=2/complexity_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ2) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 2;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=0, AQ=3/cyclicrefresh_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ3) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 3;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+// Validate that this ALT_REF_AQ/AQ segmentation mode
+// (ALT_REF_AQ=0, AQ=4/equator360_aq)
+// encodes and decodes without a mismatch.
+TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ4) {
+  cfg_.rc_min_quantizer = 8;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_VBR;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_target_bitrate = 300;
+
+  aq_mode_ = 4;
+  alt_ref_aq_mode_ = 1;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 100);
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+}
+
+VP9_INSTANTIATE_TEST_CASE(AltRefAqSegmentTest,
+                          ::testing::Values(::libvpx_test::kOnePassGood,
+                                            ::libvpx_test::kTwoPassGood),
+                          ::testing::Range(2, 5));
+}  // namespace
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -31,7 +31,7 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
    SetMode(libvpx_test::kTwoPassGood);
  }

-  virtual void BeginPassHook(unsigned int pass) { altref_count_ = 0; }
+  virtual void BeginPassHook(unsigned int /*pass*/) { altref_count_ = 0; }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -53,7 +53,7 @@ class AverageTestBase : public ::testing::Test {
  }

  // Sum Pixels
-  unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
    unsigned int average = 0;
    for (int h = 0; h < 8; ++h) {
      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
@@ -61,7 +61,7 @@ class AverageTestBase : public ::testing::Test {
    return ((average + 32) >> 6);
  }

-  unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
    unsigned int average = 0;
    for (int h = 0; h < 4; ++h) {
      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
@@ -98,11 +98,12 @@ class AverageTest : public AverageTestBase,

 protected:
  void CheckAverages() {
+    const int block_size = GET_PARAM(3);
    unsigned int expected = 0;
-    if (GET_PARAM(3) == 8) {
+    if (block_size == 8) {
      expected =
          ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
-    } else if (GET_PARAM(3) == 4) {
+    } else if (block_size == 4) {
      expected =
          ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
    }
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -65,6 +65,12 @@ class CodecTestWith3Params
    : public ::testing::TestWithParam<
          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {};

+template <class T1, class T2, class T3, class T4>
+class CodecTestWith4Params
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {
+};
+
 /*
 * VP8 Codec Definitions
 */
@@ -115,6 +121,8 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_DECODER
    return new VP8Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
    return NULL;
 #endif
  }
@@ -126,6 +134,10 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
    return new VP8Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
    return NULL;
 #endif
  }
@@ -135,6 +147,8 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
@@ -203,6 +217,8 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_DECODER
    return new VP9Decoder(cfg, flags);
 #else
+    (void)cfg;
+    (void)flags;
    return NULL;
 #endif
  }
@@ -214,6 +230,10 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
    return new VP9Encoder(cfg, deadline, init_flags, stats);
 #else
+    (void)cfg;
+    (void)deadline;
+    (void)init_flags;
+    (void)stats;
    return NULL;
 #endif
  }
@@ -223,6 +243,8 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
 #else
+    (void)cfg;
+    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
@@ -12,8 +12,8 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vpx_config.h"
 #include "./vp9_rtcd.h"
+#include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
 #include "test/clear_system_state.h"
@@ -36,6 +36,12 @@ typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride,
                             const int16_t *filter_y, int filter_y_stride,
                             int w, int h);

+typedef void (*WrapperFilterBlock2d8Func)(
+    const uint8_t *src_ptr, const unsigned int src_stride,
+    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
+    unsigned int dst_stride, unsigned int output_width,
+    unsigned int output_height, int use_highbd);
+
 struct ConvolveFunctions {
  ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8,
                    ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg,
@@ -43,25 +49,30 @@ struct ConvolveFunctions {
                    ConvolveFunc sh8_avg, ConvolveFunc sv8,
                    ConvolveFunc sv8_avg, ConvolveFunc shv8,
                    ConvolveFunc shv8_avg, int bd)
-      : copy_(copy), avg_(avg), h8_(h8), v8_(v8), hv8_(hv8), h8_avg_(h8_avg),
-        v8_avg_(v8_avg), hv8_avg_(hv8_avg), sh8_(sh8), sv8_(sv8), shv8_(shv8),
-        sh8_avg_(sh8_avg), sv8_avg_(sv8_avg), shv8_avg_(shv8_avg),
-        use_highbd_(bd) {}
+      : use_highbd_(bd) {
+    copy_[0] = copy;
+    copy_[1] = avg;
+    h8_[0] = h8;
+    h8_[1] = h8_avg;
+    v8_[0] = v8;
+    v8_[1] = v8_avg;
+    hv8_[0] = hv8;
+    hv8_[1] = hv8_avg;
+    sh8_[0] = sh8;
+    sh8_[1] = sh8_avg;
+    sv8_[0] = sv8;
+    sv8_[1] = sv8_avg;
+    shv8_[0] = shv8;
+    shv8_[1] = shv8_avg;
+  }

-  ConvolveFunc copy_;
-  ConvolveFunc avg_;
-  ConvolveFunc h8_;
-  ConvolveFunc v8_;
-  ConvolveFunc hv8_;
-  ConvolveFunc h8_avg_;
-  ConvolveFunc v8_avg_;
-  ConvolveFunc hv8_avg_;
-  ConvolveFunc sh8_;       // scaled horiz
-  ConvolveFunc sv8_;       // scaled vert
-  ConvolveFunc shv8_;      // scaled horiz/vert
-  ConvolveFunc sh8_avg_;   // scaled avg horiz
-  ConvolveFunc sv8_avg_;   // scaled avg vert
-  ConvolveFunc shv8_avg_;  // scaled avg horiz/vert
+  ConvolveFunc copy_[2];
+  ConvolveFunc h8_[2];
+  ConvolveFunc v8_[2];
+  ConvolveFunc hv8_[2];
+  ConvolveFunc sh8_[2];   // scaled horiz
+  ConvolveFunc sv8_[2];   // scaled vert
+  ConvolveFunc shv8_[2];  // scaled horiz/vert
  int use_highbd_;  // 0 if high bitdepth not used, else the actual bit depth.
 };

@@ -82,7 +93,7 @@ typedef std::tr1::tuple<int, int, const ConvolveFunctions *> ConvolveParam;
 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; }

 void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
-                        const int16_t *HFilter, const int16_t *VFilter,
+                        const int16_t *hfilter, const int16_t *vfilter,
                        uint8_t *dst_ptr, unsigned int dst_stride,
                        unsigned int output_width, unsigned int output_height) {
  // Between passes, we use an intermediate buffer whose height is extended to
@@ -112,10 +123,10 @@ void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
  for (i = 0; i < intermediate_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
-      const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
-                       (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
-                       (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
-                       (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+      const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
+                       (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
+                       (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
+                       (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
@@ -133,10 +144,10 @@ void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride,
  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      // Apply filter...
-      const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
-                       (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
-                       (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
-                       (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+      const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
+                       (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
+                       (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
+                       (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                       (VP9_FILTER_WEIGHT >> 1);  // Rounding

      // Normalize back to 0-255...
@@ -162,7 +173,7 @@ void block2d_average_c(uint8_t *src, unsigned int src_stride,

 void filter_average_block2d_8_c(const uint8_t *src_ptr,
                                const unsigned int src_stride,
-                                const int16_t *HFilter, const int16_t *VFilter,
+                                const int16_t *hfilter, const int16_t *vfilter,
                                uint8_t *dst_ptr, unsigned int dst_stride,
                                unsigned int output_width,
                                unsigned int output_height) {
@@ -170,7 +181,7 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
-  filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                     output_width, output_height);
  block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height);
 }
@@ -178,7 +189,7 @@ void filter_average_block2d_8_c(const uint8_t *src_ptr,
 #if CONFIG_VP9_HIGHBITDEPTH
 void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
                               const unsigned int src_stride,
-                               const int16_t *HFilter, const int16_t *VFilter,
+                               const int16_t *hfilter, const int16_t *vfilter,
                               uint16_t *dst_ptr, unsigned int dst_stride,
                               unsigned int output_width,
                               unsigned int output_height, int bd) {
@@ -210,10 +221,10 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
    for (i = 0; i < intermediate_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
-        const int temp = (src_ptr[0] * HFilter[0]) + (src_ptr[1] * HFilter[1]) +
-                         (src_ptr[2] * HFilter[2]) + (src_ptr[3] * HFilter[3]) +
-                         (src_ptr[4] * HFilter[4]) + (src_ptr[5] * HFilter[5]) +
-                         (src_ptr[6] * HFilter[6]) + (src_ptr[7] * HFilter[7]) +
+        const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) +
+                         (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) +
+                         (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) +
+                         (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to 0-255...
@@ -234,10 +245,10 @@ void highbd_filter_block2d_8_c(const uint16_t *src_ptr,
    for (i = 0; i < output_height; ++i) {
      for (j = 0; j < output_width; ++j) {
        // Apply filter...
-        const int temp = (src_ptr[0] * VFilter[0]) + (src_ptr[1] * VFilter[1]) +
-                         (src_ptr[2] * VFilter[2]) + (src_ptr[3] * VFilter[3]) +
-                         (src_ptr[4] * VFilter[4]) + (src_ptr[5] * VFilter[5]) +
-                         (src_ptr[6] * VFilter[6]) + (src_ptr[7] * VFilter[7]) +
+        const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) +
+                         (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) +
+                         (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) +
+                         (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) +
                         (VP9_FILTER_WEIGHT >> 1);  // Rounding

        // Normalize back to 0-255...
@@ -265,20 +276,64 @@ void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride,

 void highbd_filter_average_block2d_8_c(
    const uint16_t *src_ptr, const unsigned int src_stride,
-    const int16_t *HFilter, const int16_t *VFilter, uint16_t *dst_ptr,
+    const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr,
    unsigned int dst_stride, unsigned int output_width,
    unsigned int output_height, int bd) {
  uint16_t tmp[kMaxDimension * kMaxDimension];

  assert(output_width <= kMaxDimension);
  assert(output_height <= kMaxDimension);
-  highbd_filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, tmp, 64,
+  highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64,
                            output_width, output_height, bd);
  highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width,
                           output_height);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH

+void wrapper_filter_average_block2d_8_c(
+    const uint8_t *src_ptr, const unsigned int src_stride,
+    const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr,
+    unsigned int dst_stride, unsigned int output_width,
+    unsigned int output_height, int use_highbd) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_highbd == 0) {
+    filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                               dst_stride, output_width, output_height);
+  } else {
+    highbd_filter_average_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
+                                      hfilter, vfilter,
+                                      CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+                                      output_width, output_height, use_highbd);
+  }
+#else
+  ASSERT_EQ(0, use_highbd);
+  filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                             dst_stride, output_width, output_height);
+#endif
+}
+
+void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
+                                const unsigned int src_stride,
+                                const int16_t *hfilter, const int16_t *vfilter,
+                                uint8_t *dst_ptr, unsigned int dst_stride,
+                                unsigned int output_width,
+                                unsigned int output_height, int use_highbd) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (use_highbd == 0) {
+    filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr,
+                       dst_stride, output_width, output_height);
+  } else {
+    highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride, hfilter,
+                              vfilter, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
+                              output_width, output_height, use_highbd);
+  }
+#else
+  ASSERT_EQ(0, use_highbd);
+  filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride,
+                     output_width, output_height);
+#endif
+}
+
 class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 public:
  static void SetUpTestCase() {
@@ -461,50 +516,6 @@ class ConvolveTest : public ::testing::TestWithParam<ConvolveParam> {
 #endif
  }

-  void wrapper_filter_average_block2d_8_c(
-      const uint8_t *src_ptr, const unsigned int src_stride,
-      const int16_t *HFilter, const int16_t *VFilter, uint8_t *dst_ptr,
-      unsigned int dst_stride, unsigned int output_width,
-      unsigned int output_height) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (UUT_->use_highbd_ == 0) {
-      filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                                 dst_stride, output_width, output_height);
-    } else {
-      highbd_filter_average_block2d_8_c(
-          CONVERT_TO_SHORTPTR(src_ptr), src_stride, HFilter, VFilter,
-          CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height,
-          UUT_->use_highbd_);
-    }
-#else
-    filter_average_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                               dst_stride, output_width, output_height);
-#endif
-  }
-
-  void wrapper_filter_block2d_8_c(const uint8_t *src_ptr,
-                                  const unsigned int src_stride,
-                                  const int16_t *HFilter,
-                                  const int16_t *VFilter, uint8_t *dst_ptr,
-                                  unsigned int dst_stride,
-                                  unsigned int output_width,
-                                  unsigned int output_height) {
-#if CONFIG_VP9_HIGHBITDEPTH
-    if (UUT_->use_highbd_ == 0) {
-      filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                         dst_stride, output_width, output_height);
-    } else {
-      highbd_filter_block2d_8_c(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
-                                HFilter, VFilter, CONVERT_TO_SHORTPTR(dst_ptr),
-                                dst_stride, output_width, output_height,
-                                UUT_->use_highbd_);
-    }
-#else
-    filter_block2d_8_c(src_ptr, src_stride, HFilter, VFilter, dst_ptr,
-                       dst_stride, output_width, output_height);
-#endif
-  }
-
  const ConvolveFunctions *UUT_;
  static uint8_t *input_;
  static uint8_t *output_;
@@ -532,8 +543,8 @@ TEST_P(ConvolveTest, Copy) {
  uint8_t *const in = input();
  uint8_t *const out = output();

-  ASM_REGISTER_STATE_CHECK(UUT_->copy_(in, kInputStride, out, kOutputStride,
-                                       NULL, 0, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride,
+                                          NULL, 0, NULL, 0, Width(), Height()));

  CheckGuardBlocks();

@@ -551,8 +562,8 @@ TEST_P(ConvolveTest, Avg) {
  uint8_t *const out_ref = output_ref();
  CopyOutputToRef();

-  ASM_REGISTER_STATE_CHECK(UUT_->avg_(in, kInputStride, out, kOutputStride,
-                                      NULL, 0, NULL, 0, Width(), Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride,
+                                          NULL, 0, NULL, 0, Width(), Height()));

  CheckGuardBlocks();

@@ -572,9 +583,9 @@ TEST_P(ConvolveTest, CopyHoriz) {
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

-  ASM_REGISTER_STATE_CHECK(UUT_->sh8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride,
+                                         filter8, 16, filter8, 16, Width(),
+                                         Height()));

  CheckGuardBlocks();

@@ -592,9 +603,9 @@ TEST_P(ConvolveTest, CopyVert) {
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

-  ASM_REGISTER_STATE_CHECK(UUT_->sv8_(in, kInputStride, out, kOutputStride,
-                                      filter8, 16, filter8, 16, Width(),
-                                      Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride,
+                                         filter8, 16, filter8, 16, Width(),
+                                         Height()));

  CheckGuardBlocks();

@@ -612,9 +623,9 @@ TEST_P(ConvolveTest, Copy2D) {
  DECLARE_ALIGNED(256, const int16_t,
                  filter8[8]) = { 0, 0, 0, 128, 0, 0, 0, 0 };

-  ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                       filter8, 16, filter8, 16, Width(),
-                                       Height()));
+  ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride,
+                                          filter8, 16, filter8, 16, Width(),
+                                          Height()));

  CheckGuardBlocks();

@@ -651,137 +662,84 @@ TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) {
 }

 const int16_t kInvalidFilter[8] = { 0 };
+const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = {
+  wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c
+};

 TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
+  for (int i = 0; i < 2; ++i) {
+    uint8_t *const in = input();
+    uint8_t *const out = output();
 #if CONFIG_VP9_HIGHBITDEPTH
-  uint8_t ref8[kOutputStride * kMaxDimension];
-  uint16_t ref16[kOutputStride * kMaxDimension];
-  uint8_t *ref;
-  if (UUT_->use_highbd_ == 0) {
-    ref = ref8;
-  } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
-  }
+    uint8_t ref8[kOutputStride * kMaxDimension];
+    uint16_t ref16[kOutputStride * kMaxDimension];
+    uint8_t *ref;
+    if (UUT_->use_highbd_ == 0) {
+      ref = ref8;
+    } else {
+      ref = CONVERT_TO_BYTEPTR(ref16);
+    }
 #else
-  uint8_t ref[kOutputStride * kMaxDimension];
+    uint8_t ref[kOutputStride * kMaxDimension];
 #endif

-  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
-    const InterpKernel *filters =
-        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
-
-    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
-      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-        wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
-                                   filters[filter_y], ref, kOutputStride,
-                                   Width(), Height());
-
-        if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_y)
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->v8_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                        16, filters[filter_y], 16, Width(), Height()));
-        else if (filter_x)
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->h8_(in, kInputStride, out, kOutputStride, filters[filter_x],
-                        16, kInvalidFilter, 16, Width(), Height()));
-        else
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->copy_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                          0, kInvalidFilter, 0, Width(), Height()));
-
-        CheckGuardBlocks();
-
-        for (int y = 0; y < Height(); ++y) {
-          for (int x = 0; x < Width(); ++x)
-            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
-                      lookup(out, y * kOutputStride + x))
-                << "mismatch at (" << x << "," << y << "), "
-                << "filters (" << filter_bank << "," << filter_x << ","
-                << filter_y << ")";
+    // Populate ref and out with some random data
+    ::libvpx_test::ACMRandom prng;
+    for (int y = 0; y < Height(); ++y) {
+      for (int x = 0; x < Width(); ++x) {
+        uint16_t r;
+#if CONFIG_VP9_HIGHBITDEPTH
+        if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
+          r = prng.Rand8Extremes();
+        } else {
+          r = prng.Rand16() & mask_;
        }
-      }
-    }
-  }
-}
-
-TEST_P(ConvolveTest, MatchesReferenceAveragingSubpixelFilter) {
-  uint8_t *const in = input();
-  uint8_t *const out = output();
-#if CONFIG_VP9_HIGHBITDEPTH
-  uint8_t ref8[kOutputStride * kMaxDimension];
-  uint16_t ref16[kOutputStride * kMaxDimension];
-  uint8_t *ref;
-  if (UUT_->use_highbd_ == 0) {
-    ref = ref8;
-  } else {
-    ref = CONVERT_TO_BYTEPTR(ref16);
-  }
 #else
-  uint8_t ref[kOutputStride * kMaxDimension];
-#endif
-
-  // Populate ref and out with some random data
-  ::libvpx_test::ACMRandom prng;
-  for (int y = 0; y < Height(); ++y) {
-    for (int x = 0; x < Width(); ++x) {
-      uint16_t r;
-#if CONFIG_VP9_HIGHBITDEPTH
-      if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) {
        r = prng.Rand8Extremes();
-      } else {
-        r = prng.Rand16() & mask_;
-      }
-#else
-      r = prng.Rand8Extremes();
 #endif

-      assign_val(out, y * kOutputStride + x, r);
-      assign_val(ref, y * kOutputStride + x, r);
+        assign_val(out, y * kOutputStride + x, r);
+        assign_val(ref, y * kOutputStride + x, r);
+      }
    }
-  }

-  for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
-    const InterpKernel *filters =
-        vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];
+    for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) {
+      const InterpKernel *filters =
+          vp9_filter_kernels[static_cast<INTERP_FILTER>(filter_bank)];

-    for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
-      for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
-        wrapper_filter_average_block2d_8_c(in, kInputStride, filters[filter_x],
-                                           filters[filter_y], ref,
-                                           kOutputStride, Width(), Height());
+      for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) {
+        for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
+          wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x],
+                                      filters[filter_y], ref, kOutputStride,
+                                      Width(), Height(), UUT_->use_highbd_);

-        if (filter_x && filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->hv8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_y)
-          ASM_REGISTER_STATE_CHECK(UUT_->v8_avg_(
-              in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
-              filters[filter_y], 16, Width(), Height()));
-        else if (filter_x)
-          ASM_REGISTER_STATE_CHECK(UUT_->h8_avg_(
-              in, kInputStride, out, kOutputStride, filters[filter_x], 16,
-              kInvalidFilter, 16, Width(), Height()));
-        else
-          ASM_REGISTER_STATE_CHECK(
-              UUT_->avg_(in, kInputStride, out, kOutputStride, kInvalidFilter,
-                         0, kInvalidFilter, 0, Width(), Height()));
+          if (filter_x && filter_y)
+            ASM_REGISTER_STATE_CHECK(UUT_->hv8_[i](
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                filters[filter_y], 16, Width(), Height()));
+          else if (filter_y)
+            ASM_REGISTER_STATE_CHECK(UUT_->v8_[i](
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
+                filters[filter_y], 16, Width(), Height()));
+          else if (filter_x)
+            ASM_REGISTER_STATE_CHECK(UUT_->h8_[i](
+                in, kInputStride, out, kOutputStride, filters[filter_x], 16,
+                kInvalidFilter, 16, Width(), Height()));
+          else
+            ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](
+                in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
+                kInvalidFilter, 0, Width(), Height()));

-        CheckGuardBlocks();
+          CheckGuardBlocks();

-        for (int y = 0; y < Height(); ++y) {
-          for (int x = 0; x < Width(); ++x)
-            ASSERT_EQ(lookup(ref, y * kOutputStride + x),
-                      lookup(out, y * kOutputStride + x))
-                << "mismatch at (" << x << "," << y << "), "
-                << "filters (" << filter_bank << "," << filter_x << ","
-                << filter_y << ")";
+          for (int y = 0; y < Height(); ++y) {
+            for (int x = 0; x < Width(); ++x)
+              ASSERT_EQ(lookup(ref, y * kOutputStride + x),
+                        lookup(out, y * kOutputStride + x))
+                  << "mismatch at (" << x << "," << y << "), "
+                  << "filters (" << filter_bank << "," << filter_x << ","
+                  << filter_y << ")";
+          }
        }
      }
    }
@@ -852,21 +810,21 @@ TEST_P(ConvolveTest, FilterExtremes) {
          for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) {
            wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x],
                                       filters[filter_y], ref, kOutputStride,
-                                       Width(), Height());
+                                       Width(), Height(), UUT_->use_highbd_);
            if (filter_x && filter_y)
-              ASM_REGISTER_STATE_CHECK(UUT_->hv8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->hv8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  filters[filter_y], 16, Width(), Height()));
            else if (filter_y)
-              ASM_REGISTER_STATE_CHECK(UUT_->v8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->v8_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 16,
                  filters[filter_y], 16, Width(), Height()));
            else if (filter_x)
-              ASM_REGISTER_STATE_CHECK(UUT_->h8_(
+              ASM_REGISTER_STATE_CHECK(UUT_->h8_[0](
                  in, kInputStride, out, kOutputStride, filters[filter_x], 16,
                  kInvalidFilter, 16, Width(), Height()));
            else
-              ASM_REGISTER_STATE_CHECK(UUT_->copy_(
+              ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](
                  in, kInputStride, out, kOutputStride, kInvalidFilter, 0,
                  kInvalidFilter, 0, Width(), Height()));

@@ -897,9 +855,9 @@ TEST_P(ConvolveTest, CheckScalingFiltering) {
  for (int frac = 0; frac < 16; ++frac) {
    for (int step = 1; step <= 32; ++step) {
      /* Test the horizontal and vertical filters in combination. */
-      ASM_REGISTER_STATE_CHECK(UUT_->shv8_(in, kInputStride, out, kOutputStride,
-                                           eighttap[frac], step, eighttap[frac],
-                                           step, Width(), Height()));
+      ASM_REGISTER_STATE_CHECK(
+          UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap[frac],
+                         step, eighttap[frac], step, Width(), Height()));

      CheckGuardBlocks();

@@ -926,6 +884,7 @@ using std::tr1::make_tuple;
    vpx_highbd_##func(src, src_stride, dst, dst_stride, filter_x,            \
                      filter_x_stride, filter_y, filter_y_stride, w, h, bd); \
  }
+
 #if HAVE_SSE2 && ARCH_X86_64
 WRAP(convolve_copy_sse2, 8)
 WRAP(convolve_avg_sse2, 8)
@@ -953,6 +912,33 @@ WRAP(convolve8_sse2, 12)
 WRAP(convolve8_avg_sse2, 12)
 #endif  // HAVE_SSE2 && ARCH_X86_64

+#if HAVE_NEON
+WRAP(convolve_copy_neon, 8)
+WRAP(convolve_avg_neon, 8)
+WRAP(convolve_copy_neon, 10)
+WRAP(convolve_avg_neon, 10)
+WRAP(convolve_copy_neon, 12)
+WRAP(convolve_avg_neon, 12)
+WRAP(convolve8_horiz_neon, 8)
+WRAP(convolve8_avg_horiz_neon, 8)
+WRAP(convolve8_vert_neon, 8)
+WRAP(convolve8_avg_vert_neon, 8)
+WRAP(convolve8_neon, 8)
+WRAP(convolve8_avg_neon, 8)
+WRAP(convolve8_horiz_neon, 10)
+WRAP(convolve8_avg_horiz_neon, 10)
+WRAP(convolve8_vert_neon, 10)
+WRAP(convolve8_avg_vert_neon, 10)
+WRAP(convolve8_neon, 10)
+WRAP(convolve8_avg_neon, 10)
+WRAP(convolve8_horiz_neon, 12)
+WRAP(convolve8_avg_horiz_neon, 12)
+WRAP(convolve8_vert_neon, 12)
+WRAP(convolve8_avg_vert_neon, 12)
+WRAP(convolve8_neon, 12)
+WRAP(convolve8_avg_neon, 12)
+#endif  // HAVE_NEON
+
 WRAP(convolve_copy_c, 8)
 WRAP(convolve_avg_c, 8)
 WRAP(convolve8_horiz_c, 8)
@@ -1085,25 +1071,46 @@ INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest,
 #endif  // HAVE_AVX2 && HAVE_SSSE3

 #if HAVE_NEON
-#if HAVE_NEON_ASM
+#if CONFIG_VP9_HIGHBITDEPTH
+const ConvolveFunctions convolve8_neon(
+    wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8,
+    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
+    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
+    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8,
+    wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8,
+    wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8,
+    wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8);
+const ConvolveFunctions convolve10_neon(
+    wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10,
+    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
+    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
+    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10,
+    wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10,
+    wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10,
+    wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 10);
+const ConvolveFunctions convolve12_neon(
+    wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12,
+    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
+    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
+    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12,
+    wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12,
+    wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12,
+    wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12);
+const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon),
+                                              ALL_SIZES(convolve10_neon),
+                                              ALL_SIZES(convolve12_neon) };
+#else
 const ConvolveFunctions convolve8_neon(
    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-#else   // HAVE_NEON
-const ConvolveFunctions convolve8_neon(
-    vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon,
-    vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon,
-    vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon,
-    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
-    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);
-#endif  // HAVE_NEON_ASM

-const ConvolveParam kArrayConvolve8_neon[] = { ALL_SIZES(convolve8_neon) };
+const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) };
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest,
-                        ::testing::ValuesIn(kArrayConvolve8_neon));
+                        ::testing::ValuesIn(kArrayConvolve_neon));
 #endif  // HAVE_NEON

 #if HAVE_DSPR2
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
@@ -20,7 +20,7 @@ namespace {

 class DatarateTestLarge
    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
+      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 public:
  DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {}

@@ -30,6 +30,7 @@ class DatarateTestLarge
  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
+    set_cpu_used_ = GET_PARAM(2);
    ResetModel();
  }

@@ -42,12 +43,15 @@ class DatarateTestLarge
    duration_ = 0.0;
    denoiser_offon_test_ = 0;
    denoiser_offon_period_ = -1;
+    gf_boost_ = 0;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() == 0) {
      encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_);
+      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
+      encoder->Control(VP8E_SET_GF_CBR_BOOST_PCT, gf_boost_);
    }

    if (denoiser_offon_test_) {
@@ -139,6 +143,8 @@ class DatarateTestLarge
  int denoiser_on_;
  int denoiser_offon_test_;
  int denoiser_offon_period_;
+  int set_cpu_used_;
+  int gf_boost_;
 };

 #if CONFIG_TEMPORAL_DENOISING
@@ -156,9 +162,6 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
    // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j
    // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV,
    // denoiserOnAggressive, and denoiserOnAdaptive.
-    // For the spatial denoiser (if !CONFIG_TEMPORAL_DENOISING), the level j
-    // refers to the blur thresholds: 20, 40, 60 80.
-    // The j = 0 case (denoiser off) is covered in the tests below.
    denoiser_on_ = j;
    cfg_.rc_target_bitrate = 300;
    ResetModel();
@@ -166,7 +169,7 @@ TEST_P(DatarateTestLarge, DenoiserLevels) {
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";

-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
@@ -190,7 +193,7 @@ TEST_P(DatarateTestLarge, DenoiserOffOn) {
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
      << " The datarate for the file exceeds the target!";
-  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
      << " The datarate for the file missed the target!";
 }
 #endif  // CONFIG_TEMPORAL_DENOISING
@@ -221,8 +224,7 @@ TEST_P(DatarateTestLarge, BasicBufferModel) {
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
-
-    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.3)
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }
 }
@@ -256,6 +258,199 @@ TEST_P(DatarateTestLarge, ChangingDropFrameThresh) {
  }
 }

+// Disabled for tsan, see:
+// https://bugs.chromium.org/p/webm/issues/detail?id=1049
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define BUILDING_WITH_TSAN
+#endif
+#endif
+#ifndef BUILDING_WITH_TSAN
+TEST_P(DatarateTestLarge, DropFramesMultiThreads) {
+  denoiser_on_ = 0;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_threads = 2;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  cfg_.rc_target_bitrate = 200;
+  ResetModel();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+      << " The datarate for the file missed the target!";
+}
+#endif  // !BUILDING_WITH_TSAN
+
+class DatarateTestRealTime : public DatarateTestLarge {
+ public:
+  virtual ~DatarateTestRealTime() {}
+};
+
+#if CONFIG_TEMPORAL_DENOISING
+// Check basic datarate targeting, for a single bitrate, but loop over the
+// various denoiser settings.
+TEST_P(DatarateTestRealTime, DenoiserLevels) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  for (int j = 1; j < 5; ++j) {
+    // Run over the denoiser levels.
+    // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j
+    // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV,
+    // denoiserOnAggressive, and denoiserOnAdaptive.
+    denoiser_on_ = j;
+    cfg_.rc_target_bitrate = 300;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+        << " The datarate for the file exceeds the target!";
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+        << " The datarate for the file missed the target!";
+  }
+}
+
+// Check basic datarate targeting, for a single bitrate, when denoiser is off
+// and on.
+TEST_P(DatarateTestRealTime, DenoiserOffOn) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 299);
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // The denoiser is off by default.
+  denoiser_on_ = 0;
+  // Set the offon test flag.
+  denoiser_offon_test_ = 1;
+  denoiser_offon_period_ = 100;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+      << " The datarate for the file missed the target!";
+}
+#endif  // CONFIG_TEMPORAL_DENOISING
+
+TEST_P(DatarateTestRealTime, BasicBufferModel) {
+  denoiser_on_ = 0;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  // 2 pass cbr datarate control has a bug hidden by the small # of
+  // frames selected in this encode. The problem is that even if the buffer is
+  // negative we produce a keyframe on a cutscene, ignoring datarate
+  // constraints
+  // TODO(jimbankoski): Fix when issue
+  // http://bugs.chromium.org/p/webm/issues/detail?id=495 is addressed.
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+
+  // There is an issue for low bitrates in real-time mode, where the
+  // effective_datarate slightly overshoots the target bitrate.
+  // This is same the issue as noted above (#495).
+  // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),
+  // when the issue is resolved.
+  for (int i = 100; i <= 700; i += 200) {
+    cfg_.rc_target_bitrate = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+        << " The datarate for the file exceeds the target!";
+    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+        << " The datarate for the file missed the target!";
+  }
+}
+
+TEST_P(DatarateTestRealTime, ChangingDropFrameThresh) {
+  denoiser_on_ = 0;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_max_quantizer = 36;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.rc_target_bitrate = 200;
+  cfg_.kf_mode = VPX_KF_DISABLED;
+
+  const int frame_count = 40;
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, frame_count);
+
+  // Check that the first dropped frame gets earlier and earlier
+  // as the drop frame threshold is increased.
+
+  const int kDropFrameThreshTestStep = 30;
+  vpx_codec_pts_t last_drop = frame_count;
+  for (int i = 1; i < 91; i += kDropFrameThreshTestStep) {
+    cfg_.rc_dropframe_thresh = i;
+    ResetModel();
+    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+    ASSERT_LE(first_drop_, last_drop)
+        << " The first dropped frame for drop_thresh " << i
+        << " > first dropped frame for drop_thresh "
+        << i - kDropFrameThreshTestStep;
+    last_drop = first_drop_;
+  }
+}
+
+// Disabled for tsan, see:
+// https://bugs.chromium.org/p/webm/issues/detail?id=1049
+
+#ifndef BUILDING_WITH_TSAN
+TEST_P(DatarateTestRealTime, DropFramesMultiThreads) {
+  denoiser_on_ = 0;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  // Encode using multiple threads.
+  cfg_.g_threads = 2;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  cfg_.rc_target_bitrate = 200;
+  ResetModel();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+      << " The datarate for the file missed the target!";
+}
+#endif
+
+TEST_P(DatarateTestRealTime, GFBoost) {
+  denoiser_on_ = 0;
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_dropframe_thresh = 0;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_error_resilient = 0;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 300);
+  cfg_.rc_target_bitrate = 300;
+  ResetModel();
+  // Apply a gf boost.
+  gf_boost_ = 50;
+
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
+      << " The datarate for the file exceeds the target!";
+
+  ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
+      << " The datarate for the file missed the target!";
+}
+
 class DatarateTestVP9Large
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
@@ -520,6 +715,30 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting) {
  }
 }

+// Check basic rate targeting for CBR mode, with 2 threads and dropped frames.
+TEST_P(DatarateTestVP9Large, BasicRateTargetingDropFramesMultiThreads) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 30;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  // Encode using multiple threads.
+  cfg_.g_threads = 2;
+
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 140);
+  cfg_.rc_target_bitrate = 200;
+  ResetModel();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
+
 // Check basic rate targeting for CBR.
 TEST_P(DatarateTestVP9Large, BasicRateTargeting444) {
  ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140);
@@ -735,8 +954,13 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) {
 }

 #if CONFIG_VP9_TEMPORAL_DENOISING
+class DatarateTestVP9LargeDenoiser : public DatarateTestVP9Large {
+ public:
+  virtual ~DatarateTestVP9LargeDenoiser() {}
+};
+
 // Check basic datarate targeting, for a single bitrate, when denoiser is on.
-TEST_P(DatarateTestVP9Large, DenoiserLevels) {
+TEST_P(DatarateTestVP9LargeDenoiser, LowNoise) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
@@ -763,9 +987,37 @@ TEST_P(DatarateTestVP9Large, DenoiserLevels) {
      << " The datarate for the file is greater than target by too much!";
 }

+// Check basic datarate targeting, for a single bitrate, when denoiser is on,
+// for clip with high noise level.
+TEST_P(DatarateTestVP9LargeDenoiser, HighNoise) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_dropframe_thresh = 1;
+  cfg_.rc_min_quantizer = 2;
+  cfg_.rc_max_quantizer = 56;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+
+  ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200);
+
+  // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING),
+  // there is only one denoiser mode: denoiserYonly(which is 1),
+  // but may add more modes in the future.
+  cfg_.rc_target_bitrate = 1000;
+  ResetModel();
+  // Turn on the denoiser.
+  denoiser_on_ = 1;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
+      << " The datarate for the file is lower than target by too much!";
+  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
+      << " The datarate for the file is greater than target by too much!";
+}
+
 // Check basic datarate targeting, for a single bitrate, when denoiser is off
 // and on.
-TEST_P(DatarateTestVP9Large, DenoiserOffOn) {
+TEST_P(DatarateTestVP9LargeDenoiser, DenoiserOffOn) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
@@ -833,7 +1085,7 @@ class DatarateOnePassCbrSvc
      }
      svc_params_.speed_per_layer[0] = 5;
      for (i = 1; i < VPX_SS_MAX_LAYERS; ++i) {
-        svc_params_.speed_per_layer[i] = 7;
+        svc_params_.speed_per_layer[i] = speed_setting_;
      }
      encoder->Control(VP9E_SET_SVC, 1);
      encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_);
@@ -841,6 +1093,7 @@ class DatarateOnePassCbrSvc
      encoder->Control(VP9E_SET_TILE_COLUMNS, 0);
      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300);
      encoder->Control(VP9E_SET_TILE_COLUMNS, (cfg_.g_threads >> 1));
+      encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1);
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
@@ -854,11 +1107,13 @@ class DatarateOnePassCbrSvc
    const bool key_frame =
        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
    if (!key_frame) {
-      ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
-                                          << pkt->data.frame.pts;
+      // TODO(marpan): This check currently fails for some of the SVC tests,
+      // re-enable when issue (webm:1350) is resolved.
+      //  ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame "
+      //                                      << pkt->data.frame.pts;
    }
    const size_t frame_size_in_bits = pkt->data.frame.sz * 8;
-    bits_in_buffer_model_ -= frame_size_in_bits;
+    bits_in_buffer_model_ -= static_cast<int64_t>(frame_size_in_bits);
    bits_total_ += frame_size_in_bits;
    if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1;
    last_pts_ = pkt->data.frame.pts;
@@ -951,7 +1206,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers) {
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 200);
-  // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the
+  // TODO(marpan): Check that effective_datarate for each layer hits the
  // layer target_bitrate.
  for (int i = 200; i <= 800; i += 200) {
    cfg_.rc_target_bitrate = i;
@@ -1033,8 +1288,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers4threads) {
  svc_params_.scaling_factor_den[1] = 288;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
-                                       1, 0, 300);
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
@@ -1073,8 +1327,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers) {
  svc_params_.scaling_factor_den[2] = 288;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
-                                       1, 0, 300);
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
@@ -1112,8 +1365,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayersSmallKf) {
  svc_params_.scaling_factor_num[2] = 288;
  svc_params_.scaling_factor_den[2] = 288;
  cfg_.rc_dropframe_thresh = 10;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
-                                       1, 0, 300);
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
  cfg_.rc_target_bitrate = 800;
  // For this 3 temporal layer case, pattern repeats every 4 frames, so choose
  // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2).
@@ -1157,8 +1409,7 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers4threads) {
  svc_params_.scaling_factor_den[2] = 288;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.kf_max_dist = 9999;
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
-                                       1, 0, 300);
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers,
@@ -1171,11 +1422,54 @@ TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc3SpatialLayers4threads) {
  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
 }

-VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES);
+// Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial
+// downscale 5x5.
+TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc2SpatialLayers5x5MultipleRuns) {
+  cfg_.rc_buf_initial_sz = 500;
+  cfg_.rc_buf_optimal_sz = 500;
+  cfg_.rc_buf_sz = 1000;
+  cfg_.rc_min_quantizer = 0;
+  cfg_.rc_max_quantizer = 63;
+  cfg_.rc_end_usage = VPX_CBR;
+  cfg_.g_lag_in_frames = 0;
+  cfg_.ss_number_layers = 2;
+  cfg_.ts_number_layers = 1;
+  cfg_.ts_rate_decimator[0] = 1;
+  cfg_.g_error_resilient = 1;
+  cfg_.g_threads = 3;
+  cfg_.temporal_layering_mode = 0;
+  svc_params_.scaling_factor_num[0] = 256;
+  svc_params_.scaling_factor_den[0] = 1280;
+  svc_params_.scaling_factor_num[1] = 1280;
+  svc_params_.scaling_factor_den[1] = 1280;
+  cfg_.rc_dropframe_thresh = 0;
+  cfg_.kf_max_dist = 999999;
+  cfg_.kf_min_dist = 0;
+  cfg_.ss_target_bitrate[0] = 300;
+  cfg_.ss_target_bitrate[1] = 1400;
+  cfg_.layer_target_bitrate[0] = 300;
+  cfg_.layer_target_bitrate[1] = 1400;
+  cfg_.rc_target_bitrate = 1700;
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300);
+  ResetModel();
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
+}
+
+VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES,
+                          ::testing::Values(0));
+VP8_INSTANTIATE_TEST_CASE(DatarateTestRealTime,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Values(-6, -12));
 VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large,
                          ::testing::Values(::libvpx_test::kOnePassGood,
                                            ::libvpx_test::kRealTime),
                          ::testing::Range(2, 9));
+#if CONFIG_VP9_TEMPORAL_DENOISING
+VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeDenoiser,
+                          ::testing::Values(::libvpx_test::kRealTime),
+                          ::testing::Range(5, 9));
+#endif
 VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc,
                          ::testing::Values(::libvpx_test::kRealTime),
                          ::testing::Range(5, 9));
--- a/test/decode_svc_test.cc
+++ b/test/decode_svc_test.cc
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <string>
+
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/test_vectors.h"
+#include "test/util.h"
+
+namespace {
+
+const unsigned int kNumFrames = 19;
+
+class DecodeSvcTest : public ::libvpx_test::DecoderTest,
+                      public ::libvpx_test::CodecTestWithParam<const char *> {
+ protected:
+  DecodeSvcTest() : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)) {}
+  virtual ~DecodeSvcTest() {}
+
+  virtual void PreDecodeFrameHook(
+      const libvpx_test::CompressedVideoSource &video,
+      libvpx_test::Decoder *decoder) {
+    if (video.frame_number() == 0)
+      decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER, spatial_layer_);
+  }
+
+  virtual void DecompressedFrameHook(const vpx_image_t &img,
+                                     const unsigned int frame_number) {
+    ASSERT_EQ(img.d_w, width_);
+    ASSERT_EQ(img.d_h, height_);
+    total_frames_ = frame_number;
+  }
+
+  int spatial_layer_;
+  unsigned int width_;
+  unsigned int height_;
+  unsigned int total_frames_;
+};
+
+// SVC test vector is 1280x720, with 3 spatial layers, and 20 frames.
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 0. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in 1/4x1/4 resolution (320x180).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) {
+  const std::string filename = GET_PARAM(1);
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;
+  spatial_layer_ = 0;
+  width_ = 320;
+  height_ = 180;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 1. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in 1/2x1/2 resolution (640x360).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) {
+  const std::string filename = GET_PARAM(1);
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;
+  spatial_layer_ = 1;
+  width_ = 640;
+  height_ = 360;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 2. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This results in the full resolution (1280x720).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) {
+  const std::string filename = GET_PARAM(1);
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;
+  spatial_layer_ = 2;
+  width_ = 1280;
+  height_ = 720;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);
+}
+
+// Decode the SVC test vector, which has 3 spatial layers, and decode up to
+// spatial layer 10. Verify the resolution of each decoded frame and the total
+// number of frames decoded. This is beyond the number of spatial layers, so
+// the decoding should result in the full resolution (1280x720).
+TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer10) {
+  const std::string filename = GET_PARAM(1);
+  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
+  video.reset(new libvpx_test::IVFVideoSource(filename));
+  ASSERT_TRUE(video.get() != NULL);
+  video->Init();
+  total_frames_ = 0;
+  spatial_layer_ = 10;
+  width_ = 1280;
+  height_ = 720;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
+  ASSERT_EQ(total_frames_, kNumFrames);
+}
+
+VP9_INSTANTIATE_TEST_CASE(
+    DecodeSvcTest, ::testing::ValuesIn(libvpx_test::kVP9TestVectorsSvc,
+                                       libvpx_test::kVP9TestVectorsSvc +
+                                           libvpx_test::kNumVP9TestVectorsSvc));
+}  // namespace
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -90,8 +90,7 @@ class ErrorResilienceTestLarge
    return frame_flags;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder * /*encoder*/) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
    frame_flags_ &=
        ~(VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF);
    // For temporal layer case.
--- a/test/examples.sh
+++ b/test/examples.sh
@@ -15,7 +15,7 @@
 example_tests=$(ls $(dirname $0)/*.sh)

 # List of script names to exclude.
-exclude_list="examples tools_common"
+exclude_list="examples stress tools_common"

 # Filter out the scripts in $exclude_list.
 for word in ${exclude_list}; do
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -438,14 +438,12 @@ INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
                                                     &vpx_idct4x4_16_add_neon,
                                                     0, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans4x4HT,
    ::testing::Values(
@@ -453,7 +451,8 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#endif  // !CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -105,20 +105,20 @@ void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {

 #if HAVE_SSE2

-void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
+void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 10);
 }

-void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
+void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_c(in, out, stride, 12);
 }

-void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
+void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 10);
 }

-void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
+void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_12_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
@@ -670,14 +670,17 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
+                                                     &vpx_idct8x8_64_add_neon,
+                                                     0, VPX_BITS_8)));
+#else   // !CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));
-#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
@@ -685,6 +688,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 #endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -728,10 +732,10 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&idct8x8_10_add_10_c, &idct8x8_10_add_10_sse2, 6225,
+        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
-        make_tuple(&idct8x8_10_add_12_c, &idct8x8_10_add_12_sse2, 6225,
+        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -115,6 +115,10 @@ TEST_P(IDCTTest, TestWithData) {
 }

 INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(NEON, IDCTTest,
+                        ::testing::Values(vp8_short_idct4x4llm_neon));
+#endif
 #if HAVE_MMX
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -120,6 +120,7 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest,

 TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }

+#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileTests[] = {
  { 1, "invalid-vp90-02-v2.webm" },
 #if CONFIG_VP9_HIGHBITDEPTH
@@ -141,10 +142,14 @@ const DecodeParam kVP9InvalidFileTests[] = {
  { 1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf" },
  { 1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf" },
  { 1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf" },
+  { 1,
+    "invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf" },
+  { 1, "invalid-crbug-667044.webm" },
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
                          ::testing::ValuesIn(kVP9InvalidFileTests));
+#endif  // CONFIG_VP9_DECODER

 // This class will include test vectors that are expected to fail
 // peek. However they are still expected to have no fatal failures.
@@ -158,6 +163,16 @@ class InvalidFileInvalidPeekTest : public InvalidFileTest {

 TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { RunTest(); }

+#if CONFIG_VP8_DECODER
+const DecodeParam kVP8InvalidFileTests[] = {
+  { 1, "invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf" },
+};
+
+VP8_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
+                          ::testing::ValuesIn(kVP8InvalidFileTests));
+#endif  // CONFIG_VP8_DECODER
+
+#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
  { 1, "invalid-vp90-01-v3.webm" },
 };
@@ -174,6 +189,7 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
    "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf" },
  { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" },
  { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" },
+  { 2, "invalid-crbug-629481.webm" },
 };

 INSTANTIATE_TEST_CASE_P(
@@ -182,4 +198,5 @@ INSTANTIATE_TEST_CASE_P(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
+#endif  // CONFIG_VP9_DECODER
 }  // namespace
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -66,6 +66,36 @@ class LevelTest
  int level_;
 };

+TEST_P(LevelTest, TestTargetLevel11) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
+  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
+                                       90);
+  target_level_ = 11;
+  cfg_.rc_target_bitrate = 150;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);
+}
+
+TEST_P(LevelTest, TestTargetLevel20) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
+  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
+                                       30, 1, 0, 90);
+  target_level_ = 20;
+  cfg_.rc_target_bitrate = 1200;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);
+}
+
+TEST_P(LevelTest, TestTargetLevel31) {
+  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
+  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
+                                       1, 0, 60);
+  target_level_ = 31;
+  cfg_.rc_target_bitrate = 8000;
+  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
+  ASSERT_EQ(target_level_, level_);
+}
+
 // Test for keeping level stats only
 TEST_P(LevelTest, TestTargetLevel0) {
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
@@ -94,6 +124,7 @@ TEST_P(LevelTest, TestTargetLevelApi) {
  vpx_codec_ctx_t enc;
  vpx_codec_enc_cfg_t cfg;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
+  cfg.rc_target_bitrate = 100;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
  for (int level = 0; level <= 256; ++level) {
    if (level == 10 || level == 11 || level == 20 || level == 21 ||
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -402,10 +402,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 8),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -416,10 +416,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 10),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -430,10 +430,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 12),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_edge_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -450,10 +450,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_sse2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
@@ -465,10 +464,10 @@ INSTANTIATE_TEST_CASE_P(
 #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
 INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
-    ::testing::Values(make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
-                                 &vpx_lpf_horizontal_edge_8_c, 8),
-                      make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
-                                 &vpx_lpf_horizontal_edge_16_c, 8)));
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2,
+                                 &vpx_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_16_dual_avx2,
+                                 &vpx_lpf_horizontal_16_dual_c, 8)));
 #endif

 #if HAVE_SSE2
@@ -515,15 +514,89 @@ INSTANTIATE_TEST_CASE_P(

 #if HAVE_NEON
 #if CONFIG_VP9_HIGHBITDEPTH
-// No neon high bitdepth functions.
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test6Param,
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
+                                 &vpx_highbd_lpf_horizontal_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
+                                 &vpx_highbd_lpf_horizontal_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
+                                 &vpx_highbd_lpf_horizontal_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
+                                 &vpx_highbd_lpf_vertical_4_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
+                                 &vpx_highbd_lpf_vertical_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
+                                 &vpx_highbd_lpf_vertical_16_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
+                                 &vpx_highbd_lpf_vertical_16_dual_c, 12)));
+INSTANTIATE_TEST_CASE_P(
+    NEON, Loop8Test9Param,
+    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
+                                 &vpx_highbd_lpf_horizontal_8_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
+                                 &vpx_highbd_lpf_vertical_4_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
+                                 &vpx_highbd_lpf_vertical_8_dual_c, 12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_edge_8_neon,
-                   &vpx_lpf_horizontal_edge_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_neon,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_neon,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
                   8),
@@ -550,8 +623,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8, &vpx_lpf_horizontal_edge_8, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_16, &vpx_lpf_horizontal_edge_16, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_dspr2,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
@@ -576,10 +650,9 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_edge_8_msa, &vpx_lpf_horizontal_edge_8_c,
-                   8),
-        make_tuple(&vpx_lpf_horizontal_edge_16_msa,
-                   &vpx_lpf_horizontal_edge_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8),
+        make_tuple(&vpx_lpf_horizontal_16_dual_msa,
+                   &vpx_lpf_horizontal_16_dual_c, 8),
        make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
@@ -12,6 +12,8 @@
 #include <stdlib.h>
 #include <string.h>

+#include <limits>
+
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vp9_rtcd.h"
@@ -23,235 +25,668 @@
 #include "vp9/common/vp9_blockd.h"
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_integer.h"
+#include "vpx_ports/vpx_timer.h"

 using libvpx_test::ACMRandom;

 namespace {
+
 typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
 typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, InvTxfmFunc, TX_SIZE, int>
+typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out,
+                                  int stride, int bd);
+
+template <InvTxfmFunc fn>
+void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
+  (void)bd;
+  fn(in, out, stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+template <InvTxfmWithBdFunc fn>
+void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) {
+  fn(in, CONVERT_TO_BYTEPTR(out), stride, bd);
+}
+#endif
+
+typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmWithBdFunc, InvTxfmWithBdFunc,
+                        TX_SIZE, int, int, int>
    PartialInvTxfmParam;
 const int kMaxNumCoeffs = 1024;
+const int kCountTestBlock = 1000;
+
+// https://bugs.chromium.org/p/webm/issues/detail?id=1332
+// The functions specified do not pass with INT16_MIN/MAX. They fail at the
+// value specified, but pass when 1 is added/subtracted.
+int16_t MaxSupportedCoeff(InvTxfmWithBdFunc a) {
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+  if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
+      a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
+    return 23625 - 1;
+  }
+#else
+  (void)a;
+#endif
+  return std::numeric_limits<int16_t>::max();
+}
+
+int16_t MinSupportedCoeff(InvTxfmWithBdFunc a) {
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+  if (a == &wrapper<vpx_idct8x8_64_add_ssse3> ||
+      a == &wrapper<vpx_idct8x8_12_add_ssse3>) {
+    return -23625 + 1;
+  }
+#else
+  (void)a;
+#endif
+  return std::numeric_limits<int16_t>::min();
+}
+
 class PartialIDctTest : public ::testing::TestWithParam<PartialInvTxfmParam> {
 public:
  virtual ~PartialIDctTest() {}
  virtual void SetUp() {
+    rnd_.Reset(ACMRandom::DeterministicSeed());
    ftxfm_ = GET_PARAM(0);
    full_itxfm_ = GET_PARAM(1);
    partial_itxfm_ = GET_PARAM(2);
    tx_size_ = GET_PARAM(3);
    last_nonzero_ = GET_PARAM(4);
+    bit_depth_ = GET_PARAM(5);
+    pixel_size_ = GET_PARAM(6);
+    mask_ = (1 << bit_depth_) - 1;
+
+    switch (tx_size_) {
+      case TX_4X4: size_ = 4; break;
+      case TX_8X8: size_ = 8; break;
+      case TX_16X16: size_ = 16; break;
+      case TX_32X32: size_ = 32; break;
+      default: FAIL() << "Wrong Size!"; break;
+    }
+
+    // Randomize stride_ to a value less than or equal to 1024
+    stride_ = rnd_(1024) + 1;
+    if (stride_ < size_) {
+      stride_ = size_;
+    }
+    // Align stride_ to 16 if it's bigger than 16.
+    if (stride_ > 16) {
+      stride_ &= ~15;
+    }
+
+    input_block_size_ = size_ * size_;
+    output_block_size_ = size_ * stride_;
+
+    input_block_ = reinterpret_cast<tran_low_t *>(
+        vpx_memalign(16, sizeof(*input_block_) * input_block_size_));
+    output_block_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * output_block_size_));
+    output_block_ref_ = reinterpret_cast<uint8_t *>(
+        vpx_memalign(16, pixel_size_ * output_block_size_));
  }

-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+  virtual void TearDown() {
+    vpx_free(input_block_);
+    input_block_ = NULL;
+    vpx_free(output_block_);
+    output_block_ = NULL;
+    vpx_free(output_block_ref_);
+    output_block_ref_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+  void InitMem() {
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
+    if (pixel_size_ == 1) {
+      for (int j = 0; j < output_block_size_; ++j) {
+        output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_;
+      }
+    } else {
+      ASSERT_EQ(2, pixel_size_);
+      uint16_t *const output = reinterpret_cast<uint16_t *>(output_block_);
+      uint16_t *const output_ref =
+          reinterpret_cast<uint16_t *>(output_block_ref_);
+      for (int j = 0; j < output_block_size_; ++j) {
+        output[j] = output_ref[j] = rnd_.Rand16() & mask_;
+      }
+    }
+  }
+
+  void InitInput() {
+    const int max_coeff = 32766 / 4;
+    int max_energy_leftover = max_coeff * max_coeff;
+    for (int j = 0; j < last_nonzero_; ++j) {
+      int16_t coeff = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
+                                           (rnd_.Rand16() - 32768) / 65536);
+      max_energy_leftover -= coeff * coeff;
+      if (max_energy_leftover < 0) {
+        max_energy_leftover = 0;
+        coeff = 0;
+      }
+      input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coeff;
+    }
+  }

 protected:
  int last_nonzero_;
  TX_SIZE tx_size_;
+  tran_low_t *input_block_;
+  uint8_t *output_block_;
+  uint8_t *output_block_ref_;
+  int size_;
+  int stride_;
+  int pixel_size_;
+  int input_block_size_;
+  int output_block_size_;
+  int bit_depth_;
+  int mask_;
  FwdTxfmFunc ftxfm_;
-  InvTxfmFunc full_itxfm_;
-  InvTxfmFunc partial_itxfm_;
+  InvTxfmWithBdFunc full_itxfm_;
+  InvTxfmWithBdFunc partial_itxfm_;
+  ACMRandom rnd_;
 };

 TEST_P(PartialIDctTest, RunQuantCheck) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int size;
-  switch (tx_size_) {
-    case TX_4X4: size = 4; break;
-    case TX_8X8: size = 8; break;
-    case TX_16X16: size = 16; break;
-    case TX_32X32: size = 32; break;
-    default: FAIL() << "Wrong Size!"; break;
-  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-
-  const int count_test_block = 1000;
-  const int block_size = size * size;
-
  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]);

-  int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-255, 255].
-      if (i == 0) {
-        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = 255;
-      } else if (i == 1) {
-        for (int j = 0; j < block_size; ++j) input_extreme_block[j] = -255;
-      } else {
-        for (int j = 0; j < block_size; ++j) {
-          input_extreme_block[j] = rnd.Rand8() % 2 ? 255 : -255;
-        }
+  InitMem();
+  for (int i = 0; i < kCountTestBlock * kCountTestBlock; ++i) {
+    // Initialize a test block with input range [-mask_, mask_].
+    if (i == 0) {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = mask_;
      }
-
-      ftxfm_(input_extreme_block, output_ref_block, size);
-
-      // quantization with maximum allowed step sizes
-      test_coef_block1[0] = (output_ref_block[0] / 1336) * 1336;
-      for (int j = 1; j < last_nonzero_; ++j) {
-        test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]] =
-            (output_ref_block[j] / 1828) * 1828;
+    } else if (i == 1) {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = -mask_;
+      }
+    } else {
+      for (int k = 0; k < input_block_size_; ++k) {
+        input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_;
      }
    }

-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block1, dst2, size));
+    ftxfm_(input_extreme_block, output_ref_block, size_);

-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error) max_error = error;
+    // quantization with minimum allowed step sizes
+    input_block_[0] = (output_ref_block[0] / 4) * 4;
+    for (int k = 1; k < last_nonzero_; ++k) {
+      const int pos = vp9_default_scan_orders[tx_size_].scan[k];
+      input_block_[pos] = (output_ref_block[pos] / 4) * 4;
    }
+
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: partial inverse transform produces different results";
  }
-
-  EXPECT_EQ(0, max_error)
-      << "Error: partial inverse transform produces different results";
 }

 TEST_P(PartialIDctTest, ResultsMatch) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  int size;
-  switch (tx_size_) {
-    case TX_4X4: size = 4; break;
-    case TX_8X8: size = 8; break;
-    case TX_16X16: size = 16; break;
-    case TX_32X32: size = 32; break;
-    default: FAIL() << "Wrong Size!"; break;
+  for (int i = 0; i < kCountTestBlock; ++i) {
+    InitMem();
+    InitInput();
+
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: partial inverse transform produces different results";
  }
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, tran_low_t, test_coef_block2[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst1[kMaxNumCoeffs]);
-  DECLARE_ALIGNED(16, uint8_t, dst2[kMaxNumCoeffs]);
-  const int count_test_block = 1000;
-  const int max_coeff = 32766 / 4;
-  const int block_size = size * size;
-  int max_error = 0;
-  for (int i = 0; i < count_test_block; ++i) {
-    // clear out destination buffer
-    memset(dst1, 0, sizeof(*dst1) * block_size);
-    memset(dst2, 0, sizeof(*dst2) * block_size);
-    memset(test_coef_block1, 0, sizeof(*test_coef_block1) * block_size);
-    memset(test_coef_block2, 0, sizeof(*test_coef_block2) * block_size);
-    int max_energy_leftover = max_coeff * max_coeff;
+}
+
+TEST_P(PartialIDctTest, AddOutputBlock) {
+  for (int i = 0; i < kCountTestBlock; ++i) {
+    InitMem();
    for (int j = 0; j < last_nonzero_; ++j) {
-      int16_t coef = static_cast<int16_t>(sqrt(1.0 * max_energy_leftover) *
-                                          (rnd.Rand16() - 32768) / 65536);
-      max_energy_leftover -= coef * coef;
-      if (max_energy_leftover < 0) {
-        max_energy_leftover = 0;
-        coef = 0;
-      }
-      test_coef_block1[vp9_default_scan_orders[tx_size_].scan[j]] = coef;
+      input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10;
    }

-    memcpy(test_coef_block2, test_coef_block1,
-           sizeof(*test_coef_block2) * block_size);
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+    ASM_REGISTER_STATE_CHECK(
+        partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+    ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                        pixel_size_ * output_block_size_))
+        << "Error: Transform results are not correctly added to output.";
+  }
+}

-    ASM_REGISTER_STATE_CHECK(full_itxfm_(test_coef_block1, dst1, size));
-    ASM_REGISTER_STATE_CHECK(partial_itxfm_(test_coef_block2, dst2, size));
+TEST_P(PartialIDctTest, SingleExtremeCoeff) {
+  const int16_t max_coeff = MaxSupportedCoeff(partial_itxfm_);
+  const int16_t min_coeff = MinSupportedCoeff(partial_itxfm_);
+  for (int i = 0; i < last_nonzero_; ++i) {
+    memset(input_block_, 0, sizeof(*input_block_) * input_block_size_);
+    // Run once for min and once for max.
+    for (int j = 0; j < 2; ++j) {
+      const int coeff = j ? min_coeff : max_coeff;

-    for (int j = 0; j < block_size; ++j) {
-      const int diff = dst1[j] - dst2[j];
-      const int error = diff * diff;
-      if (max_error < error) max_error = error;
+      memset(output_block_, 0, pixel_size_ * output_block_size_);
+      memset(output_block_ref_, 0, pixel_size_ * output_block_size_);
+      input_block_[vp9_default_scan_orders[tx_size_].scan[i]] = coeff;
+
+      ASM_REGISTER_STATE_CHECK(
+          full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+      ASM_REGISTER_STATE_CHECK(
+          partial_itxfm_(input_block_, output_block_, stride_, bit_depth_));
+      ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                          pixel_size_ * output_block_size_))
+          << "Error: Fails with single coeff of " << coeff << " at " << i
+          << ".";
    }
  }
+}

-  EXPECT_EQ(0, max_error)
+TEST_P(PartialIDctTest, DISABLED_Speed) {
+  // Keep runtime stable with transform size.
+  const int kCountSpeedTestBlock = 500000000 / input_block_size_;
+  InitMem();
+  InitInput();
+
+  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
+    ASM_REGISTER_STATE_CHECK(
+        full_itxfm_(input_block_, output_block_ref_, stride_, bit_depth_));
+  }
+  vpx_usec_timer timer;
+  vpx_usec_timer_start(&timer);
+  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
+    partial_itxfm_(input_block_, output_block_, stride_, bit_depth_);
+  }
+  libvpx_test::ClearSystemState();
+  vpx_usec_timer_mark(&timer);
+  const int elapsed_time =
+      static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
+  printf("idct%dx%d_%d (bitdepth %d) time: %5d ms ", size_, size_,
+         last_nonzero_, bit_depth_, elapsed_time);
+
+  ASSERT_EQ(0, memcmp(output_block_ref_, output_block_,
+                      pixel_size_ * output_block_size_))
      << "Error: partial inverse transform produces different results";
 }
+
 using std::tr1::make_tuple;

-INSTANTIATE_TEST_CASE_P(
-    C, PartialIDctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_34_add_c, TX_32X32, 34),
-                      make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_1_add_c, TX_32X32, 1),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_10_add_c, TX_16X16, 10),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_1_add_c, TX_16X16, 1),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_12_add_c, TX_8X8, 12),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_1_add_c, TX_8X8, 1),
-                      make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
-                                 &vpx_idct4x4_1_add_c, TX_4X4, 1)));
+const PartialInvTxfmParam c_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>, TX_32X32, 1024, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_34_add_c>, TX_32X32, 34, 12, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct32x32_c,
+             &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+             &highbd_wrapper<vpx_highbd_idct32x32_1_add_c>, TX_32X32, 1, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>, TX_16X16, 256, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_c>, TX_16X16, 10, 12, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct16x16_c,
+             &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+             &highbd_wrapper<vpx_highbd_idct16x16_1_add_c>, TX_16X16, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>, TX_8X8, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>, TX_4X4, 16, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>, TX_4X4, 1, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_c>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_c>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_c>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_c>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_c>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_c>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_c>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_c>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_c>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_c>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_c>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_c>, TX_4X4, 1, 8, 1)
+};

-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    NEON, PartialIDctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_1_add_neon, TX_32X32, 1),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_10_add_neon, TX_16X16, 10),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_1_add_neon, TX_16X16, 1),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_12_add_neon, TX_8X8, 12),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_1_add_neon, TX_8X8, 1),
-                      make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
-                                 &vpx_idct4x4_1_add_neon, TX_4X4, 1)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(C, PartialIDctTest,
+                        ::testing::ValuesIn(c_partial_idct_tests));

-#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    SSE2, PartialIDctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_34_add_sse2, TX_32X32, 34),
-                      make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_1_add_sse2, TX_32X32, 1),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_10_add_sse2, TX_16X16, 10),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_1_add_sse2, TX_16X16, 1),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_12_add_sse2, TX_8X8, 12),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_1_add_sse2, TX_8X8, 1),
-                      make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
-                                 &vpx_idct4x4_1_add_sse2, TX_4X4, 1)));
-#endif
+#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+const PartialInvTxfmParam neon_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_neon>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_12_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_neon>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_1_add_neon>, TX_8X8, 1, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_neon>, TX_4X4, 16, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 8, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 10, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_1_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_1_add_neon>, TX_4X4, 1, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_neon>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_neon>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_neon>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_neon>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_neon>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_neon>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_neon>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_neon>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_neon>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_neon>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_neon>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_neon>, TX_4X4, 1, 8, 1)
+};

-#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \
-    !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    SSSE3_64, PartialIDctTest,
-    ::testing::Values(make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_12_add_ssse3, TX_8X8, 12)));
-#endif
+INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest,
+                        ::testing::ValuesIn(neon_partial_idct_tests));
+#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

-#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, PartialIDctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_34_add_msa, TX_32X32, 34),
-                      make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c,
-                                 &vpx_idct32x32_1_add_msa, TX_32X32, 1),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_10_add_msa, TX_16X16, 10),
-                      make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c,
-                                 &vpx_idct16x16_1_add_msa, TX_16X16, 1),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_12_add_msa, TX_8X8, 10),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c,
-                                 &vpx_idct8x8_1_add_msa, TX_8X8, 1),
-                      make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c,
-                                 &vpx_idct4x4_1_add_msa, TX_4X4, 1)));
-#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+// 32x32_135_ is implemented using the 1024 version.
+const PartialInvTxfmParam sse2_partial_idct_tests[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct32x32_c, &highbd_wrapper<vpx_highbd_idct32x32_1024_add_c>,
+      &highbd_wrapper<vpx_highbd_idct32x32_1_add_sse2>, TX_32X32, 1, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_256_add_sse2>, TX_16X16, 256, 12, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct16x16_c, &highbd_wrapper<vpx_highbd_idct16x16_256_add_c>,
+      &highbd_wrapper<vpx_highbd_idct16x16_10_add_sse2>, TX_16X16, 10, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_64_add_sse2>, TX_8X8, 64, 12, 2),
+  make_tuple(&vpx_highbd_fdct8x8_c,
+             &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+             &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct8x8_c, &highbd_wrapper<vpx_highbd_idct8x8_64_add_c>,
+      &highbd_wrapper<vpx_highbd_idct8x8_12_add_sse2>, TX_8X8, 12, 12, 2),
+  make_tuple(&vpx_highbd_fdct4x4_c,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+             &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 10, 2),
+  make_tuple(
+      &vpx_highbd_fdct4x4_c, &highbd_wrapper<vpx_highbd_idct4x4_16_add_c>,
+      &highbd_wrapper<vpx_highbd_idct4x4_16_add_sse2>, TX_4X4, 16, 12, 2),
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_sse2>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_sse2>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_sse2>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_sse2>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_sse2>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_sse2>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_sse2>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_sse2>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_sse2>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_sse2>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_sse2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest,
+                        ::testing::ValuesIn(sse2_partial_idct_tests));
+
+#endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+const PartialInvTxfmParam ssse3_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_ssse3>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_135_add_ssse3>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_ssse3>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_ssse3>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_ssse3>, TX_8X8, 12, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest,
+                        ::testing::ValuesIn(ssse3_partial_idct_tests));
+#endif  // HAVE_SSSE3 && ARCH_X86_64 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_dspr2>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_dspr2>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_dspr2>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_dspr2>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_dspr2>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_dspr2>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_dspr2>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_dspr2>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_dspr2>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_dspr2>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_dspr2>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
+                        ::testing::ValuesIn(dspr2_partial_idct_tests));
+#endif  // HAVE_DSPR2 && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH
+// 32x32_135_ is implemented using the 1024 version.
+const PartialInvTxfmParam msa_partial_idct_tests[] = {
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 1024, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1024_add_msa>, TX_32X32, 135, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_34_add_msa>, TX_32X32, 34, 8, 1),
+  make_tuple(&vpx_fdct32x32_c, &wrapper<vpx_idct32x32_1024_add_c>,
+             &wrapper<vpx_idct32x32_1_add_msa>, TX_32X32, 1, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_256_add_msa>, TX_16X16, 256, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_10_add_msa>, TX_16X16, 10, 8, 1),
+  make_tuple(&vpx_fdct16x16_c, &wrapper<vpx_idct16x16_256_add_c>,
+             &wrapper<vpx_idct16x16_1_add_msa>, TX_16X16, 1, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_64_add_msa>, TX_8X8, 64, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_12_add_msa>, TX_8X8, 12, 8, 1),
+  make_tuple(&vpx_fdct8x8_c, &wrapper<vpx_idct8x8_64_add_c>,
+             &wrapper<vpx_idct8x8_1_add_msa>, TX_8X8, 1, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_16_add_msa>, TX_4X4, 16, 8, 1),
+  make_tuple(&vpx_fdct4x4_c, &wrapper<vpx_idct4x4_16_add_c>,
+             &wrapper<vpx_idct4x4_1_add_msa>, TX_4X4, 1, 8, 1)
+};
+
+INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
+                        ::testing::ValuesIn(msa_partial_idct_tests));
+#endif  // HAVE_MSA && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH

 }  // namespace
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -7,22 +7,40 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
+#include <limits.h>
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

-typedef void (*PostProcFunc)(unsigned char *src_ptr, unsigned char *dst_ptr,
-                             int src_pixels_per_line, int dst_pixels_per_line,
-                             int cols, unsigned char *flimit, int size);
+using libvpx_test::ACMRandom;
+
+typedef void (*VpxPostProcDownAndAcrossMbRowFunc)(
+    unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line,
+    int dst_pixels_per_line, int cols, unsigned char *flimit, int size);
+
+typedef void (*VpxMbPostProcAcrossIpFunc)(unsigned char *src, int pitch,
+                                          int rows, int cols, int flimit);
+
+typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
+                                      int cols, int flimit);

 namespace {

-class VPxPostProcessingFilterTest
-    : public ::testing::TestWithParam<PostProcFunc> {
+// Compute the filter level used in post proc from the loop filter strength
+int q2mbl(int x) {
+  if (x < 20) x = 20;
+
+  x = 50 + (x - 50) * 10 / 8;
+  return x * x / 3;
+}
+
+class VpxPostProcDownAndAcrossMbRowTest
+    : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
 public:
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 };
@@ -30,7 +48,7 @@ class VPxPostProcessingFilterTest
 // Test routine for the VPx post-processing function
 // vpx_post_proc_down_and_across_mb_row_c.

-TEST_P(VPxPostProcessingFilterTest, FilterOutputCheck) {
+TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
  // Size of the underlying data block that will be filtered.
  const int block_width = 16;
  const int block_height = 16;
@@ -47,14 +65,20 @@ TEST_P(VPxPostProcessingFilterTest, FilterOutputCheck) {
  const int output_stride = output_width;
  const int output_size = output_width * output_height;

-  uint8_t *const src_image =
-      reinterpret_cast<uint8_t *>(vpx_calloc(input_size, 1));
-  uint8_t *const dst_image =
-      reinterpret_cast<uint8_t *>(vpx_calloc(output_size, 1));
+  uint8_t *const src_image = new uint8_t[input_size];
+  ASSERT_TRUE(src_image != NULL);
+
+  // Though the left padding is only 8 bytes, the assembly code tries to
+  // read 16 bytes before the pointer.
+  uint8_t *const dst_image = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image != NULL);

  // Pointers to top-left pixel of block in the input and output images.
  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
-  uint8_t *const dst_image_ptr = dst_image + 8;
+
+  // The assembly works in increments of 16. The first read may be offset by
+  // this amount.
+  uint8_t *const dst_image_ptr = dst_image + 16;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  (void)memset(flimits, 255, block_width);
@@ -78,37 +102,514 @@ TEST_P(VPxPostProcessingFilterTest, FilterOutputCheck) {
                                      input_stride, output_stride, block_width,
                                      flimits, 16));

-  static const uint8_t expected_data[block_height] = { 4, 3, 1, 1, 1, 1, 1, 1,
-                                                       1, 1, 1, 1, 1, 1, 3, 4 };
+  static const uint8_t kExpectedOutput[block_height] = {
+    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
+  };

  pixel_ptr = dst_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
-      EXPECT_EQ(expected_data[i], pixel_ptr[j])
-          << "VPxPostProcessingFilterTest failed with invalid filter output";
+      ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j]) << "at (" << i << ", " << j
+                                                  << ")";
    }
    pixel_ptr += output_stride;
  }

-  vpx_free(src_image);
-  vpx_free(dst_image);
+  delete[] src_image;
+  delete[] dst_image;
  vpx_free(flimits);
 };

+TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
+  // Size of the underlying data block that will be filtered.
+  // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
+  // blocks are always a multiple of 8 wide and exactly 8 high.
+  const int block_width = 136;
+  const int block_height = 16;
+
+  // 5-tap filter needs 2 padding rows above and below the block in the input.
+  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
+  const int input_width = block_width;
+  const int input_height = block_height + 4 + 8;
+  const int input_stride = input_width;
+  const int input_size = input_stride * input_height;
+
+  // Filter extends output block by 8 samples at left and right edges.
+  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
+  const int output_width = block_width + 24;
+  const int output_height = block_height;
+  const int output_stride = output_width;
+  const int output_size = output_stride * output_height;
+
+  uint8_t *const src_image = new uint8_t[input_size];
+  ASSERT_TRUE(src_image != NULL);
+
+  // Though the left padding is only 8 bytes, the assembly code tries to
+  // read 16 bytes before the pointer.
+  uint8_t *const dst_image = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image != NULL);
+  uint8_t *const dst_image_ref = new uint8_t[output_size + 8];
+  ASSERT_TRUE(dst_image_ref != NULL);
+
+  // Pointers to top-left pixel of block in the input and output images.
+  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
+
+  // The assembly works in increments of 16. The first read may be offset by
+  // this amount.
+  uint8_t *const dst_image_ptr = dst_image + 16;
+  uint8_t *const dst_image_ref_ptr = dst_image + 16;
+
+  // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock
+  // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
+  // it must be padded out.
+  const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
+  uint8_t *const flimits =
+      reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
+
+  ACMRandom rnd;
+  rnd.Reset(ACMRandom::DeterministicSeed());
+  // Initialize pixels in the input:
+  //   block pixels to random values.
+  //   border pixels to value 10.
+  (void)memset(src_image, 10, input_size);
+  uint8_t *pixel_ptr = src_image_ptr;
+  for (int i = 0; i < block_height; ++i) {
+    for (int j = 0; j < block_width; ++j) {
+      pixel_ptr[j] = rnd.Rand8();
+    }
+    pixel_ptr += input_stride;
+  }
+
+  for (int blocks = 0; blocks < block_width; blocks += 8) {
+    (void)memset(flimits, 0, sizeof(*flimits) * flimits_width);
+
+    for (int f = 0; f < 255; f++) {
+      (void)memset(flimits + blocks, f, sizeof(*flimits) * 8);
+
+      (void)memset(dst_image, 0, output_size);
+      (void)memset(dst_image_ref, 0, output_size);
+
+      vpx_post_proc_down_and_across_mb_row_c(
+          src_image_ptr, dst_image_ref_ptr, input_stride, output_stride,
+          block_width, flimits, block_height);
+      ASM_REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr,
+                                          input_stride, output_stride,
+                                          block_width, flimits, 16));
+
+      for (int i = 0; i < block_height; ++i) {
+        for (int j = 0; j < block_width; ++j) {
+          ASSERT_EQ(dst_image_ref_ptr[j + i * output_stride],
+                    dst_image_ptr[j + i * output_stride])
+              << "at (" << i << ", " << j << ")";
+        }
+      }
+    }
+  }
+
+  delete[] src_image;
+  delete[] dst_image;
+  delete[] dst_image_ref;
+  vpx_free(flimits);
+}
+
+class VpxMbPostProcAcrossIpTest
+    : public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
+ public:
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void SetCols(unsigned char *s, int rows, int cols, int src_width) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        s[c] = c;
+      }
+      s += src_width;
+    }
+  }
+
+  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
+                     int rows, int cols, int src_pitch) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        ASSERT_EQ(expected_output[c], src_c[c]) << "at (" << r << ", " << c
+                                                << ")";
+      }
+      src_c += src_pitch;
+    }
+  }
+
+  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
+                      int filter_level, const unsigned char *expected_output) {
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(s, src_width, rows, cols, filter_level));
+    RunComparison(expected_output, s, rows, cols, src_width);
+  }
+};
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  memset(src, 10, src_size);
+  unsigned char *const s = src + src_left_padding;
+  SetCols(s, rows, cols, src_width);
+
+  unsigned char *expected_output = new unsigned char[rows * cols];
+  ASSERT_TRUE(expected_output != NULL);
+  SetCols(expected_output, rows, cols, cols);
+
+  RunFilterLevel(s, rows, cols, src_width, q2mbl(0), expected_output);
+  delete[] src;
+  delete[] expected_output;
+}
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  memset(src, 10, src_size);
+  unsigned char *const s = src + src_left_padding;
+
+  SetCols(s, rows, cols, src_width);
+  static const unsigned char kExpectedOutput[cols] = {
+    2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13
+  };
+
+  RunFilterLevel(s, rows, cols, src_width, q2mbl(70), kExpectedOutput);
+
+  delete[] src;
+}
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const src = new unsigned char[src_size];
+  ASSERT_TRUE(src != NULL);
+  unsigned char *const s = src + src_left_padding;
+
+  memset(src, 10, src_size);
+  SetCols(s, rows, cols, src_width);
+  static const unsigned char kExpectedOutput[cols] = {
+    2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13
+  };
+
+  RunFilterLevel(s, rows, cols, src_width, INT_MAX, kExpectedOutput);
+
+  memset(src, 10, src_size);
+  SetCols(s, rows, cols, src_width);
+  RunFilterLevel(s, rows, cols, src_width, q2mbl(100), kExpectedOutput);
+
+  delete[] src;
+}
+
+TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_left_padding = 8;
+  const int src_right_padding = 17;
+  const int src_width = cols + src_left_padding + src_right_padding;
+  const int src_size = rows * src_width;
+
+  unsigned char *const c_mem = new unsigned char[src_size];
+  unsigned char *const asm_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  ASSERT_TRUE(asm_mem != NULL);
+  unsigned char *const src_c = c_mem + src_left_padding;
+  unsigned char *const src_asm = asm_mem + src_left_padding;
+
+  // When level >= 100, the filter behaves the same as the level = INT_MAX
+  // When level < 20, it behaves the same as the level = 0
+  for (int level = 0; level < 100; level++) {
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetCols(src_c, rows, cols, src_width);
+    SetCols(src_asm, rows, cols, src_width);
+
+    vpx_mbpost_proc_across_ip_c(src_c, src_width, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_width, rows, cols, q2mbl(level)));
+
+    RunComparison(src_c, src_asm, rows, cols, src_width);
+  }
+
+  delete[] c_mem;
+  delete[] asm_mem;
+}
+
+class VpxMbPostProcDownTest
+    : public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
+ public:
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void SetRows(unsigned char *src_c, int rows, int cols) {
+    for (int r = 0; r < rows; r++) {
+      memset(src_c, r, cols);
+      src_c += cols;
+    }
+  }
+
+  void SetRandom(unsigned char *src_c, unsigned char *src_asm, int rows,
+                 int cols, int src_pitch) {
+    ACMRandom rnd;
+    rnd.Reset(ACMRandom::DeterministicSeed());
+
+    // Add some random noise to the input
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        const int noise = rnd(4);
+        src_c[c] = r + noise;
+        src_asm[c] = r + noise;
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  void SetRandomSaturation(unsigned char *src_c, unsigned char *src_asm,
+                           int rows, int cols, int src_pitch) {
+    ACMRandom rnd;
+    rnd.Reset(ACMRandom::DeterministicSeed());
+
+    // Add some random noise to the input
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        const int noise = 3 * rnd(2);
+        src_c[c] = r + noise;
+        src_asm[c] = r + noise;
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
+                     int rows, int cols, int src_pitch) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        ASSERT_EQ(expected_output[r * rows + c], src_c[c]) << "at (" << r
+                                                           << ", " << c << ")";
+      }
+      src_c += src_pitch;
+    }
+  }
+
+  void RunComparison(unsigned char *src_c, unsigned char *src_asm, int rows,
+                     int cols, int src_pitch) {
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        ASSERT_EQ(src_c[c], src_asm[c]) << "at (" << r << ", " << c << ")";
+      }
+      src_c += src_pitch;
+      src_asm += src_pitch;
+    }
+  }
+
+  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
+                      int filter_level, const unsigned char *expected_output) {
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(s, src_width, rows, cols, filter_level));
+    RunComparison(expected_output, s, rows, cols, src_width);
+  }
+};
+
+TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+
+  SetRows(src_c, rows, cols);
+
+  static const unsigned char kExpectedOutput[rows * cols] = {
+    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
+    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  4,  4,  3,  3,  3,
+    4,  4,  3,  4,  4,  3,  3,  4,  5,  4,  4,  4,  4,  4,  4,  4,  5,  4,  4,
+    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
+    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
+    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  8,  9,  9,  8,  8,  8,  9,
+    9,  8,  9,  9,  8,  8,  8,  9,  9,  10, 10, 9,  9,  9,  10, 10, 9,  10, 10,
+    9,  9,  9,  10, 10, 10, 11, 10, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11,
+    10, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 11, 12,
+    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12,
+    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
+    13, 13, 13, 13, 14, 13, 13, 13, 13
+  };
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, INT_MAX, kExpectedOutput);
+
+  memset(c_mem, 10, src_size);
+  SetRows(src_c, rows, cols);
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(100), kExpectedOutput);
+
+  delete[] c_mem;
+}
+
+TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+
+  SetRows(src_c, rows, cols);
+
+  static const unsigned char kExpectedOutput[rows * cols] = {
+    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
+    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,
+    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
+    3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
+    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
+    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
+    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
+    9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+    10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
+    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 13, 12,
+    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
+    13, 13, 13, 13, 14, 13, 13, 13, 13
+  };
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(70), kExpectedOutput);
+
+  delete[] c_mem;
+}
+
+TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  memset(c_mem, 10, src_size);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+
+  SetRows(src_c, rows, cols);
+
+  unsigned char *expected_output = new unsigned char[rows * cols];
+  ASSERT_TRUE(expected_output != NULL);
+  SetRows(expected_output, rows, cols);
+
+  RunFilterLevel(src_c, rows, cols, src_pitch, q2mbl(0), expected_output);
+
+  delete[] c_mem;
+  delete[] expected_output;
+}
+
+TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
+  const int rows = 16;
+  const int cols = 16;
+  const int src_pitch = cols;
+  const int src_top_padding = 8;
+  const int src_bottom_padding = 17;
+  const int src_size = cols * (rows + src_top_padding + src_bottom_padding);
+  unsigned char *const c_mem = new unsigned char[src_size];
+  unsigned char *const asm_mem = new unsigned char[src_size];
+  ASSERT_TRUE(c_mem != NULL);
+  ASSERT_TRUE(asm_mem != NULL);
+  unsigned char *const src_c = c_mem + src_top_padding * src_pitch;
+  unsigned char *const src_asm = asm_mem + src_top_padding * src_pitch;
+
+  for (int level = 0; level < 100; level++) {
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetRandom(src_c, src_asm, rows, cols, src_pitch);
+    vpx_mbpost_proc_down_c(src_c, src_pitch, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_pitch, rows, cols, q2mbl(level)));
+    RunComparison(src_c, src_asm, rows, cols, src_pitch);
+
+    memset(c_mem, 10, src_size);
+    memset(asm_mem, 10, src_size);
+    SetRandomSaturation(src_c, src_asm, rows, cols, src_pitch);
+    vpx_mbpost_proc_down_c(src_c, src_pitch, rows, cols, q2mbl(level));
+    ASM_REGISTER_STATE_CHECK(
+        GetParam()(src_asm, src_pitch, rows, cols, q2mbl(level)));
+    RunComparison(src_c, src_asm, rows, cols, src_pitch);
+  }
+
+  delete[] c_mem;
+  delete[] asm_mem;
+}
+
 INSTANTIATE_TEST_CASE_P(
-    C, VPxPostProcessingFilterTest,
+    C, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_c));

+INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_c));
+
+INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_c));
+
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, VPxPostProcessingFilterTest,
+    SSE2, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_sse2));
-#endif
+
+INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_sse2));
+
+INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_sse2));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, VpxPostProcDownAndAcrossMbRowTest,
+    ::testing::Values(vpx_post_proc_down_and_across_mb_row_neon));
+
+INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_neon));
+#endif  // HAVE_NEON

 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
-    MSA, VPxPostProcessingFilterTest,
+    MSA, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_msa));
-#endif
+
+INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcAcrossIpTest,
+                        ::testing::Values(vpx_mbpost_proc_across_ip_msa));
+
+INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
+                        ::testing::Values(vpx_mbpost_proc_down_msa));
+#endif  // HAVE_MSA

 }  // namespace
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@@ -0,0 +1,376 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp8_rtcd.h"
+#include "./vpx_config.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+using libvpx_test::ACMRandom;
+using std::tr1::make_tuple;
+
+typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
+                            int xoffset, int yoffset, uint8_t *dst_ptr,
+                            int dst_pitch);
+
+typedef std::tr1::tuple<int, int, PredictFunc> PredictParam;
+
+class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
+ public:
+  PredictTestBase()
+      : width_(GET_PARAM(0)), height_(GET_PARAM(1)), predict_(GET_PARAM(2)),
+        src_(NULL), padded_dst_(NULL), dst_(NULL), dst_c_(NULL) {}
+
+  virtual void SetUp() {
+    src_ = new uint8_t[kSrcSize];
+    ASSERT_TRUE(src_ != NULL);
+
+    // padded_dst_ provides a buffer of kBorderSize around the destination
+    // memory to facilitate detecting out of bounds writes.
+    dst_stride_ = kBorderSize + width_ + kBorderSize;
+    padded_dst_size_ = dst_stride_ * (kBorderSize + height_ + kBorderSize);
+    padded_dst_ =
+        reinterpret_cast<uint8_t *>(vpx_memalign(16, padded_dst_size_));
+    ASSERT_TRUE(padded_dst_ != NULL);
+    dst_ = padded_dst_ + (kBorderSize * dst_stride_) + kBorderSize;
+
+    dst_c_ = new uint8_t[16 * 16];
+    ASSERT_TRUE(dst_c_ != NULL);
+
+    memset(src_, 0, kSrcSize);
+    memset(padded_dst_, 128, padded_dst_size_);
+    memset(dst_c_, 0, 16 * 16);
+  }
+
+  virtual void TearDown() {
+    delete[] src_;
+    src_ = NULL;
+    vpx_free(padded_dst_);
+    padded_dst_ = NULL;
+    dst_ = NULL;
+    delete[] dst_c_;
+    dst_c_ = NULL;
+    libvpx_test::ClearSystemState();
+  }
+
+ protected:
+  // Make reference arrays big enough for 16x16 functions. Six-tap filters need
+  // 5 extra pixels outside of the macroblock.
+  static const int kSrcStride = 21;
+  static const int kSrcSize = kSrcStride * kSrcStride;
+  static const int kBorderSize = 16;
+
+  int width_;
+  int height_;
+  PredictFunc predict_;
+  uint8_t *src_;
+  uint8_t *padded_dst_;
+  uint8_t *dst_;
+  int padded_dst_size_;
+  uint8_t *dst_c_;
+  int dst_stride_;
+
+  bool CompareBuffers(const uint8_t *a, int a_stride, const uint8_t *b,
+                      int b_stride) const {
+    for (int height = 0; height < height_; ++height) {
+      EXPECT_EQ(0, memcmp(a + height * a_stride, b + height * b_stride,
+                          sizeof(*a) * width_))
+          << "Row " << height << " does not match.";
+    }
+
+    return !HasFailure();
+  }
+
+  // Given a block of memory 'a' with size 'a_size', determine if all regions
+  // excepting block 'b' described by 'b_stride', 'b_height', and 'b_width'
+  // match pixel value 'c'.
+  bool CheckBorder(const uint8_t *a, int a_size, const uint8_t *b, int b_width,
+                   int b_height, int b_stride, uint8_t c) const {
+    const uint8_t *a_end = a + a_size;
+    const int b_size = (b_stride * b_height) + b_width;
+    const uint8_t *b_end = b + b_size;
+    const int left_border = (b_stride - b_width) / 2;
+    const int right_border = left_border + ((b_stride - b_width) % 2);
+
+    EXPECT_GE(b - left_border, a) << "'b' does not start within 'a'";
+    EXPECT_LE(b_end + right_border, a_end) << "'b' does not end within 'a'";
+
+    // Top border.
+    for (int pixel = 0; pixel < b - a - left_border; ++pixel) {
+      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in top border.";
+    }
+
+    // Left border.
+    for (int height = 0; height < b_height; ++height) {
+      for (int width = left_border; width > 0; --width) {
+        EXPECT_EQ(c, b[height * b_stride - width])
+            << "Mismatch at row " << height << " column " << left_border - width
+            << " in left border.";
+      }
+    }
+
+    // Right border.
+    for (int height = 0; height < b_height; ++height) {
+      for (int width = b_width; width < b_width + right_border; ++width) {
+        EXPECT_EQ(c, b[height * b_stride + width])
+            << "Mismatch at row " << height << " column " << width - b_width
+            << " in right border.";
+      }
+    }
+
+    // Bottom border.
+    for (int pixel = static_cast<int>(b - a + b_size); pixel < a_size;
+         ++pixel) {
+      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in bottom border.";
+    }
+
+    return !HasFailure();
+  }
+
+  void TestWithRandomData(PredictFunc reference) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    // Run tests for almost all possible offsets.
+    for (int xoffset = 0; xoffset < 8; ++xoffset) {
+      for (int yoffset = 0; yoffset < 8; ++yoffset) {
+        if (xoffset == 0 && yoffset == 0) {
+          // This represents a copy which is not required to be handled by this
+          // module.
+          continue;
+        }
+
+        for (int i = 0; i < kSrcSize; ++i) {
+          src_[i] = rnd.Rand8();
+        }
+        reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset,
+                  dst_c_, 16);
+
+        ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2], kSrcStride,
+                                          xoffset, yoffset, dst_, dst_stride_));
+
+        ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_, dst_stride_));
+        ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_, width_,
+                                height_, dst_stride_, 128));
+      }
+    }
+  }
+
+  void TestWithUnalignedDst(PredictFunc reference) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+    // Only the 4x4 need to be able to handle unaligned writes.
+    if (width_ == 4 && height_ == 4) {
+      for (int xoffset = 0; xoffset < 8; ++xoffset) {
+        for (int yoffset = 0; yoffset < 8; ++yoffset) {
+          if (xoffset == 0 && yoffset == 0) {
+            continue;
+          }
+          for (int i = 0; i < kSrcSize; ++i) {
+            src_[i] = rnd.Rand8();
+          }
+          reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset,
+                    dst_c_, 16);
+
+          for (int i = 1; i < 4; ++i) {
+            memset(padded_dst_, 128, padded_dst_size_);
+
+            ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2],
+                                              kSrcStride, xoffset, yoffset,
+                                              dst_ + i, dst_stride_ + i));
+
+            ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_ + i, dst_stride_ + i));
+            ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_ + i,
+                                    width_, height_, dst_stride_ + i, 128));
+          }
+        }
+      }
+    }
+  }
+};
+
+class SixtapPredictTest : public PredictTestBase {};
+
+TEST_P(SixtapPredictTest, TestWithRandomData) {
+  TestWithRandomData(vp8_sixtap_predict16x16_c);
+}
+TEST_P(SixtapPredictTest, TestWithUnalignedDst) {
+  TestWithUnalignedDst(vp8_sixtap_predict16x16_c);
+}
+
+TEST_P(SixtapPredictTest, TestWithPresetData) {
+  // Test input
+  static const uint8_t kTestData[kSrcSize] = {
+    184, 4,   191, 82,  92,  41,  0,   1,   226, 236, 172, 20,  182, 42,  226,
+    177, 79,  94,  77,  179, 203, 206, 198, 22,  192, 19,  75,  17,  192, 44,
+    233, 120, 48,  168, 203, 141, 210, 203, 143, 180, 184, 59,  201, 110, 102,
+    171, 32,  182, 10,  109, 105, 213, 60,  47,  236, 253, 67,  55,  14,  3,
+    99,  247, 124, 148, 159, 71,  34,  114, 19,  177, 38,  203, 237, 239, 58,
+    83,  155, 91,  10,  166, 201, 115, 124, 5,   163, 104, 2,   231, 160, 16,
+    234, 4,   8,   103, 153, 167, 174, 187, 26,  193, 109, 64,  141, 90,  48,
+    200, 174, 204, 36,  184, 114, 237, 43,  238, 242, 207, 86,  245, 182, 247,
+    6,   161, 251, 14,  8,   148, 182, 182, 79,  208, 120, 188, 17,  6,   23,
+    65,  206, 197, 13,  242, 126, 128, 224, 170, 110, 211, 121, 197, 200, 47,
+    188, 207, 208, 184, 221, 216, 76,  148, 143, 156, 100, 8,   89,  117, 14,
+    112, 183, 221, 54,  197, 208, 180, 69,  176, 94,  180, 131, 215, 121, 76,
+    7,   54,  28,  216, 238, 249, 176, 58,  142, 64,  215, 242, 72,  49,  104,
+    87,  161, 32,  52,  216, 230, 4,   141, 44,  181, 235, 224, 57,  195, 89,
+    134, 203, 144, 162, 163, 126, 156, 84,  185, 42,  148, 145, 29,  221, 194,
+    134, 52,  100, 166, 105, 60,  140, 110, 201, 184, 35,  181, 153, 93,  121,
+    243, 227, 68,  131, 134, 232, 2,   35,  60,  187, 77,  209, 76,  106, 174,
+    15,  241, 227, 115, 151, 77,  175, 36,  187, 121, 221, 223, 47,  118, 61,
+    168, 105, 32,  237, 236, 167, 213, 238, 202, 17,  170, 24,  226, 247, 131,
+    145, 6,   116, 117, 121, 11,  194, 41,  48,  126, 162, 13,  93,  209, 131,
+    154, 122, 237, 187, 103, 217, 99,  60,  200, 45,  78,  115, 69,  49,  106,
+    200, 194, 112, 60,  56,  234, 72,  251, 19,  120, 121, 182, 134, 215, 135,
+    10,  114, 2,   247, 46,  105, 209, 145, 165, 153, 191, 243, 12,  5,   36,
+    119, 206, 231, 231, 11,  32,  209, 83,  27,  229, 204, 149, 155, 83,  109,
+    35,  93,  223, 37,  84,  14,  142, 37,  160, 52,  191, 96,  40,  204, 101,
+    77,  67,  52,  53,  43,  63,  85,  253, 147, 113, 226, 96,  6,   125, 179,
+    115, 161, 17,  83,  198, 101, 98,  85,  139, 3,   137, 75,  99,  178, 23,
+    201, 255, 91,  253, 52,  134, 60,  138, 131, 208, 251, 101, 48,  2,   227,
+    228, 118, 132, 245, 202, 75,  91,  44,  160, 231, 47,  41,  50,  147, 220,
+    74,  92,  219, 165, 89,  16
+  };
+
+  // Expected results for xoffset = 2 and yoffset = 2.
+  static const int kExpectedDstStride = 16;
+  static const uint8_t kExpectedDst[256] = {
+    117, 102, 74,  135, 42,  98,  175, 206, 70,  73,  222, 197, 50,  24,  39,
+    49,  38,  105, 90,  47,  169, 40,  171, 215, 200, 73,  109, 141, 53,  85,
+    177, 164, 79,  208, 124, 89,  212, 18,  81,  145, 151, 164, 217, 153, 91,
+    154, 102, 102, 159, 75,  164, 152, 136, 51,  213, 219, 186, 116, 193, 224,
+    186, 36,  231, 208, 84,  211, 155, 167, 35,  59,  42,  76,  216, 149, 73,
+    201, 78,  149, 184, 100, 96,  196, 189, 198, 188, 235, 195, 117, 129, 120,
+    129, 49,  25,  133, 113, 69,  221, 114, 70,  143, 99,  157, 108, 189, 140,
+    78,  6,   55,  65,  240, 255, 245, 184, 72,  90,  100, 116, 131, 39,  60,
+    234, 167, 33,  160, 88,  185, 200, 157, 159, 176, 127, 151, 138, 102, 168,
+    106, 170, 86,  82,  219, 189, 76,  33,  115, 197, 106, 96,  198, 136, 97,
+    141, 237, 151, 98,  137, 191, 185, 2,   57,  95,  142, 91,  255, 185, 97,
+    137, 76,  162, 94,  173, 131, 193, 161, 81,  106, 72,  135, 222, 234, 137,
+    66,  137, 106, 243, 210, 147, 95,  15,  137, 110, 85,  66,  16,  96,  167,
+    147, 150, 173, 203, 140, 118, 196, 84,  147, 160, 19,  95,  101, 123, 74,
+    132, 202, 82,  166, 12,  131, 166, 189, 170, 159, 85,  79,  66,  57,  152,
+    132, 203, 194, 0,   1,   56,  146, 180, 224, 156, 28,  83,  181, 79,  76,
+    80,  46,  160, 175, 59,  106, 43,  87,  75,  136, 85,  189, 46,  71,  200,
+    90
+  };
+
+  ASM_REGISTER_STATE_CHECK(
+      predict_(const_cast<uint8_t *>(kTestData) + kSrcStride * 2 + 2,
+               kSrcStride, 2, 2, dst_, dst_stride_));
+
+  ASSERT_TRUE(
+      CompareBuffers(kExpectedDst, kExpectedDstStride, dst_, dst_stride_));
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_neon),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_neon)));
+#endif
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+    MMX, SixtapPredictTest,
+    ::testing::Values(make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
+#endif
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
+#endif
+
+class BilinearPredictTest : public PredictTestBase {};
+
+TEST_P(BilinearPredictTest, TestWithRandomData) {
+  TestWithRandomData(vp8_bilinear_predict16x16_c);
+}
+TEST_P(BilinearPredictTest, TestWithUnalignedDst) {
+  TestWithUnalignedDst(vp8_bilinear_predict16x16_c);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    C, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_c),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_c),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_c),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_c)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_neon),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_neon),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_neon),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_neon)));
+#endif
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+    MMX, BilinearPredictTest,
+    ::testing::Values(make_tuple(8, 4, &vp8_bilinear_predict8x4_mmx),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_mmx)));
+#endif
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_sse2),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2)));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, BilinearPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_msa),
+                      make_tuple(8, 8, &vp8_bilinear_predict8x8_msa),
+                      make_tuple(8, 4, &vp8_bilinear_predict8x4_msa),
+                      make_tuple(4, 4, &vp8_bilinear_predict4x4_msa)));
+#endif
+}  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -32,7 +32,9 @@

 #undef NOMINMAX
 #define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
+#endif
 #include <windows.h>
 #include <winnt.h>

--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -1,231 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_config.h"
-#include "./vp8_rtcd.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-typedef void (*SixtapPredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
-                                  int xoffset, int yoffset, uint8_t *dst_ptr,
-                                  int dst_pitch);
-
-typedef std::tr1::tuple<int, int, SixtapPredictFunc> SixtapPredictParam;
-
-class SixtapPredictTest : public ::testing::TestWithParam<SixtapPredictParam> {
- public:
-  static void SetUpTestCase() {
-    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kSrcSize));
-    dst_ = reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kDstSize));
-    dst_c_ =
-        reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kDstSize));
-  }
-
-  static void TearDownTestCase() {
-    vpx_free(src_);
-    src_ = NULL;
-    vpx_free(dst_);
-    dst_ = NULL;
-    vpx_free(dst_c_);
-    dst_c_ = NULL;
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  // Make test arrays big enough for 16x16 functions. Six-tap filters
-  // need 5 extra pixels outside of the macroblock.
-  static const int kSrcStride = 21;
-  static const int kDstStride = 16;
-  static const int kDataAlignment = 16;
-  static const int kSrcSize = kSrcStride * kSrcStride + 1;
-  static const int kDstSize = kDstStride * kDstStride;
-
-  virtual void SetUp() {
-    width_ = GET_PARAM(0);
-    height_ = GET_PARAM(1);
-    sixtap_predict_ = GET_PARAM(2);
-    memset(src_, 0, kSrcSize);
-    memset(dst_, 0, kDstSize);
-    memset(dst_c_, 0, kDstSize);
-  }
-
-  int width_;
-  int height_;
-  SixtapPredictFunc sixtap_predict_;
-  // The src stores the macroblock we will filter on, and makes it 1 byte larger
-  // in order to test unaligned access. The result is stored in dst and dst_c(c
-  // reference code result).
-  static uint8_t *src_;
-  static uint8_t *dst_;
-  static uint8_t *dst_c_;
-};
-
-uint8_t *SixtapPredictTest::src_ = NULL;
-uint8_t *SixtapPredictTest::dst_ = NULL;
-uint8_t *SixtapPredictTest::dst_c_ = NULL;
-
-TEST_P(SixtapPredictTest, TestWithPresetData) {
-  // Test input
-  static const uint8_t test_data[kSrcSize] = {
-    216, 184, 4,   191, 82,  92,  41,  0,   1,   226, 236, 172, 20,  182, 42,
-    226, 177, 79,  94,  77,  179, 203, 206, 198, 22,  192, 19,  75,  17,  192,
-    44,  233, 120, 48,  168, 203, 141, 210, 203, 143, 180, 184, 59,  201, 110,
-    102, 171, 32,  182, 10,  109, 105, 213, 60,  47,  236, 253, 67,  55,  14,
-    3,   99,  247, 124, 148, 159, 71,  34,  114, 19,  177, 38,  203, 237, 239,
-    58,  83,  155, 91,  10,  166, 201, 115, 124, 5,   163, 104, 2,   231, 160,
-    16,  234, 4,   8,   103, 153, 167, 174, 187, 26,  193, 109, 64,  141, 90,
-    48,  200, 174, 204, 36,  184, 114, 237, 43,  238, 242, 207, 86,  245, 182,
-    247, 6,   161, 251, 14,  8,   148, 182, 182, 79,  208, 120, 188, 17,  6,
-    23,  65,  206, 197, 13,  242, 126, 128, 224, 170, 110, 211, 121, 197, 200,
-    47,  188, 207, 208, 184, 221, 216, 76,  148, 143, 156, 100, 8,   89,  117,
-    14,  112, 183, 221, 54,  197, 208, 180, 69,  176, 94,  180, 131, 215, 121,
-    76,  7,   54,  28,  216, 238, 249, 176, 58,  142, 64,  215, 242, 72,  49,
-    104, 87,  161, 32,  52,  216, 230, 4,   141, 44,  181, 235, 224, 57,  195,
-    89,  134, 203, 144, 162, 163, 126, 156, 84,  185, 42,  148, 145, 29,  221,
-    194, 134, 52,  100, 166, 105, 60,  140, 110, 201, 184, 35,  181, 153, 93,
-    121, 243, 227, 68,  131, 134, 232, 2,   35,  60,  187, 77,  209, 76,  106,
-    174, 15,  241, 227, 115, 151, 77,  175, 36,  187, 121, 221, 223, 47,  118,
-    61,  168, 105, 32,  237, 236, 167, 213, 238, 202, 17,  170, 24,  226, 247,
-    131, 145, 6,   116, 117, 121, 11,  194, 41,  48,  126, 162, 13,  93,  209,
-    131, 154, 122, 237, 187, 103, 217, 99,  60,  200, 45,  78,  115, 69,  49,
-    106, 200, 194, 112, 60,  56,  234, 72,  251, 19,  120, 121, 182, 134, 215,
-    135, 10,  114, 2,   247, 46,  105, 209, 145, 165, 153, 191, 243, 12,  5,
-    36,  119, 206, 231, 231, 11,  32,  209, 83,  27,  229, 204, 149, 155, 83,
-    109, 35,  93,  223, 37,  84,  14,  142, 37,  160, 52,  191, 96,  40,  204,
-    101, 77,  67,  52,  53,  43,  63,  85,  253, 147, 113, 226, 96,  6,   125,
-    179, 115, 161, 17,  83,  198, 101, 98,  85,  139, 3,   137, 75,  99,  178,
-    23,  201, 255, 91,  253, 52,  134, 60,  138, 131, 208, 251, 101, 48,  2,
-    227, 228, 118, 132, 245, 202, 75,  91,  44,  160, 231, 47,  41,  50,  147,
-    220, 74,  92,  219, 165, 89,  16
-  };
-
-  // Expected result
-  static const uint8_t expected_dst[kDstSize] = {
-    117, 102, 74,  135, 42,  98,  175, 206, 70,  73,  222, 197, 50,  24,  39,
-    49,  38,  105, 90,  47,  169, 40,  171, 215, 200, 73,  109, 141, 53,  85,
-    177, 164, 79,  208, 124, 89,  212, 18,  81,  145, 151, 164, 217, 153, 91,
-    154, 102, 102, 159, 75,  164, 152, 136, 51,  213, 219, 186, 116, 193, 224,
-    186, 36,  231, 208, 84,  211, 155, 167, 35,  59,  42,  76,  216, 149, 73,
-    201, 78,  149, 184, 100, 96,  196, 189, 198, 188, 235, 195, 117, 129, 120,
-    129, 49,  25,  133, 113, 69,  221, 114, 70,  143, 99,  157, 108, 189, 140,
-    78,  6,   55,  65,  240, 255, 245, 184, 72,  90,  100, 116, 131, 39,  60,
-    234, 167, 33,  160, 88,  185, 200, 157, 159, 176, 127, 151, 138, 102, 168,
-    106, 170, 86,  82,  219, 189, 76,  33,  115, 197, 106, 96,  198, 136, 97,
-    141, 237, 151, 98,  137, 191, 185, 2,   57,  95,  142, 91,  255, 185, 97,
-    137, 76,  162, 94,  173, 131, 193, 161, 81,  106, 72,  135, 222, 234, 137,
-    66,  137, 106, 243, 210, 147, 95,  15,  137, 110, 85,  66,  16,  96,  167,
-    147, 150, 173, 203, 140, 118, 196, 84,  147, 160, 19,  95,  101, 123, 74,
-    132, 202, 82,  166, 12,  131, 166, 189, 170, 159, 85,  79,  66,  57,  152,
-    132, 203, 194, 0,   1,   56,  146, 180, 224, 156, 28,  83,  181, 79,  76,
-    80,  46,  160, 175, 59,  106, 43,  87,  75,  136, 85,  189, 46,  71,  200,
-    90
-  };
-
-  uint8_t *src = const_cast<uint8_t *>(test_data);
-
-  ASM_REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1],
-                                           kSrcStride, 2, 2, dst_, kDstStride));
-
-  for (int i = 0; i < height_; ++i) {
-    for (int j = 0; j < width_; ++j)
-      ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j])
-          << "i==" << (i * width_ + j);
-  }
-}
-
-using libvpx_test::ACMRandom;
-
-TEST_P(SixtapPredictTest, TestWithRandomData) {
-  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  for (int i = 0; i < kSrcSize; ++i) src_[i] = rnd.Rand8();
-
-  // Run tests for all possible offsets.
-  for (int xoffset = 0; xoffset < 8; ++xoffset) {
-    for (int yoffset = 0; yoffset < 8; ++yoffset) {
-      // Call c reference function.
-      // Move start point to next pixel to test if the function reads
-      // unaligned data correctly.
-      vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
-                                xoffset, yoffset, dst_c_, kDstStride);
-
-      // Run test.
-      ASM_REGISTER_STATE_CHECK(sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1],
-                                               kSrcStride, xoffset, yoffset,
-                                               dst_, kDstStride));
-
-      for (int i = 0; i < height_; ++i) {
-        for (int j = 0; j < width_; ++j)
-          ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j])
-              << "i==" << (i * width_ + j);
-      }
-    }
-  }
-}
-
-using std::tr1::make_tuple;
-
-INSTANTIATE_TEST_CASE_P(
-    C, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_neon)));
-#endif
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(
-    MMX, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
-#endif
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
-#endif
-}  // namespace
--- a/test/stress.sh
+++ b/test/stress.sh
@@ -0,0 +1,141 @@
+#!/bin/sh
+##
+##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
+##
+##  Use of this source code is governed by a BSD-style license
+##  that can be found in the LICENSE file in the root of the source
+##  tree. An additional intellectual property rights grant can be found
+##  in the file PATENTS.  All contributing project authors may
+##  be found in the AUTHORS file in the root of the source tree.
+##
+##  This file performs a stress test. It runs 5 encodes and 30 decodes in
+##  parallel.
+
+. $(dirname $0)/tools_common.sh
+
+YUV="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.yuv"
+VP8="${LIBVPX_TEST_DATA_PATH}/tos_vp8.webm"
+VP9="${LIBVPX_TEST_DATA_PATH}/vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm"
+DATA_URL="http://downloads.webmproject.org/test_data/libvpx/"
+SHA1_FILE="$(dirname $0)/test-data.sha1"
+
+# Set sha1sum to proper sha program (sha1sum, shasum, sha1). This code is
+# cribbed from libs.mk.
+[ -x "$(which sha1sum)" ] && sha1sum=sha1sum
+[ -x "$(which shasum)" ] && sha1sum=shasum
+[ -x "$(which sha1)" ] && sha1sum=sha1
+
+# Download a file from the url and check its sha1sum.
+download_and_check_file() {
+  # Get the file from the file path.
+  local readonly root="${1#${LIBVPX_TEST_DATA_PATH}/}"
+
+  # Download the file using curl. Trap to insure non partial file.
+  (trap "rm -f $1" INT TERM \
+    && eval "curl --retry 1 -L -o $1 ${DATA_URL}${root} ${devnull}")
+
+  # Check the sha1 sum of the file.
+  if [ -n "${sha1sum}" ]; then
+    set -e
+    grep ${root} ${SHA1_FILE} \
+      | (cd ${LIBVPX_TEST_DATA_PATH}; ${sha1sum} -c);
+  fi
+}
+
+# Environment check: Make sure input is available.
+stress_verify_environment() {
+  if [ ! -e "${SHA1_FILE}" ] ; then
+    echo "Missing ${SHA1_FILE}"
+    return 1
+  fi
+  for file in "${YUV}" "${VP8}" "${VP9}"; do
+    if [ ! -e "${file}" ] ; then
+      download_and_check_file "${file}"
+    fi
+  done
+  if [ ! -e "${YUV}" ] || [ ! -e "${VP8}" ] || [ ! -e "${VP9}" ] ; then
+    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
+    return 1
+  fi
+  if [ -z "$(vpx_tool_path vpxenc)" ]; then
+    elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
+    return 1
+  fi
+  if [ -z "$(vpx_tool_path vpxdec)" ]; then
+    elog "vpxdec not found. It must exist in LIBVPX_BIN_PATH or its parent."
+    return 1
+  fi
+}
+
+# This function runs tests on libvpx that run multiple encodes and decodes
+# in parallel in hopes of catching synchronization and/or threading issues.
+stress() {
+  local readonly decoder="$(vpx_tool_path vpxdec)"
+  local readonly encoder="$(vpx_tool_path vpxenc)"
+  local readonly codec="$1"
+  local readonly webm="$2"
+  local readonly decode_count="$3"
+  local pids=""
+  local rt_max_jobs=${STRESS_RT_MAX_JOBS:-5}
+  local twopass_max_jobs=${STRESS_TWOPASS_MAX_JOBS:-5}
+
+  # Enable job control, so we can run multiple processes.
+  set -m
+
+  # Start $twopass_max_jobs encode jobs in parallel.
+  for i in $(seq ${twopass_max_jobs}); do
+    bitrate=$(($i * 20 + 300))
+    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
+      "${YUV}" "-t 4 --limit=150 --test-decode=fatal " \
+      "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.webm" \
+      ${devnull} &
+    pids="${pids} $!"
+  done
+
+  # Start $rt_max_jobs rt encode jobs in parallel.
+  for i in $(seq ${rt_max_jobs}); do
+    bitrate=$(($i * 20 + 300))
+    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
+      "${YUV}" "-t 4 --limit=150 --test-decode=fatal " \
+      "--target-bitrate=${bitrate} --lag-in-frames=0 --error-resilient=1" \
+      "--kf-min-dist=3000 --kf-max-dist=3000 --cpu-used=-6 --static-thresh=1" \
+      "--end-usage=cbr --min-q=2 --max-q=56 --undershoot-pct=100" \
+      "--overshoot-pct=15 --buf-sz=1000 --buf-initial-sz=500" \
+      "--buf-optimal-sz=600 --max-intra-rate=900 --resize-allowed=0" \
+      "--drop-frame=0 --passes=1 --rt --noise-sensitivity=4" \
+      "-o ${VPX_TEST_OUTPUT_DIR}/${i}.rt.webm" ${devnull} &
+    pids="${pids} $!"
+  done
+
+  # Start $decode_count decode jobs in parallel.
+  for i in $(seq "${decode_count}"); do
+    eval "${decoder}" "-t 4" "${webm}" "--noblit" ${devnull} &
+    pids="${pids} $!"
+  done
+
+  # Wait for all parallel jobs to finish.
+  fail=0
+  for job in "${pids}"; do
+    wait $job || fail=$(($fail + 1))
+  done
+  return $fail
+}
+
+vp8_stress_test() {
+  local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40}
+  if [ "$(vp8_decode_available)" = "yes" -a \
+       "$(vp8_encode_available)" = "yes" ]; then
+    stress vp8 "${VP8}" "${vp8_max_jobs}"
+  fi
+}
+
+vp9_stress_test() {
+  local vp9_max_jobs=${STRESS_VP9_DECODE_MAX_JOBS:-25}
+
+  if [ "$(vp9_decode_available)" = "yes" -a \
+       "$(vp9_encode_available)" = "yes" ]; then
+    stress vp9 "${VP9}" "${vp9_max_jobs}"
+  fi
+}
+
+run_tests stress_verify_environment "vp8_stress_test vp9_stress_test"
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -438,7 +438,7 @@ TEST_F(SvcTest, SetAutoAltRefOption) {
 // Test that decoder can handle an SVC frame as the first frame in a sequence.
 TEST_F(SvcTest, OnePassEncodeOneFrame) {
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
-  vpx_fixed_buf output = { 0 };
+  vpx_fixed_buf output = vpx_fixed_buf();
  Pass2EncodeNFrames(NULL, 1, 2, &output);
  DecodeNFrames(&output, 1);
  FreeBitstreamBuffers(&output, 1);
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -20,6 +20,7 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += noisy_clip_640_360.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m

@@ -730,6 +731,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
 endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
@@ -762,6 +765,8 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s195
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf
@@ -771,6 +776,10 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.iv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm.res
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm.res

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # Encode / Decode test
@@ -863,3 +872,5 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf
+LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf.md5
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -14,6 +14,7 @@ df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
 4935c62becc68c13642a03db1e6d3e2331c1c612 *invalid-vp90-03-v3.webm.res
 d637297561dd904eb2c97a9015deeb31c4a1e8d2 *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 3a204bdbeaa3c6458b77bcebb8366d107267f55d *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
+9aa21d8b2cb9d39abe8a7bb6032dc66955fb4342 *noisy_clip_640_360.y4m
 a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
@@ -834,5 +835,16 @@ f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.w
 7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
 7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm
 4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
-a000d568431d07379dd5a8ec066061c07e560b47  invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf
-1e75aad3433c5c21c194a7b53fc393970f0a8d7f  invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res
+a000d568431d07379dd5a8ec066061c07e560b47 *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf
+1e75aad3433c5c21c194a7b53fc393970f0a8d7f *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res
+235182f9a1c5c8841552510dd4288487447bfc40 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
+787f04f0483320d536894282f3358a4f8cac1cf9 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
+91d3cefd0deb98f3b0caf3a2d900ec7a7605e53a *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
+1e472baaf5f6113459f0399a38a5a5e68d17799d *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
+70057835bf29d14e66699ce5f022df2551fb6b37 *invalid-crbug-629481.webm
+5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-crbug-629481.webm.res
+7602e00378161ca36ae93cc6ee12dd30b5ba1e1d *vp90-2-22-svc_1280x720_3.ivf
+02e53e3eefbf25ec0929047fe50876acdeb040bd *vp90-2-22-svc_1280x720_3.ivf.md5
+6fa3d3ac306a3d9ce1d610b78441dc00d2c2d4b9 *tos_vp8.webm
+e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
+d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
--- a/test/test.mk
+++ b/test/test.mk
@@ -20,6 +20,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += alt_ref_aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
@@ -34,8 +35,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += decode_svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
@@ -88,6 +89,11 @@ ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_VP9_ENCODER), yesyes)
 LIBVPX_TEST_SRCS-yes += encode_perf_test.cc
 endif

+## Multi-codec blackbox tests.
+ifeq ($(findstring yes,$(CONFIG_VP8_DECODER)$(CONFIG_VP9_DECODER)), yes)
+LIBVPX_TEST_SRCS-yes += invalid_file_test.cc
+endif
+
 ##
 ## WHITE BOX TESTS
 ##
@@ -114,7 +120,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc

 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
-LIBVPX_TEST_SRCS-yes                   += sixtap_predict_test.cc
+LIBVPX_TEST_SRCS-yes                   += predict_test.cc
 LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.cc

 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_TEMPORAL_DENOISING),yesyes)
@@ -138,7 +144,7 @@ LIBVPX_TEST_SRCS-yes                   += vp9_encoder_parms_get_to_decoder.cc
 endif

 LIBVPX_TEST_SRCS-yes                   += convolve_test.cc
-LIBVPX_TEST_SRCS-yes                   += lpf_8_test.cc
+LIBVPX_TEST_SRCS-yes                   += lpf_test.cc
 LIBVPX_TEST_SRCS-yes                   += vp9_intrapred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -29,6 +29,8 @@ namespace {
 typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
                            const uint8_t *above, const uint8_t *left);

+const int kBPS = 32;
+const int kTotalPixels = 32 * kBPS;
 const int kNumVp9IntraPredFuncs = 13;
 const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
  "DC_PRED",   "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
@@ -36,107 +38,121 @@ const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
  "D207_PRED", "D63_PRED",     "TM_PRED"
 };

+template <typename Pixel>
+struct IntraPredTestMem {
+  void Init(int block_size, int bd) {
+    libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+    Pixel *const above = above_mem + 16;
+    const int mask = (1 << bd) - 1;
+    for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand16() & mask;
+    for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand16() & mask;
+    for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand16() & mask;
+
+    // some code assumes the top row has been extended:
+    // d45/d63 C-code, for instance, but not the assembly.
+    // TODO(jzern): this style of extension isn't strictly necessary.
+    ASSERT_LE(block_size, kBPS);
+    for (int i = block_size; i < 2 * kBPS; ++i) {
+      above[i] = above[block_size - 1];
+    }
+  }
+
+  DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, Pixel, left[kBPS]);
+  DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
+};
+
+typedef IntraPredTestMem<uint8_t> Vp9IntraPredTestMem;
+
+void CheckMd5Signature(const char name[], const char *const signatures[],
+                       const void *data, size_t data_size, int elapsed_time,
+                       int idx) {
+  libvpx_test::MD5 md5;
+  md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
+  printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, kVp9IntraPredNames[idx],
+         elapsed_time, md5.Get());
+  EXPECT_STREQ(signatures[idx], md5.Get());
+}
+
 void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
-                   const char *const pred_func_names[], int num_funcs,
-                   const char *const signatures[], int block_size,
-                   int num_pixels_per_test) {
-  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
-  const int kBPS = 32;
-  const int kTotalPixels = 32 * kBPS;
-  DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
-  DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
-  DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
-  DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
-  uint8_t *const above = above_mem + 16;
-  for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
-  for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
-  for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
-  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);
+                   const char *const signatures[], int block_size) {
+  const int kNumTests = static_cast<int>(
+      2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs));
+  Vp9IntraPredTestMem intra_pred_test_mem;
+  const uint8_t *const above = intra_pred_test_mem.above_mem + 16;

-  // some code assumes the top row has been extended:
-  // d45/d63 C-code, for instance, but not the assembly.
-  // TODO(jzern): this style of extension isn't strictly necessary.
-  ASSERT_LE(block_size, kBPS);
-  memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);
+  intra_pred_test_mem.Init(block_size, 8);

-  for (int k = 0; k < num_funcs; ++k) {
+  for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) {
    if (pred_funcs[k] == NULL) continue;
-    memcpy(src, ref_src, sizeof(src));
+    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+           sizeof(intra_pred_test_mem.src));
    vpx_usec_timer timer;
    vpx_usec_timer_start(&timer);
    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
-      pred_funcs[k](src, kBPS, above, left);
+      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
+                    intra_pred_test_mem.left);
    }
    libvpx_test::ClearSystemState();
    vpx_usec_timer_mark(&timer);
    const int elapsed_time =
        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
-    libvpx_test::MD5 md5;
-    md5.Add(src, sizeof(src));
-    printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, pred_func_names[k],
-           elapsed_time, md5.Get());
-    EXPECT_STREQ(signatures[k], md5.Get());
+    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
+                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
  }
 }

 void TestIntraPred4(VpxPredFunc const *pred_funcs) {
-  static const int kNumVp9IntraFuncs = 13;
-  static const char *const kSignatures[kNumVp9IntraFuncs] = {
-    "4334156168b34ab599d9b5b30f522fe9", "bc4649d5ba47c7ff178d92e475960fb0",
-    "8d316e5933326dcac24e1064794b5d12", "a27270fed024eafd762c95de85f4da51",
-    "c33dff000d4256c2b8f3bf9e9bab14d2", "44d8cddc2ad8f79b8ed3306051722b4f",
-    "eb54839b2bad6699d8946f01ec041cd0", "ecb0d56ae5f677ea45127ce9d5c058e4",
-    "0b7936841f6813da818275944895b574", "9117972ef64f91a58ff73e1731c81db2",
-    "c56d5e8c729e46825f46dd5d3b5d508a", "c0889e2039bcf7bcb5d2f33cdca69adc",
-    "309a618577b27c648f9c5ee45252bc8f",
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "e7ed7353c3383fff942e500e9bfe82fe", "2a4a26fcc6ce005eadc08354d196c8a9",
+    "269d92eff86f315d9c38fe7640d85b15", "ae2960eea9f71ee3dabe08b282ec1773",
+    "6c1abcc44e90148998b51acd11144e9c", "f7bb3186e1ef8a2b326037ff898cad8e",
+    "364c1f3fb2f445f935aec2a70a67eaa4", "141624072a4a56773f68fadbdd07c4a7",
+    "7be49b08687a5f24df3a2c612fca3876", "459bb5d9fd5b238348179c9a22108cd6",
+    "73edb8831bf1bdfce21ae8eaa43b1234", "2e2457f2009c701a355a8b25eb74fcda",
+    "52ae4e8bdbe41494c1f43051d4dd7f0b"
  };
-  TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
-                kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
+  TestIntraPred("Intra4", pred_funcs, kSignatures, 4);
 }

 void TestIntraPred8(VpxPredFunc const *pred_funcs) {
-  static const int kNumVp9IntraFuncs = 13;
-  static const char *const kSignatures[kNumVp9IntraFuncs] = {
-    "7694ddeeefed887faf9d339d18850928", "7d726b1213591b99f736be6dec65065b",
-    "19c5711281357a485591aaf9c96c0a67", "ba6b66877a089e71cd938e3b8c40caac",
-    "802440c93317e0f8ba93fab02ef74265", "9e09a47a15deb0b9d8372824f9805080",
-    "b7c2d8c662268c0c427da412d7b0311d", "78339c1c60bb1d67d248ab8c4da08b7f",
-    "5c97d70f7d47de1882a6cd86c165c8a9", "8182bf60688b42205acd95e59e967157",
-    "08323400005a297f16d7e57e7fe1eaac", "95f7bfc262329a5849eda66d8f7c68ce",
-    "815b75c8e0d91cc1ae766dc5d3e445a3",
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "d8bbae5d6547cfc17e4f5f44c8730e88", "373bab6d931868d41a601d9d88ce9ac3",
+    "6fdd5ff4ff79656c14747598ca9e3706", "d9661c2811d6a73674f40ffb2b841847",
+    "7c722d10b19ccff0b8c171868e747385", "f81dd986eb2b50f750d3a7da716b7e27",
+    "d500f2c8fc78f46a4c74e4dcf51f14fb", "0e3523f9cab2142dd37fd07ec0760bce",
+    "79ac4efe907f0a0f1885d43066cfedee", "19ecf2432ac305057de3b6578474eec6",
+    "4f985b61acc6dd5d2d2585fa89ea2e2d", "f1bb25a9060dd262f405f15a38f5f674",
+    "209ea00801584829e9a0f7be7d4a74ba"
  };
-  TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
-                kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
+  TestIntraPred("Intra8", pred_funcs, kSignatures, 8);
 }

 void TestIntraPred16(VpxPredFunc const *pred_funcs) {
-  static const int kNumVp9IntraFuncs = 13;
-  static const char *const kSignatures[kNumVp9IntraFuncs] = {
-    "b40dbb555d5d16a043dc361e6694fe53", "fb08118cee3b6405d64c1fd68be878c6",
-    "6c190f341475c837cc38c2e566b64875", "db5c34ccbe2c7f595d9b08b0dc2c698c",
-    "a62cbfd153a1f0b9fed13e62b8408a7a", "143df5b4c89335e281103f610f5052e4",
-    "d87feb124107cdf2cfb147655aa0bb3c", "7841fae7d4d47b519322e6a03eeed9dc",
-    "f6ebed3f71cbcf8d6d0516ce87e11093", "3cc480297dbfeed01a1c2d78dd03d0c5",
-    "b9f69fa6532b372c545397dcb78ef311", "a8fe1c70432f09d0c20c67bdb6432c4d",
-    "b8a41aa968ec108af447af4217cba91b",
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "50971c07ce26977d30298538fffec619", "527a6b9e0dc5b21b98cf276305432bef",
+    "7eff2868f80ebc2c43a4f367281d80f7", "67cd60512b54964ef6aff1bd4816d922",
+    "48371c87dc95c08a33b2048f89cf6468", "b0acf2872ee411d7530af6d2625a7084",
+    "f32aafed4d8d3776ed58bcb6188756d5", "dae208f3dca583529cff49b73f7c4183",
+    "7af66a2f4c8e0b4908e40f047e60c47c", "125e3ab6ab9bc961f183ec366a7afa88",
+    "6b90f25b23983c35386b9fd704427622", "f8d6b11d710edc136a7c62c917435f93",
+    "ed308f18614a362917f411c218aee532"
  };
-  TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
-                kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
+  TestIntraPred("Intra16", pred_funcs, kSignatures, 16);
 }

 void TestIntraPred32(VpxPredFunc const *pred_funcs) {
-  static const int kNumVp9IntraFuncs = 13;
-  static const char *const kSignatures[kNumVp9IntraFuncs] = {
-    "558541656d84f9ae7896db655826febe", "b3587a1f9a01495fa38c8cd3c8e2a1bf",
-    "4c6501e64f25aacc55a2a16c7e8f0255", "b3b01379ba08916ef6b1b35f7d9ad51c",
-    "0f1eb38b6cbddb3d496199ef9f329071", "911c06efb9ed1c3b4c104b232b55812f",
-    "9225beb0ddfa7a1d24eaa1be430a6654", "0a6d584a44f8db9aa7ade2e2fdb9fc9e",
-    "b01c9076525216925f3456f034fb6eee", "d267e20ad9e5cd2915d1a47254d3d149",
-    "ed012a4a5da71f36c2393023184a0e59", "f162b51ed618d28b936974cff4391da5",
-    "9e1370c6d42e08d357d9612c93a71cfc",
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "a0a618c900e65ae521ccc8af789729f2", "985aaa7c72b4a6c2fb431d32100cf13a",
+    "10662d09febc3ca13ee4e700120daeb5", "b3b01379ba08916ef6b1b35f7d9ad51c",
+    "9f4261755795af97e34679c333ec7004", "bc2c9da91ad97ef0d1610fb0a9041657",
+    "75c79b1362ad18abfcdb1aa0aacfc21d", "4039bb7da0f6860090d3c57b5c85468f",
+    "b29fff7b61804e68383e3a609b33da58", "e1aa5e49067fd8dba66c2eb8d07b7a89",
+    "4e042822909c1c06d3b10a88281df1eb", "72eb9d9e0e67c93f4c66b70348e9fef7",
+    "a22d102bcb51ca798aac12ca4ae8f2e8"
  };
-  TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
-                kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
+  TestIntraPred("Intra32", pred_funcs, kSignatures, 32);
 }

 }  // namespace
@@ -153,7 +169,6 @@ void TestIntraPred32(VpxPredFunc const *pred_funcs) {
  }

 // -----------------------------------------------------------------------------
-// 4x4

 INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
                vpx_dc_left_predictor_4x4_c, vpx_dc_top_predictor_4x4_c,
@@ -163,47 +178,6 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
                vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
                vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)

-#if HAVE_SSE2
-INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
-                vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
-                vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
-                vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
-                NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
-                vpx_tm_predictor_4x4_sse2)
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL,
-                vpx_d63_predictor_4x4_ssse3, NULL)
-#endif  // HAVE_SSSE3
-
-#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
-                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
-#endif  // HAVE_DSPR2
-
-#if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
-                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
-                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
-                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
-                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_4x4_neon)
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
-                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
-                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
-                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_4x4_msa)
-#endif  // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 8x8
-
 INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
                vpx_dc_left_predictor_8x8_c, vpx_dc_top_predictor_8x8_c,
                vpx_dc_128_predictor_8x8_c, vpx_v_predictor_8x8_c,
@@ -212,46 +186,6 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
                vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
                vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)

-#if HAVE_SSE2
-INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
-                vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
-                vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
-                vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
-                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_d153_predictor_8x8_ssse3,
-                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
-#endif  // HAVE_SSSE3
-
-#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
-                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_tm_predictor_8x8_c)
-#endif  // HAVE_DSPR2
-
-#if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
-                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
-                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
-                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, NULL,
-                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon)
-
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
-                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
-                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
-                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_8x8_msa)
-#endif  // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 16x16
-
 INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
                vpx_dc_left_predictor_16x16_c, vpx_dc_top_predictor_16x16_c,
                vpx_dc_128_predictor_16x16_c, vpx_v_predictor_16x16_c,
@@ -260,48 +194,6 @@ INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
                vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c,
                vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c)

-#if HAVE_SSE2
-INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
-                vpx_dc_left_predictor_16x16_sse2,
-                vpx_dc_top_predictor_16x16_sse2,
-                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
-                vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_sse2)
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_d45_predictor_16x16_ssse3, NULL, NULL,
-                vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3,
-                vpx_d63_predictor_16x16_ssse3, NULL)
-#endif  // HAVE_SSSE3
-
-#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
-                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-#endif  // HAVE_DSPR2
-
-#if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
-                vpx_dc_left_predictor_16x16_neon,
-                vpx_dc_top_predictor_16x16_neon,
-                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
-                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, NULL,
-                NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon)
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
-                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
-                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
-                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_msa)
-#endif  // HAVE_MSA
-
-// -----------------------------------------------------------------------------
-// 32x32
-
 INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
                vpx_dc_left_predictor_32x32_c, vpx_dc_top_predictor_32x32_c,
                vpx_dc_128_predictor_32x32_c, vpx_v_predictor_32x32_c,
@@ -311,6 +203,26 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
                vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)

 #if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
+                vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
+                vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
+                vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
+                NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
+                vpx_tm_predictor_4x4_sse2)
+
+INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
+                vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
+                vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
+                vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
+
+INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
+                vpx_dc_left_predictor_16x16_sse2,
+                vpx_dc_top_predictor_16x16_sse2,
+                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
+                vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_sse2)
+
 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
                vpx_dc_left_predictor_32x32_sse2,
                vpx_dc_top_predictor_32x32_sse2,
@@ -320,22 +232,79 @@ INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
 #endif  // HAVE_SSE2

 #if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL,
+                vpx_d63_predictor_4x4_ssse3, NULL)
+INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_d153_predictor_8x8_ssse3,
+                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
+INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_d45_predictor_16x16_ssse3, NULL, NULL,
+                vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3,
+                vpx_d63_predictor_16x16_ssse3, NULL)
 INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL,
                vpx_d45_predictor_32x32_ssse3, NULL, NULL,
                vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3,
                vpx_d63_predictor_32x32_ssse3, NULL)
 #endif  // HAVE_SSSE3

+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_8x8_c)
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL)
+#endif  // HAVE_DSPR2
+
 #if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
+                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
+                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
+                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
+                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
+                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
+                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
+                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon,
+                vpx_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_8x8_neon)
+INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
+                vpx_dc_left_predictor_16x16_neon,
+                vpx_dc_top_predictor_16x16_neon,
+                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
+                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon,
+                vpx_d135_predictor_16x16_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_neon)
 INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
                vpx_dc_left_predictor_32x32_neon,
                vpx_dc_top_predictor_32x32_neon,
                vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon,
-                vpx_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_32x32_neon, vpx_d45_predictor_32x32_neon,
+                vpx_d135_predictor_32x32_neon, NULL, NULL, NULL, NULL,
                vpx_tm_predictor_32x32_neon)
 #endif  // HAVE_NEON

 #if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
+                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
+                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
+                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
+                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
+                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
+                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_8x8_msa)
+INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
+                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
+                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
+                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_msa)
 INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
                vpx_dc_left_predictor_32x32_msa, vpx_dc_top_predictor_32x32_msa,
                vpx_dc_128_predictor_32x32_msa, vpx_v_predictor_32x32_msa,
@@ -343,4 +312,209 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
                vpx_tm_predictor_32x32_msa)
 #endif  // HAVE_MSA

+// -----------------------------------------------------------------------------
+
+#if CONFIG_VP9_HIGHBITDEPTH
+namespace {
+
+typedef void (*VpxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
+                                  const uint16_t *above, const uint16_t *left,
+                                  int bd);
+
+typedef IntraPredTestMem<uint16_t> Vp9HighbdIntraPredTestMem;
+
+void TestHighbdIntraPred(const char name[], VpxHighbdPredFunc const *pred_funcs,
+                         const char *const signatures[], int block_size) {
+  const int kNumTests = static_cast<int>(
+      2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs));
+  Vp9HighbdIntraPredTestMem intra_pred_test_mem;
+  const uint16_t *const above = intra_pred_test_mem.above_mem + 16;
+
+  intra_pred_test_mem.Init(block_size, 12);
+
+  for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) {
+    if (pred_funcs[k] == NULL) continue;
+    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
+           sizeof(intra_pred_test_mem.src));
+    vpx_usec_timer timer;
+    vpx_usec_timer_start(&timer);
+    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
+      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
+                    intra_pred_test_mem.left, 12);
+    }
+    libvpx_test::ClearSystemState();
+    vpx_usec_timer_mark(&timer);
+    const int elapsed_time =
+        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
+    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
+                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
+  }
+}
+
+void TestHighbdIntraPred4(VpxHighbdPredFunc const *pred_funcs) {
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "11f74af6c5737df472f3275cbde062fa", "51bea056b6447c93f6eb8f6b7e8f6f71",
+    "27e97f946766331795886f4de04c5594", "53ab15974b049111fb596c5168ec7e3f",
+    "f0b640bb176fbe4584cf3d32a9b0320a", "729783ca909e03afd4b47111c80d967b",
+    "fbf1c30793d9f32812e4d9f905d53530", "293fc903254a33754133314c6cdba81f",
+    "f8074d704233e73dfd35b458c6092374", "aa6363d08544a1ec4da33d7a0be5640d",
+    "462abcfdfa3d087bb33c9a88f2aec491", "863eab65d22550dd44a2397277c1ec71",
+    "23d61df1574d0fa308f9731811047c4b"
+  };
+  TestHighbdIntraPred("Intra4", pred_funcs, kSignatures, 4);
+}
+
+void TestHighbdIntraPred8(VpxHighbdPredFunc const *pred_funcs) {
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "03da8829fe94663047fd108c5fcaa71d", "ecdb37b8120a2d3a4c706b016bd1bfd7",
+    "1d4543ed8d2b9368cb96898095fe8a75", "f791c9a67b913cbd82d9da8ecede30e2",
+    "065c70646f4dbaff913282f55a45a441", "51f87123616662ef7c35691497dfd0ba",
+    "2a5b0131ef4716f098ee65e6df01e3dd", "9ffe186a6bc7db95275f1bbddd6f7aba",
+    "a3258a2eae2e2bd55cb8f71351b22998", "8d909f0a2066e39b3216092c6289ece4",
+    "d183abb30b9f24c886a0517e991b22c7", "702a42fe4c7d665dc561b2aeeb60f311",
+    "7b5dbbbe7ae3a4ac2948731600bde5d6"
+  };
+  TestHighbdIntraPred("Intra8", pred_funcs, kSignatures, 8);
+}
+
+void TestHighbdIntraPred16(VpxHighbdPredFunc const *pred_funcs) {
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "e33cb3f56a878e2fddb1b2fc51cdd275", "c7bff6f04b6052c8ab335d726dbbd52d",
+    "d0b0b47b654a9bcc5c6008110a44589b", "78f5da7b10b2b9ab39f114a33b6254e9",
+    "c78e31d23831abb40d6271a318fdd6f3", "90d1347f4ec9198a0320daecb6ff90b8",
+    "d2c623746cbb64a0c9e29c10f2c57041", "cf28bd387b81ad3e5f1a1c779a4b70a0",
+    "24c304330431ddeaf630f6ce94af2eac", "91a329798036bf64e8e00a87b131b8b1",
+    "d39111f22885307f920796a42084c872", "e2e702f7250ece98dd8f3f2854c31eeb",
+    "e2fb05b01eb8b88549e85641d8ce5b59"
+  };
+  TestHighbdIntraPred("Intra16", pred_funcs, kSignatures, 16);
+}
+
+void TestHighbdIntraPred32(VpxHighbdPredFunc const *pred_funcs) {
+  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
+    "a3e8056ba7e36628cce4917cd956fedd", "cc7d3024fe8748b512407edee045377e",
+    "2aab0a0f330a1d3e19b8ecb8f06387a3", "a547bc3fb7b06910bf3973122a426661",
+    "26f712514da95042f93d6e8dc8e431dc", "bb08c6e16177081daa3d936538dbc2e3",
+    "8f031af3e2650e89620d8d2c3a843d8b", "42867c8553285e94ee8e4df7abafbda8",
+    "6496bdee96100667833f546e1be3d640", "2ebfa25bf981377e682e580208504300",
+    "3e8ae52fd1f607f348aa4cb436c71ab7", "3d4efe797ca82193613696753ea624c4",
+    "cb8aab6d372278f3131e8d99efde02d9"
+  };
+  TestHighbdIntraPred("Intra32", pred_funcs, kSignatures, 32);
+}
+
+}  // namespace
+
+// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
+// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
+#define HIGHBD_INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128,  \
+                               v, h, d45, d135, d117, d153, d207, d63, tm)    \
+  TEST(arch, test_func) {                                                     \
+    static const VpxHighbdPredFunc vpx_intra_pred[] = {                       \
+      dc, dc_left, dc_top, dc_128, v, h, d45, d135, d117, d153, d207, d63, tm \
+    };                                                                        \
+    test_func(vpx_intra_pred);                                                \
+  }
+
+// -----------------------------------------------------------------------------
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_c,
+    vpx_highbd_dc_left_predictor_4x4_c, vpx_highbd_dc_top_predictor_4x4_c,
+    vpx_highbd_dc_128_predictor_4x4_c, vpx_highbd_v_predictor_4x4_c,
+    vpx_highbd_h_predictor_4x4_c, vpx_highbd_d45_predictor_4x4_c,
+    vpx_highbd_d135_predictor_4x4_c, vpx_highbd_d117_predictor_4x4_c,
+    vpx_highbd_d153_predictor_4x4_c, vpx_highbd_d207_predictor_4x4_c,
+    vpx_highbd_d63_predictor_4x4_c, vpx_highbd_tm_predictor_4x4_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_c,
+    vpx_highbd_dc_left_predictor_8x8_c, vpx_highbd_dc_top_predictor_8x8_c,
+    vpx_highbd_dc_128_predictor_8x8_c, vpx_highbd_v_predictor_8x8_c,
+    vpx_highbd_h_predictor_8x8_c, vpx_highbd_d45_predictor_8x8_c,
+    vpx_highbd_d135_predictor_8x8_c, vpx_highbd_d117_predictor_8x8_c,
+    vpx_highbd_d153_predictor_8x8_c, vpx_highbd_d207_predictor_8x8_c,
+    vpx_highbd_d63_predictor_8x8_c, vpx_highbd_tm_predictor_8x8_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TestHighbdIntraPred16, vpx_highbd_dc_predictor_16x16_c,
+    vpx_highbd_dc_left_predictor_16x16_c, vpx_highbd_dc_top_predictor_16x16_c,
+    vpx_highbd_dc_128_predictor_16x16_c, vpx_highbd_v_predictor_16x16_c,
+    vpx_highbd_h_predictor_16x16_c, vpx_highbd_d45_predictor_16x16_c,
+    vpx_highbd_d135_predictor_16x16_c, vpx_highbd_d117_predictor_16x16_c,
+    vpx_highbd_d153_predictor_16x16_c, vpx_highbd_d207_predictor_16x16_c,
+    vpx_highbd_d63_predictor_16x16_c, vpx_highbd_tm_predictor_16x16_c)
+
+HIGHBD_INTRA_PRED_TEST(
+    C, TestHighbdIntraPred32, vpx_highbd_dc_predictor_32x32_c,
+    vpx_highbd_dc_left_predictor_32x32_c, vpx_highbd_dc_top_predictor_32x32_c,
+    vpx_highbd_dc_128_predictor_32x32_c, vpx_highbd_v_predictor_32x32_c,
+    vpx_highbd_h_predictor_32x32_c, vpx_highbd_d45_predictor_32x32_c,
+    vpx_highbd_d135_predictor_32x32_c, vpx_highbd_d117_predictor_32x32_c,
+    vpx_highbd_d153_predictor_32x32_c, vpx_highbd_d207_predictor_32x32_c,
+    vpx_highbd_d63_predictor_32x32_c, vpx_highbd_tm_predictor_32x32_c)
+
+#if HAVE_SSE2
+HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred4,
+                       vpx_highbd_dc_predictor_4x4_sse2, NULL, NULL, NULL,
+                       vpx_highbd_v_predictor_4x4_sse2, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, vpx_highbd_tm_predictor_4x4_c)
+
+HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
+                       vpx_highbd_dc_predictor_8x8_sse2, NULL, NULL, NULL,
+                       vpx_highbd_v_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
+                       NULL, NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
+
+HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
+                       vpx_highbd_dc_predictor_16x16_sse2, NULL, NULL, NULL,
+                       vpx_highbd_v_predictor_16x16_sse2, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL,
+                       vpx_highbd_tm_predictor_16x16_sse2)
+
+HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
+                       vpx_highbd_dc_predictor_32x32_sse2, NULL, NULL, NULL,
+                       vpx_highbd_v_predictor_32x32_sse2, NULL, NULL, NULL,
+                       NULL, NULL, NULL, NULL,
+                       vpx_highbd_tm_predictor_32x32_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+HIGHBD_INTRA_PRED_TEST(
+    NEON, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_neon,
+    vpx_highbd_dc_left_predictor_4x4_neon, vpx_highbd_dc_top_predictor_4x4_neon,
+    vpx_highbd_dc_128_predictor_4x4_neon, vpx_highbd_v_predictor_4x4_neon,
+    vpx_highbd_h_predictor_4x4_neon, vpx_highbd_d45_predictor_4x4_neon,
+    vpx_highbd_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+    vpx_highbd_tm_predictor_4x4_neon)
+HIGHBD_INTRA_PRED_TEST(
+    NEON, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_neon,
+    vpx_highbd_dc_left_predictor_8x8_neon, vpx_highbd_dc_top_predictor_8x8_neon,
+    vpx_highbd_dc_128_predictor_8x8_neon, vpx_highbd_v_predictor_8x8_neon,
+    vpx_highbd_h_predictor_8x8_neon, vpx_highbd_d45_predictor_8x8_neon,
+    vpx_highbd_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
+    vpx_highbd_tm_predictor_8x8_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred16,
+                       vpx_highbd_dc_predictor_16x16_neon,
+                       vpx_highbd_dc_left_predictor_16x16_neon,
+                       vpx_highbd_dc_top_predictor_16x16_neon,
+                       vpx_highbd_dc_128_predictor_16x16_neon,
+                       vpx_highbd_v_predictor_16x16_neon,
+                       vpx_highbd_h_predictor_16x16_neon,
+                       vpx_highbd_d45_predictor_16x16_neon,
+                       vpx_highbd_d135_predictor_16x16_neon, NULL, NULL, NULL,
+                       NULL, vpx_highbd_tm_predictor_16x16_neon)
+HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred32,
+                       vpx_highbd_dc_predictor_32x32_neon,
+                       vpx_highbd_dc_left_predictor_32x32_neon,
+                       vpx_highbd_dc_top_predictor_32x32_neon,
+                       vpx_highbd_dc_128_predictor_32x32_neon,
+                       vpx_highbd_v_predictor_32x32_neon,
+                       vpx_highbd_h_predictor_32x32_neon,
+                       vpx_highbd_d45_predictor_32x32_neon,
+                       vpx_highbd_d135_predictor_32x32_neon, NULL, NULL, NULL,
+                       NULL, vpx_highbd_tm_predictor_32x32_neon)
+#endif  // HAVE_NEON
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
 #include "test/test_libvpx.cc"
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -156,6 +156,20 @@ VP8_INSTANTIATE_TEST_CASE(
        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                            libvpx_test::kVP8TestVectors +
                                libvpx_test::kNumVP8TestVectors)));
+
+// Test VP8 decode in with different numbers of threads.
+INSTANTIATE_TEST_CASE_P(
+    VP8MultiThreaded, TestVectorTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)),
+        ::testing::Combine(
+            ::testing::Values(0),    // Serial Mode.
+            ::testing::Range(1, 8),  // With 1 ~ 8 threads.
+            ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
+                                libvpx_test::kVP8TestVectors +
+                                    libvpx_test::kNumVP8TestVectors))));
+
 #endif  // CONFIG_VP8_DECODER

 // Test VP9 decode in serial mode with single thread.
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -373,7 +373,9 @@ const char *const kVP9TestVectors[] = {
  "vp90-2-20-big_superframe-02.webm",
  RESIZE_TEST_VECTORS
 };
+const char *const kVP9TestVectorsSvc[] = { "vp90-2-22-svc_1280x720_3.ivf" };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
+const int kNumVP9TestVectorsSvc = NELEMENTS(kVP9TestVectorsSvc);
 const char *const kVP9TestVectorsResize[] = { RESIZE_TEST_VECTORS };
 const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize);
 #undef RESIZE_TEST_VECTORS
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -23,6 +23,8 @@ extern const char *const kVP8TestVectors[];
 #if CONFIG_VP9_DECODER
 extern const int kNumVP9TestVectors;
 extern const char *const kVP9TestVectors[];
+extern const int kNumVP9TestVectorsSvc;
+extern const char *const kVP9TestVectorsSvc[];
 extern const int kNumVP9TestVectorsResize;
 extern const char *const kVP9TestVectorsResize[];
 #endif  // CONFIG_VP9_DECODER
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -13,7 +13,9 @@
 #if defined(_WIN32)
 #undef NOMINMAX
 #define NOMINMAX
+#ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
+#endif
 #include <windows.h>
 #endif
 #include <cstdio>
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -20,10 +20,12 @@
 namespace {
 class VPxEncoderThreadTest
    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
+      public ::libvpx_test::CodecTestWith4Params<libvpx_test::TestMode, int,
+                                                 int, int> {
 protected:
  VPxEncoderThreadTest()
-      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(2),
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
+        tiles_(GET_PARAM(3)), threads_(GET_PARAM(4)),
        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) {
    init_flags_ = VPX_CODEC_USE_PSNR;
    md5_.clear();
@@ -63,6 +65,7 @@ class VPxEncoderThreadTest
        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
+        encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
      } else {
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
        encoder->Control(VP9E_SET_AQ_MODE, 3);
@@ -91,6 +94,7 @@ class VPxEncoderThreadTest

  bool encoder_initialized_;
  int tiles_;
+  int threads_;
  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
  std::vector<std::string> md5_;
@@ -111,7 +115,7 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
  md5_.clear();

  // Encode using multiple threads.
-  cfg_.g_threads = 4;
+  cfg_.g_threads = threads_;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  multi_thr_md5 = md5_;
  md5_.clear();
@@ -120,9 +124,31 @@ TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
  ASSERT_EQ(single_thr_md5, multi_thr_md5);
 }

-VP9_INSTANTIATE_TEST_CASE(VPxEncoderThreadTest,
-                          ::testing::Values(::libvpx_test::kTwoPassGood,
-                                            ::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kRealTime),
-                          ::testing::Range(1, 9));
+// Split this into two instantiations so that we can distinguish
+// between very slow runs ( ie cpu_speed 0 ) vs ones that can be
+// run nightly by adding Large to the title.
+INSTANTIATE_TEST_CASE_P(
+    VP9, VPxEncoderThreadTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
+        ::testing::Values(::libvpx_test::kTwoPassGood,
+                          ::libvpx_test::kOnePassGood,
+                          ::libvpx_test::kRealTime),
+        ::testing::Range(2, 9),    // cpu_used
+        ::testing::Range(0, 3),    // tile_columns
+        ::testing::Range(2, 5)));  // threads
+
+INSTANTIATE_TEST_CASE_P(
+    VP9Large, VPxEncoderThreadTest,
+    ::testing::Combine(
+        ::testing::Values(
+            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
+        ::testing::Values(::libvpx_test::kTwoPassGood,
+                          ::libvpx_test::kOnePassGood,
+                          ::libvpx_test::kRealTime),
+        ::testing::Range(0, 2),    // cpu_used
+        ::testing::Range(0, 3),    // tile_columns
+        ::testing::Range(2, 5)));  // threads
+
 }  // namespace
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@@ -46,7 +46,7 @@ string DecodeFileWithPause(const string &filename, int num_threads,
  int in_frames = 0;
  int out_frames = 0;

-  vpx_codec_dec_cfg_t cfg = { 0 };
+  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
  cfg.threads = num_threads;
  vpx_codec_flags_t flags = 0;
  flags |= VPX_CODEC_USE_FRAME_THREADING;
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
@@ -28,25 +28,25 @@ using libvpx_test::ACMRandom;

 const int count_test_block = 100000;

-typedef void (*IntraPred)(uint16_t *dst, ptrdiff_t stride,
-                          const uint16_t *above, const uint16_t *left, int bps);
+typedef void (*IntraPredFunc)(uint8_t *dst, ptrdiff_t stride,
+                              const uint8_t *above, const uint8_t *left);

-struct IntraPredFunc {
-  IntraPredFunc(IntraPred pred = NULL, IntraPred ref = NULL,
-                int block_size_value = 0, int bit_depth_value = 0)
+struct IntraPredParam {
+  IntraPredParam(IntraPredFunc pred = NULL, IntraPredFunc ref = NULL,
+                 int block_size_value = 0, int bit_depth_value = 0)
      : pred_fn(pred), ref_fn(ref), block_size(block_size_value),
        bit_depth(bit_depth_value) {}

-  IntraPred pred_fn;
-  IntraPred ref_fn;
+  IntraPredFunc pred_fn;
+  IntraPredFunc ref_fn;
  int block_size;
  int bit_depth;
 };

-class VP9IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
+template <typename Pixel, typename PredParam>
+class IntraPredTest : public ::testing::TestWithParam<PredParam> {
 public:
-  void RunTest(uint16_t *left_col, uint16_t *above_data, uint16_t *dst,
-               uint16_t *ref_dst) {
+  void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int block_size = params_.block_size;
    above_row_ = above_data + 16;
@@ -56,13 +56,16 @@ class VP9IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
    int error_count = 0;
    for (int i = 0; i < count_test_block; ++i) {
      // Fill edges with random data, try first with saturated values.
-      for (int x = -1; x <= block_size * 2; x++) {
+      for (int x = -1; x < block_size; x++) {
        if (i == 0) {
          above_row_[x] = mask_;
        } else {
          above_row_[x] = rnd.Rand16() & mask_;
        }
      }
+      for (int x = block_size; x < 2 * block_size; x++) {
+        above_row_[x] = above_row_[block_size - 1];
+      }
      for (int y = 0; y < block_size; y++) {
        if (i == 0) {
          left_col_[y] = mask_;
@@ -78,17 +81,12 @@ class VP9IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {

 protected:
  virtual void SetUp() {
-    params_ = GetParam();
+    params_ = this->GetParam();
    stride_ = params_.block_size * 3;
    mask_ = (1 << params_.bit_depth) - 1;
  }

-  void Predict() {
-    const int bit_depth = params_.bit_depth;
-    params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
-    ASM_REGISTER_STATE_CHECK(
-        params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
-  }
+  void Predict();

  void CheckPrediction(int test_case_number, int *error_count) const {
    // For each pixel ensure that the calculated value is the same as reference.
@@ -104,17 +102,309 @@ class VP9IntraPredTest : public ::testing::TestWithParam<IntraPredFunc> {
    }
  }

-  uint16_t *above_row_;
-  uint16_t *left_col_;
-  uint16_t *dst_;
-  uint16_t *ref_dst_;
+  Pixel *above_row_;
+  Pixel *left_col_;
+  Pixel *dst_;
+  Pixel *ref_dst_;
  ptrdiff_t stride_;
  int mask_;

-  IntraPredFunc params_;
+  PredParam params_;
 };

+template <>
+void IntraPredTest<uint8_t, IntraPredParam>::Predict() {
+  params_.ref_fn(ref_dst_, stride_, above_row_, left_col_);
+  ASM_REGISTER_STATE_CHECK(
+      params_.pred_fn(dst_, stride_, above_row_, left_col_));
+}
+
+typedef IntraPredTest<uint8_t, IntraPredParam> VP9IntraPredTest;
+
 TEST_P(VP9IntraPredTest, IntraPredTests) {
+  // max block size is 32
+  DECLARE_ALIGNED(16, uint8_t, left_col[2 * 32]);
+  DECLARE_ALIGNED(16, uint8_t, above_data[2 * 32 + 32]);
+  DECLARE_ALIGNED(16, uint8_t, dst[3 * 32 * 32]);
+  DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 32 * 32]);
+  RunTest(left_col, above_data, dst, ref_dst);
+}
+
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, VP9IntraPredTest,
+    ::testing::Values(
+        IntraPredParam(&vpx_d45_predictor_4x4_sse2, &vpx_d45_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_d45_predictor_8x8_sse2, &vpx_d45_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_d207_predictor_4x4_sse2, &vpx_d207_predictor_4x4_c,
+                       4, 8),
+        IntraPredParam(&vpx_dc_128_predictor_4x4_sse2,
+                       &vpx_dc_128_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_128_predictor_8x8_sse2,
+                       &vpx_dc_128_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_128_predictor_16x16_sse2,
+                       &vpx_dc_128_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_128_predictor_32x32_sse2,
+                       &vpx_dc_128_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_left_predictor_4x4_sse2,
+                       &vpx_dc_left_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_left_predictor_8x8_sse2,
+                       &vpx_dc_left_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_left_predictor_16x16_sse2,
+                       &vpx_dc_left_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_left_predictor_32x32_sse2,
+                       &vpx_dc_left_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_predictor_4x4_sse2, &vpx_dc_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_8x8_sse2, &vpx_dc_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_16x16_sse2, &vpx_dc_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_dc_predictor_32x32_sse2, &vpx_dc_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_dc_top_predictor_4x4_sse2,
+                       &vpx_dc_top_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_top_predictor_8x8_sse2,
+                       &vpx_dc_top_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_top_predictor_16x16_sse2,
+                       &vpx_dc_top_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_top_predictor_32x32_sse2,
+                       &vpx_dc_top_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_h_predictor_4x4_sse2, &vpx_h_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_h_predictor_8x8_sse2, &vpx_h_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_h_predictor_16x16_sse2, &vpx_h_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_h_predictor_32x32_sse2, &vpx_h_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_tm_predictor_4x4_sse2, &vpx_tm_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_8x8_sse2, &vpx_tm_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_16x16_sse2, &vpx_tm_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_tm_predictor_32x32_sse2, &vpx_tm_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_v_predictor_4x4_sse2, &vpx_v_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_v_predictor_8x8_sse2, &vpx_v_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_v_predictor_16x16_sse2, &vpx_v_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_v_predictor_32x32_sse2, &vpx_v_predictor_32x32_c,
+                       32, 8)));
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, VP9IntraPredTest,
+    ::testing::Values(IntraPredParam(&vpx_d45_predictor_16x16_ssse3,
+                                     &vpx_d45_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_d45_predictor_32x32_ssse3,
+                                     &vpx_d45_predictor_32x32_c, 32, 8),
+                      IntraPredParam(&vpx_d63_predictor_4x4_ssse3,
+                                     &vpx_d63_predictor_4x4_c, 4, 8),
+                      IntraPredParam(&vpx_d63_predictor_8x8_ssse3,
+                                     &vpx_d63_predictor_8x8_c, 8, 8),
+                      IntraPredParam(&vpx_d63_predictor_16x16_ssse3,
+                                     &vpx_d63_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_d63_predictor_32x32_ssse3,
+                                     &vpx_d63_predictor_32x32_c, 32, 8),
+                      IntraPredParam(&vpx_d153_predictor_4x4_ssse3,
+                                     &vpx_d153_predictor_4x4_c, 4, 8),
+                      IntraPredParam(&vpx_d153_predictor_8x8_ssse3,
+                                     &vpx_d153_predictor_8x8_c, 8, 8),
+                      IntraPredParam(&vpx_d153_predictor_16x16_ssse3,
+                                     &vpx_d153_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_d153_predictor_32x32_ssse3,
+                                     &vpx_d153_predictor_32x32_c, 32, 8),
+                      IntraPredParam(&vpx_d207_predictor_8x8_ssse3,
+                                     &vpx_d207_predictor_8x8_c, 8, 8),
+                      IntraPredParam(&vpx_d207_predictor_16x16_ssse3,
+                                     &vpx_d207_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_d207_predictor_32x32_ssse3,
+                                     &vpx_d207_predictor_32x32_c, 32, 8)));
+#endif  // HAVE_SSSE3
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, VP9IntraPredTest,
+    ::testing::Values(
+        IntraPredParam(&vpx_d45_predictor_4x4_neon, &vpx_d45_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_d45_predictor_8x8_neon, &vpx_d45_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_d45_predictor_16x16_neon,
+                       &vpx_d45_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_d45_predictor_32x32_neon,
+                       &vpx_d45_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_d135_predictor_4x4_neon, &vpx_d135_predictor_4x4_c,
+                       4, 8),
+        IntraPredParam(&vpx_d135_predictor_8x8_neon, &vpx_d135_predictor_8x8_c,
+                       8, 8),
+        IntraPredParam(&vpx_d135_predictor_16x16_neon,
+                       &vpx_d135_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_d135_predictor_32x32_neon,
+                       &vpx_d135_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_128_predictor_4x4_neon,
+                       &vpx_dc_128_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_128_predictor_8x8_neon,
+                       &vpx_dc_128_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_128_predictor_16x16_neon,
+                       &vpx_dc_128_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_128_predictor_32x32_neon,
+                       &vpx_dc_128_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_left_predictor_4x4_neon,
+                       &vpx_dc_left_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_left_predictor_8x8_neon,
+                       &vpx_dc_left_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_left_predictor_16x16_neon,
+                       &vpx_dc_left_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_left_predictor_32x32_neon,
+                       &vpx_dc_left_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_predictor_4x4_neon, &vpx_dc_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_8x8_neon, &vpx_dc_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_16x16_neon, &vpx_dc_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_dc_predictor_32x32_neon, &vpx_dc_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_dc_top_predictor_4x4_neon,
+                       &vpx_dc_top_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_top_predictor_8x8_neon,
+                       &vpx_dc_top_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_top_predictor_16x16_neon,
+                       &vpx_dc_top_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_top_predictor_32x32_neon,
+                       &vpx_dc_top_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_h_predictor_4x4_neon, &vpx_h_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_h_predictor_8x8_neon, &vpx_h_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_h_predictor_16x16_neon, &vpx_h_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_h_predictor_32x32_neon, &vpx_h_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_tm_predictor_4x4_neon, &vpx_tm_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_8x8_neon, &vpx_tm_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_16x16_neon, &vpx_tm_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_tm_predictor_32x32_neon, &vpx_tm_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_v_predictor_4x4_neon, &vpx_v_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_v_predictor_8x8_neon, &vpx_v_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_v_predictor_16x16_neon, &vpx_v_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_v_predictor_32x32_neon, &vpx_v_predictor_32x32_c,
+                       32, 8)));
+#endif  // HAVE_NEON
+
+#if HAVE_DSPR2
+INSTANTIATE_TEST_CASE_P(
+    DSPR2, VP9IntraPredTest,
+    ::testing::Values(IntraPredParam(&vpx_dc_predictor_4x4_dspr2,
+                                     &vpx_dc_predictor_4x4_c, 4, 8),
+                      IntraPredParam(&vpx_dc_predictor_8x8_dspr2,
+                                     &vpx_dc_predictor_8x8_c, 8, 8),
+                      IntraPredParam(&vpx_dc_predictor_16x16_dspr2,
+                                     &vpx_dc_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_h_predictor_4x4_dspr2,
+                                     &vpx_h_predictor_4x4_c, 4, 8),
+                      IntraPredParam(&vpx_h_predictor_8x8_dspr2,
+                                     &vpx_h_predictor_8x8_c, 8, 8),
+                      IntraPredParam(&vpx_h_predictor_16x16_dspr2,
+                                     &vpx_h_predictor_16x16_c, 16, 8),
+                      IntraPredParam(&vpx_tm_predictor_4x4_dspr2,
+                                     &vpx_tm_predictor_4x4_c, 4, 8),
+                      IntraPredParam(&vpx_tm_predictor_8x8_dspr2,
+                                     &vpx_tm_predictor_8x8_c, 8, 8)));
+#endif  // HAVE_DSPR2
+
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, VP9IntraPredTest,
+    ::testing::Values(
+        IntraPredParam(&vpx_dc_128_predictor_4x4_msa,
+                       &vpx_dc_128_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_128_predictor_8x8_msa,
+                       &vpx_dc_128_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_128_predictor_16x16_msa,
+                       &vpx_dc_128_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_128_predictor_32x32_msa,
+                       &vpx_dc_128_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_left_predictor_4x4_msa,
+                       &vpx_dc_left_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_left_predictor_8x8_msa,
+                       &vpx_dc_left_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_left_predictor_16x16_msa,
+                       &vpx_dc_left_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_left_predictor_32x32_msa,
+                       &vpx_dc_left_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_dc_predictor_4x4_msa, &vpx_dc_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_8x8_msa, &vpx_dc_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_dc_predictor_16x16_msa, &vpx_dc_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_dc_predictor_32x32_msa, &vpx_dc_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_dc_top_predictor_4x4_msa,
+                       &vpx_dc_top_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_dc_top_predictor_8x8_msa,
+                       &vpx_dc_top_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_dc_top_predictor_16x16_msa,
+                       &vpx_dc_top_predictor_16x16_c, 16, 8),
+        IntraPredParam(&vpx_dc_top_predictor_32x32_msa,
+                       &vpx_dc_top_predictor_32x32_c, 32, 8),
+        IntraPredParam(&vpx_h_predictor_4x4_msa, &vpx_h_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_h_predictor_8x8_msa, &vpx_h_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_h_predictor_16x16_msa, &vpx_h_predictor_16x16_c, 16,
+                       8),
+        IntraPredParam(&vpx_h_predictor_32x32_msa, &vpx_h_predictor_32x32_c, 32,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_4x4_msa, &vpx_tm_predictor_4x4_c, 4,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_8x8_msa, &vpx_tm_predictor_8x8_c, 8,
+                       8),
+        IntraPredParam(&vpx_tm_predictor_16x16_msa, &vpx_tm_predictor_16x16_c,
+                       16, 8),
+        IntraPredParam(&vpx_tm_predictor_32x32_msa, &vpx_tm_predictor_32x32_c,
+                       32, 8),
+        IntraPredParam(&vpx_v_predictor_4x4_msa, &vpx_v_predictor_4x4_c, 4, 8),
+        IntraPredParam(&vpx_v_predictor_8x8_msa, &vpx_v_predictor_8x8_c, 8, 8),
+        IntraPredParam(&vpx_v_predictor_16x16_msa, &vpx_v_predictor_16x16_c, 16,
+                       8),
+        IntraPredParam(&vpx_v_predictor_32x32_msa, &vpx_v_predictor_32x32_c, 32,
+                       8)));
+#endif  // HAVE_MSA
+
+#if CONFIG_VP9_HIGHBITDEPTH
+typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride,
+                                const uint16_t *above, const uint16_t *left,
+                                int bps);
+struct HighbdIntraPredParam {
+  HighbdIntraPredParam(HighbdIntraPred pred = NULL, HighbdIntraPred ref = NULL,
+                       int block_size_value = 0, int bit_depth_value = 0)
+      : pred_fn(pred), ref_fn(ref), block_size(block_size_value),
+        bit_depth(bit_depth_value) {}
+
+  HighbdIntraPred pred_fn;
+  HighbdIntraPred ref_fn;
+  int block_size;
+  int bit_depth;
+};
+
+template <>
+void IntraPredTest<uint16_t, HighbdIntraPredParam>::Predict() {
+  const int bit_depth = params_.bit_depth;
+  params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth);
+  ASM_REGISTER_STATE_CHECK(
+      params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth));
+}
+
+typedef IntraPredTest<uint16_t, HighbdIntraPredParam> VP9HighbdIntraPredTest;
+
+TEST_P(VP9HighbdIntraPredTest, HighbdIntraPredTests) {
  // max block size is 32
  DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]);
  DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]);
@@ -124,88 +414,320 @@ TEST_P(VP9IntraPredTest, IntraPredTests) {
 }

 #if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
 INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_8, VP9IntraPredTest,
-    ::testing::Values(IntraPredFunc(&vpx_highbd_dc_predictor_32x32_sse2,
-                                    &vpx_highbd_dc_predictor_32x32_c, 32, 8),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_16x16_sse2,
-                                    &vpx_highbd_tm_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_32x32_sse2,
-                                    &vpx_highbd_tm_predictor_32x32_c, 32, 8),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_4x4_sse2,
-                                    &vpx_highbd_dc_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_8x8_sse2,
-                                    &vpx_highbd_dc_predictor_8x8_c, 8, 8),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_16x16_sse2,
-                                    &vpx_highbd_dc_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&vpx_highbd_v_predictor_4x4_sse2,
-                                    &vpx_highbd_v_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&vpx_highbd_v_predictor_8x8_sse2,
-                                    &vpx_highbd_v_predictor_8x8_c, 8, 8),
-                      IntraPredFunc(&vpx_highbd_v_predictor_16x16_sse2,
-                                    &vpx_highbd_v_predictor_16x16_c, 16, 8),
-                      IntraPredFunc(&vpx_highbd_v_predictor_32x32_sse2,
-                                    &vpx_highbd_v_predictor_32x32_c, 32, 8),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_4x4_sse2,
-                                    &vpx_highbd_tm_predictor_4x4_c, 4, 8),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_8x8_sse2,
-                                    &vpx_highbd_tm_predictor_8x8_c, 8, 8)));
+    SSE2_TO_C_8, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 8)));

 INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_10, VP9IntraPredTest,
-    ::testing::Values(IntraPredFunc(&vpx_highbd_dc_predictor_32x32_sse2,
-                                    &vpx_highbd_dc_predictor_32x32_c, 32, 10),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_16x16_sse2,
-                                    &vpx_highbd_tm_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_32x32_sse2,
-                                    &vpx_highbd_tm_predictor_32x32_c, 32, 10),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_4x4_sse2,
-                                    &vpx_highbd_dc_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_8x8_sse2,
-                                    &vpx_highbd_dc_predictor_8x8_c, 8, 10),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_16x16_sse2,
-                                    &vpx_highbd_dc_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&vpx_highbd_v_predictor_4x4_sse2,
-                                    &vpx_highbd_v_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&vpx_highbd_v_predictor_8x8_sse2,
-                                    &vpx_highbd_v_predictor_8x8_c, 8, 10),
-                      IntraPredFunc(&vpx_highbd_v_predictor_16x16_sse2,
-                                    &vpx_highbd_v_predictor_16x16_c, 16, 10),
-                      IntraPredFunc(&vpx_highbd_v_predictor_32x32_sse2,
-                                    &vpx_highbd_v_predictor_32x32_c, 32, 10),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_4x4_sse2,
-                                    &vpx_highbd_tm_predictor_4x4_c, 4, 10),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_8x8_sse2,
-                                    &vpx_highbd_tm_predictor_8x8_c, 8, 10)));
+    SSE2_TO_C_10, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 10)));

 INSTANTIATE_TEST_CASE_P(
-    SSE2_TO_C_12, VP9IntraPredTest,
-    ::testing::Values(IntraPredFunc(&vpx_highbd_dc_predictor_32x32_sse2,
-                                    &vpx_highbd_dc_predictor_32x32_c, 32, 12),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_16x16_sse2,
-                                    &vpx_highbd_tm_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_32x32_sse2,
-                                    &vpx_highbd_tm_predictor_32x32_c, 32, 12),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_4x4_sse2,
-                                    &vpx_highbd_dc_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_8x8_sse2,
-                                    &vpx_highbd_dc_predictor_8x8_c, 8, 12),
-                      IntraPredFunc(&vpx_highbd_dc_predictor_16x16_sse2,
-                                    &vpx_highbd_dc_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&vpx_highbd_v_predictor_4x4_sse2,
-                                    &vpx_highbd_v_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&vpx_highbd_v_predictor_8x8_sse2,
-                                    &vpx_highbd_v_predictor_8x8_c, 8, 12),
-                      IntraPredFunc(&vpx_highbd_v_predictor_16x16_sse2,
-                                    &vpx_highbd_v_predictor_16x16_c, 16, 12),
-                      IntraPredFunc(&vpx_highbd_v_predictor_32x32_sse2,
-                                    &vpx_highbd_v_predictor_32x32_c, 32, 12),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_4x4_sse2,
-                                    &vpx_highbd_tm_predictor_4x4_c, 4, 12),
-                      IntraPredFunc(&vpx_highbd_tm_predictor_8x8_sse2,
-                                    &vpx_highbd_tm_predictor_8x8_c, 8, 12)));
+    SSE2_TO_C_12, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 12)));
+#endif  // HAVE_SSE2
+
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON_TO_C_8, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
+                             &vpx_highbd_d45_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
+                             &vpx_highbd_d45_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
+                             &vpx_highbd_d45_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
+                             &vpx_highbd_d45_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
+                             &vpx_highbd_d135_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
+                             &vpx_highbd_d135_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
+                             &vpx_highbd_d135_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
+                             &vpx_highbd_d135_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
+                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
+                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
+                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
+                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
+                             &vpx_highbd_h_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
+                             &vpx_highbd_h_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
+                             &vpx_highbd_h_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
+                             &vpx_highbd_h_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 8),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 8)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON_TO_C_10, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
+                             &vpx_highbd_d45_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
+                             &vpx_highbd_d45_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
+                             &vpx_highbd_d45_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
+                             &vpx_highbd_d45_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
+                             &vpx_highbd_d135_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
+                             &vpx_highbd_d135_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
+                             &vpx_highbd_d135_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
+                             &vpx_highbd_d135_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
+                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
+                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
+                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
+                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
+                             &vpx_highbd_h_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
+                             &vpx_highbd_h_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
+                             &vpx_highbd_h_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
+                             &vpx_highbd_h_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 10),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 10)));
+
+INSTANTIATE_TEST_CASE_P(
+    NEON_TO_C_12, VP9HighbdIntraPredTest,
+    ::testing::Values(
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon,
+                             &vpx_highbd_d45_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon,
+                             &vpx_highbd_d45_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon,
+                             &vpx_highbd_d45_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon,
+                             &vpx_highbd_d45_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon,
+                             &vpx_highbd_d135_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon,
+                             &vpx_highbd_d135_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon,
+                             &vpx_highbd_d135_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon,
+                             &vpx_highbd_d135_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon,
+                             &vpx_highbd_dc_128_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon,
+                             &vpx_highbd_dc_128_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon,
+                             &vpx_highbd_dc_128_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon,
+                             &vpx_highbd_dc_128_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon,
+                             &vpx_highbd_dc_left_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon,
+                             &vpx_highbd_dc_left_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon,
+                             &vpx_highbd_dc_left_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon,
+                             &vpx_highbd_dc_left_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon,
+                             &vpx_highbd_dc_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon,
+                             &vpx_highbd_dc_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon,
+                             &vpx_highbd_dc_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon,
+                             &vpx_highbd_dc_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon,
+                             &vpx_highbd_dc_top_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon,
+                             &vpx_highbd_dc_top_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon,
+                             &vpx_highbd_dc_top_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon,
+                             &vpx_highbd_dc_top_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon,
+                             &vpx_highbd_h_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon,
+                             &vpx_highbd_h_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon,
+                             &vpx_highbd_h_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon,
+                             &vpx_highbd_h_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon,
+                             &vpx_highbd_tm_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon,
+                             &vpx_highbd_tm_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon,
+                             &vpx_highbd_tm_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon,
+                             &vpx_highbd_tm_predictor_32x32_c, 32, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon,
+                             &vpx_highbd_v_predictor_4x4_c, 4, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon,
+                             &vpx_highbd_v_predictor_8x8_c, 8, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon,
+                             &vpx_highbd_v_predictor_16x16_c, 16, 12),
+        HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon,
+                             &vpx_highbd_v_predictor_32x32_c, 32, 12)));
+#endif  // HAVE_NEON

 #endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_SSE2
 }  // namespace
--- a/test/vpx_temporal_svc_encoder.sh
+++ b/test/vpx_temporal_svc_encoder.sh
@@ -40,6 +40,7 @@ vpx_tsvc_encoder() {
  local timebase_den="1000"
  local speed="6"
  local frame_drop_thresh="30"
+  local max_threads="4"

  shift 2

@@ -48,11 +49,14 @@ vpx_tsvc_encoder() {
    return 1
  fi

-  eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" \
-      "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
-      "${timebase_num}" "${timebase_den}" "${speed}" "${frame_drop_thresh}" \
-      "$@" \
-      ${devnull}
+  # TODO(tomfinegan): Verify file output for all thread runs.
+  for threads in $(seq $max_threads); do
+    eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" "${output_file}" \
+        "${codec}" "${YUV_RAW_INPUT_WIDTH}" "${YUV_RAW_INPUT_HEIGHT}" \
+        "${timebase_num}" "${timebase_den}" "${speed}" "${frame_drop_thresh}" \
+        "${threads}" "$@" \
+        ${devnull}
+  done
 }

 # Confirms that all expected output files exist given the output file name
--- a/third_party/libwebm/README.libvpx
+++ b/third_party/libwebm/README.libvpx
@@ -1,5 +1,5 @@
 URL: https://chromium.googlesource.com/webm/libwebm
-Version: 32d5ac49414a8914ec1e1f285f3f927c6e8ec29d
+Version: 9732ae991efb71aced4267d4794918279e362d99
 License: BSD
 License File: LICENSE.txt

--- a/third_party/libwebm/common/file_util.cc
+++ b/third_party/libwebm/common/file_util.cc
@@ -14,6 +14,7 @@

 #include <cstdio>
 #include <cstdlib>
+#include <cstring>
 #include <fstream>
 #include <ios>

@@ -21,13 +22,23 @@ namespace libwebm {

 std::string GetTempFileName() {
 #if !defined _MSC_VER && !defined __MINGW32__
-  char temp_file_name_template[] = "libwebm_temp.XXXXXX";
+  std::string temp_file_name_template_str =
+      std::string(std::getenv("TEST_TMPDIR") ? std::getenv("TEST_TMPDIR") :
+                                               ".") +
+      "/libwebm_temp.XXXXXX";
+  char* temp_file_name_template =
+      new char[temp_file_name_template_str.length() + 1];
+  memset(temp_file_name_template, 0, temp_file_name_template_str.length() + 1);
+  temp_file_name_template_str.copy(temp_file_name_template,
+                                   temp_file_name_template_str.length(), 0);
  int fd = mkstemp(temp_file_name_template);
+  std::string temp_file_name =
+      (fd != -1) ? std::string(temp_file_name_template) : std::string();
+  delete[] temp_file_name_template;
  if (fd != -1) {
    close(fd);
-    return std::string(temp_file_name_template);
  }
-  return std::string();
+  return temp_file_name;
 #else
  char tmp_file_name[_MAX_PATH];
  errno_t err = tmpnam_s(tmp_file_name);
--- a/third_party/libwebm/common/hdr_util.cc
+++ b/third_party/libwebm/common/hdr_util.cc
@@ -7,12 +7,15 @@
 // be found in the AUTHORS file in the root of the source tree.
 #include "hdr_util.h"

+#include <climits>
 #include <cstddef>
 #include <new>

 #include "mkvparser/mkvparser.h"

 namespace libwebm {
+const int Vp9CodecFeatures::kValueNotPresent = INT_MAX;
+
 bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
                             PrimaryChromaticityPtr* muxer_pc) {
  muxer_pc->reset(new (std::nothrow)
@@ -29,9 +32,9 @@ bool MasteringMetadataValuePresent(double value) {
 bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm,
                           mkvmuxer::MasteringMetadata* muxer_mm) {
  if (MasteringMetadataValuePresent(parser_mm.luminance_max))
-    muxer_mm->luminance_max = parser_mm.luminance_max;
+    muxer_mm->set_luminance_max(parser_mm.luminance_max);
  if (MasteringMetadataValuePresent(parser_mm.luminance_min))
-    muxer_mm->luminance_min = parser_mm.luminance_min;
+    muxer_mm->set_luminance_min(parser_mm.luminance_min);

  PrimaryChromaticityPtr r_ptr(NULL);
  PrimaryChromaticityPtr g_ptr(NULL);
@@ -73,34 +76,37 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
    return false;

  if (ColourValuePresent(parser_colour.matrix_coefficients))
-    muxer_colour->matrix_coefficients = parser_colour.matrix_coefficients;
+    muxer_colour->set_matrix_coefficients(parser_colour.matrix_coefficients);
  if (ColourValuePresent(parser_colour.bits_per_channel))
-    muxer_colour->bits_per_channel = parser_colour.bits_per_channel;
-  if (ColourValuePresent(parser_colour.chroma_subsampling_horz))
-    muxer_colour->chroma_subsampling_horz =
-        parser_colour.chroma_subsampling_horz;
-  if (ColourValuePresent(parser_colour.chroma_subsampling_vert))
-    muxer_colour->chroma_subsampling_vert =
-        parser_colour.chroma_subsampling_vert;
+    muxer_colour->set_bits_per_channel(parser_colour.bits_per_channel);
+  if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) {
+    muxer_colour->set_chroma_subsampling_horz(
+        parser_colour.chroma_subsampling_horz);
+  }
+  if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) {
+    muxer_colour->set_chroma_subsampling_vert(
+        parser_colour.chroma_subsampling_vert);
+  }
  if (ColourValuePresent(parser_colour.cb_subsampling_horz))
-    muxer_colour->cb_subsampling_horz = parser_colour.cb_subsampling_horz;
+    muxer_colour->set_cb_subsampling_horz(parser_colour.cb_subsampling_horz);
  if (ColourValuePresent(parser_colour.cb_subsampling_vert))
-    muxer_colour->cb_subsampling_vert = parser_colour.cb_subsampling_vert;
+    muxer_colour->set_cb_subsampling_vert(parser_colour.cb_subsampling_vert);
  if (ColourValuePresent(parser_colour.chroma_siting_horz))
-    muxer_colour->chroma_siting_horz = parser_colour.chroma_siting_horz;
+    muxer_colour->set_chroma_siting_horz(parser_colour.chroma_siting_horz);
  if (ColourValuePresent(parser_colour.chroma_siting_vert))
-    muxer_colour->chroma_siting_vert = parser_colour.chroma_siting_vert;
+    muxer_colour->set_chroma_siting_vert(parser_colour.chroma_siting_vert);
  if (ColourValuePresent(parser_colour.range))
-    muxer_colour->range = parser_colour.range;
-  if (ColourValuePresent(parser_colour.transfer_characteristics))
-    muxer_colour->transfer_characteristics =
-        parser_colour.transfer_characteristics;
+    muxer_colour->set_range(parser_colour.range);
+  if (ColourValuePresent(parser_colour.transfer_characteristics)) {
+    muxer_colour->set_transfer_characteristics(
+        parser_colour.transfer_characteristics);
+  }
  if (ColourValuePresent(parser_colour.primaries))
-    muxer_colour->primaries = parser_colour.primaries;
+    muxer_colour->set_primaries(parser_colour.primaries);
  if (ColourValuePresent(parser_colour.max_cll))
-    muxer_colour->max_cll = parser_colour.max_cll;
+    muxer_colour->set_max_cll(parser_colour.max_cll);
  if (ColourValuePresent(parser_colour.max_fall))
-    muxer_colour->max_fall = parser_colour.max_fall;
+    muxer_colour->set_max_fall(parser_colour.max_fall);

  if (parser_colour.mastering_metadata) {
    mkvmuxer::MasteringMetadata muxer_mm;
@@ -116,8 +122,8 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
 //
 //   0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
 //  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-//  |    ID Byte    |             Length            |               |
-//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+               |
+//  |    ID Byte    |   Length      |                               |
+//  +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+                               |
 //  |                                                               |
 //  :               Bytes 1..Length of Codec Feature                :
 //  |                                                               |
@@ -132,51 +138,83 @@ bool CopyColour(const mkvparser::Colour& parser_colour,
 //
 // The X bit is reserved.
 //
-// Currently only profile level is supported. ID byte must be set to 1, and
-// length must be 1. Supported values are:
-//
-//   10: Level 1
-//   11: Level 1.1
-//   20: Level 2
-//   21: Level 2.1
-//   30: Level 3
-//   31: Level 3.1
-//   40: Level 4
-//   41: Level 4.1
-//   50: Level 5
-//   51: Level 5.1
-//   52: Level 5.2
-//   60: Level 6
-//   61: Level 6.1
-//   62: Level 6.2
-//
 // See the following link for more information:
 // http://www.webmproject.org/vp9/profiles/
-int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length) {
-  const int kVpxCodecPrivateLength = 3;
-  if (!private_data || length != kVpxCodecPrivateLength)
-    return 0;
+bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
+                          Vp9CodecFeatures* features) {
+  const int kVpxCodecPrivateMinLength = 3;
+  if (!private_data || !features || length < kVpxCodecPrivateMinLength)
+    return false;

-  const uint8_t id_byte = *private_data;
-  if (id_byte != 1)
-    return 0;
+  const uint8_t kVp9ProfileId = 1;
+  const uint8_t kVp9LevelId = 2;
+  const uint8_t kVp9BitDepthId = 3;
+  const uint8_t kVp9ChromaSubsamplingId = 4;
+  const int kVpxFeatureLength = 1;
+  int offset = 0;

-  const int kVpxProfileLength = 1;
-  const uint8_t length_byte = private_data[1];
-  if (length_byte != kVpxProfileLength)
-    return 0;
+  // Set features to not set.
+  features->profile = Vp9CodecFeatures::kValueNotPresent;
+  features->level = Vp9CodecFeatures::kValueNotPresent;
+  features->bit_depth = Vp9CodecFeatures::kValueNotPresent;
+  features->chroma_subsampling = Vp9CodecFeatures::kValueNotPresent;
+  do {
+    const uint8_t id_byte = private_data[offset++];
+    const uint8_t length_byte = private_data[offset++];
+    if (length_byte != kVpxFeatureLength)
+      return false;
+    if (id_byte == kVp9ProfileId) {
+      const int priv_profile = static_cast<int>(private_data[offset++]);
+      if (priv_profile < 0 || priv_profile > 3)
+        return false;
+      if (features->profile != Vp9CodecFeatures::kValueNotPresent &&
+          features->profile != priv_profile) {
+        return false;
+      }
+      features->profile = priv_profile;
+    } else if (id_byte == kVp9LevelId) {
+      const int priv_level = static_cast<int>(private_data[offset++]);

-  const int level = static_cast<int>(private_data[2]);
+      const int kNumLevels = 14;
+      const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
+                                      41, 50, 51, 52, 60, 61, 62};

-  const int kNumLevels = 14;
-  const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40,
-                                  41, 50, 51, 52, 60, 61, 62};
+      for (int i = 0; i < kNumLevels; ++i) {
+        if (priv_level == levels[i]) {
+          if (features->level != Vp9CodecFeatures::kValueNotPresent &&
+              features->level != priv_level) {
+            return false;
+          }
+          features->level = priv_level;
+          break;
+        }
+      }
+      if (features->level == Vp9CodecFeatures::kValueNotPresent)
+        return false;
+    } else if (id_byte == kVp9BitDepthId) {
+      const int priv_profile = static_cast<int>(private_data[offset++]);
+      if (priv_profile != 8 && priv_profile != 10 && priv_profile != 12)
+        return false;
+      if (features->bit_depth != Vp9CodecFeatures::kValueNotPresent &&
+          features->bit_depth != priv_profile) {
+        return false;
+      }
+      features->bit_depth = priv_profile;
+    } else if (id_byte == kVp9ChromaSubsamplingId) {
+      const int priv_profile = static_cast<int>(private_data[offset++]);
+      if (priv_profile != 0 && priv_profile != 2 && priv_profile != 3)
+        return false;
+      if (features->chroma_subsampling != Vp9CodecFeatures::kValueNotPresent &&
+          features->chroma_subsampling != priv_profile) {
+        return false;
+      }
+      features->chroma_subsampling = priv_profile;
+    } else {
+      // Invalid ID.
+      return false;
+    }
+  } while (offset + kVpxCodecPrivateMinLength <= length);

-  for (int i = 0; i < kNumLevels; ++i) {
-    if (level == levels[i])
-      return level;
-  }
-
-  return 0;
+  return true;
 }
 }  // namespace libwebm
--- a/third_party/libwebm/common/hdr_util.h
+++ b/third_party/libwebm/common/hdr_util.h
@@ -28,6 +28,25 @@ namespace libwebm {
 // TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is
 // required by libwebm.

+// Features of the VP9 codec that may be set in the CodecPrivate of a VP9 video
+// stream. A value of kValueNotPresent represents that the value was not set in
+// the CodecPrivate.
+struct Vp9CodecFeatures {
+  static const int kValueNotPresent;
+
+  Vp9CodecFeatures()
+      : profile(kValueNotPresent),
+        level(kValueNotPresent),
+        bit_depth(kValueNotPresent),
+        chroma_subsampling(kValueNotPresent) {}
+  ~Vp9CodecFeatures() {}
+
+  int profile;
+  int level;
+  int bit_depth;
+  int chroma_subsampling;
+};
+
 typedef std::auto_ptr<mkvmuxer::PrimaryChromaticity> PrimaryChromaticityPtr;

 bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc,
@@ -43,8 +62,9 @@ bool ColourValuePresent(long long value);
 bool CopyColour(const mkvparser::Colour& parser_colour,
                mkvmuxer::Colour* muxer_colour);

-// Returns VP9 profile upon success or 0 upon failure.
-int ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length);
+// Returns true if |features| is set to one or more valid values.
+bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length,
+                          Vp9CodecFeatures* features);

 }  // namespace libwebm

--- a/third_party/libwebm/common/webmids.h
+++ b/third_party/libwebm/common/webmids.h
@@ -124,6 +124,14 @@ enum MkvId {
  kMkvLuminanceMin = 0x55DA,
  // end mastering metadata
  // end colour
+  // projection
+  kMkvProjection = 0x7670,
+  kMkvProjectionType = 0x7671,
+  kMkvProjectionPrivate = 0x7672,
+  kMkvProjectionPoseYaw = 0x7673,
+  kMkvProjectionPosePitch = 0x7674,
+  kMkvProjectionPoseRoll = 0x7675,
+  // end projection
  // audio
  kMkvAudio = 0xE1,
  kMkvSamplingFrequency = 0xB5,
--- a/third_party/libwebm/mkvmuxer/mkvmuxer.cc
+++ b/third_party/libwebm/mkvmuxer/mkvmuxer.cc
--- a/third_party/libwebm/mkvmuxer/mkvmuxer.h
+++ b/third_party/libwebm/mkvmuxer/mkvmuxer.h
@@ -64,6 +64,12 @@ class IMkvWriter {
  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter);
 };

+// Writes out the EBML header for a WebM file, but allows caller to specify
+// DocType. This function must be called before any other libwebm writing
+// functions are called.
+bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
+                     const char* const doc_type);
+
 // Writes out the EBML header for a WebM file. This function must be called
 // before any other libwebm writing functions are called.
 bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);
@@ -348,26 +354,42 @@ class ContentEncoding {

 ///////////////////////////////////////////////////////////////
 // Colour element.
-struct PrimaryChromaticity {
-  PrimaryChromaticity(float x_val, float y_val) : x(x_val), y(y_val) {}
-  PrimaryChromaticity() : x(0), y(0) {}
+class PrimaryChromaticity {
+ public:
+  static const float kChromaticityMin;
+  static const float kChromaticityMax;
+
+  PrimaryChromaticity(float x_val, float y_val) : x_(x_val), y_(y_val) {}
+  PrimaryChromaticity() : x_(0), y_(0) {}
  ~PrimaryChromaticity() {}
-  uint64_t PrimaryChromaticityPayloadSize(libwebm::MkvId x_id,
-                                          libwebm::MkvId y_id) const;
+
+  // Returns sum of |x_id| and |y_id| element id sizes and payload sizes.
+  uint64_t PrimaryChromaticitySize(libwebm::MkvId x_id,
+                                   libwebm::MkvId y_id) const;
+  bool Valid() const;
  bool Write(IMkvWriter* writer, libwebm::MkvId x_id,
             libwebm::MkvId y_id) const;

-  float x;
-  float y;
+  float x() const { return x_; }
+  void set_x(float new_x) { x_ = new_x; }
+  float y() const { return y_; }
+  void set_y(float new_y) { y_ = new_y; }
+
+ private:
+  float x_;
+  float y_;
 };

 class MasteringMetadata {
 public:
  static const float kValueNotPresent;
+  static const float kMinLuminance;
+  static const float kMinLuminanceMax;
+  static const float kMaxLuminanceMax;

  MasteringMetadata()
-      : luminance_max(kValueNotPresent),
-        luminance_min(kValueNotPresent),
+      : luminance_max_(kValueNotPresent),
+        luminance_min_(kValueNotPresent),
        r_(NULL),
        g_(NULL),
        b_(NULL),
@@ -381,6 +403,7 @@ class MasteringMetadata {

  // Returns total size of the MasteringMetadata element.
  uint64_t MasteringMetadataSize() const;
+  bool Valid() const;
  bool Write(IMkvWriter* writer) const;

  // Copies non-null chromaticity.
@@ -393,13 +416,21 @@ class MasteringMetadata {
  const PrimaryChromaticity* b() const { return b_; }
  const PrimaryChromaticity* white_point() const { return white_point_; }

-  float luminance_max;
-  float luminance_min;
+  float luminance_max() const { return luminance_max_; }
+  void set_luminance_max(float luminance_max) {
+    luminance_max_ = luminance_max;
+  }
+  float luminance_min() const { return luminance_min_; }
+  void set_luminance_min(float luminance_min) {
+    luminance_min_ = luminance_min;
+  }

 private:
  // Returns size of MasteringMetadata child elements.
  uint64_t PayloadSize() const;

+  float luminance_max_;
+  float luminance_min_;
  PrimaryChromaticity* r_;
  PrimaryChromaticity* g_;
  PrimaryChromaticity* b_;
@@ -408,26 +439,90 @@ class MasteringMetadata {

 class Colour {
 public:
+  enum MatrixCoefficients {
+    kGbr = 0,
+    kBt709 = 1,
+    kUnspecifiedMc = 2,
+    kReserved = 3,
+    kFcc = 4,
+    kBt470bg = 5,
+    kSmpte170MMc = 6,
+    kSmpte240MMc = 7,
+    kYcocg = 8,
+    kBt2020NonConstantLuminance = 9,
+    kBt2020ConstantLuminance = 10,
+  };
+  enum ChromaSitingHorz {
+    kUnspecifiedCsh = 0,
+    kLeftCollocated = 1,
+    kHalfCsh = 2,
+  };
+  enum ChromaSitingVert {
+    kUnspecifiedCsv = 0,
+    kTopCollocated = 1,
+    kHalfCsv = 2,
+  };
+  enum Range {
+    kUnspecifiedCr = 0,
+    kBroadcastRange = 1,
+    kFullRange = 2,
+    kMcTcDefined = 3,  // Defined by MatrixCoefficients/TransferCharacteristics.
+  };
+  enum TransferCharacteristics {
+    kIturBt709Tc = 1,
+    kUnspecifiedTc = 2,
+    kReservedTc = 3,
+    kGamma22Curve = 4,
+    kGamma28Curve = 5,
+    kSmpte170MTc = 6,
+    kSmpte240MTc = 7,
+    kLinear = 8,
+    kLog = 9,
+    kLogSqrt = 10,
+    kIec6196624 = 11,
+    kIturBt1361ExtendedColourGamut = 12,
+    kIec6196621 = 13,
+    kIturBt202010bit = 14,
+    kIturBt202012bit = 15,
+    kSmpteSt2084 = 16,
+    kSmpteSt4281Tc = 17,
+    kAribStdB67Hlg = 18,
+  };
+  enum Primaries {
+    kReservedP0 = 0,
+    kIturBt709P = 1,
+    kUnspecifiedP = 2,
+    kReservedP3 = 3,
+    kIturBt470M = 4,
+    kIturBt470Bg = 5,
+    kSmpte170MP = 6,
+    kSmpte240MP = 7,
+    kFilm = 8,
+    kIturBt2020 = 9,
+    kSmpteSt4281P = 10,
+    kJedecP22Phosphors = 22,
+  };
  static const uint64_t kValueNotPresent;
  Colour()
-      : matrix_coefficients(kValueNotPresent),
-        bits_per_channel(kValueNotPresent),
-        chroma_subsampling_horz(kValueNotPresent),
-        chroma_subsampling_vert(kValueNotPresent),
-        cb_subsampling_horz(kValueNotPresent),
-        cb_subsampling_vert(kValueNotPresent),
-        chroma_siting_horz(kValueNotPresent),
-        chroma_siting_vert(kValueNotPresent),
-        range(kValueNotPresent),
-        transfer_characteristics(kValueNotPresent),
-        primaries(kValueNotPresent),
-        max_cll(kValueNotPresent),
-        max_fall(kValueNotPresent),
+      : matrix_coefficients_(kValueNotPresent),
+        bits_per_channel_(kValueNotPresent),
+        chroma_subsampling_horz_(kValueNotPresent),
+        chroma_subsampling_vert_(kValueNotPresent),
+        cb_subsampling_horz_(kValueNotPresent),
+        cb_subsampling_vert_(kValueNotPresent),
+        chroma_siting_horz_(kValueNotPresent),
+        chroma_siting_vert_(kValueNotPresent),
+        range_(kValueNotPresent),
+        transfer_characteristics_(kValueNotPresent),
+        primaries_(kValueNotPresent),
+        max_cll_(kValueNotPresent),
+        max_fall_(kValueNotPresent),
        mastering_metadata_(NULL) {}
  ~Colour() { delete mastering_metadata_; }

  // Returns total size of the Colour element.
  uint64_t ColourSize() const;
+  bool Valid() const;
  bool Write(IMkvWriter* writer) const;

  // Deep copies |mastering_metadata|.
@@ -437,27 +532,124 @@ class Colour {
    return mastering_metadata_;
  }

-  uint64_t matrix_coefficients;
-  uint64_t bits_per_channel;
-  uint64_t chroma_subsampling_horz;
-  uint64_t chroma_subsampling_vert;
-  uint64_t cb_subsampling_horz;
-  uint64_t cb_subsampling_vert;
-  uint64_t chroma_siting_horz;
-  uint64_t chroma_siting_vert;
-  uint64_t range;
-  uint64_t transfer_characteristics;
-  uint64_t primaries;
-  uint64_t max_cll;
-  uint64_t max_fall;
+  uint64_t matrix_coefficients() const { return matrix_coefficients_; }
+  void set_matrix_coefficients(uint64_t matrix_coefficients) {
+    matrix_coefficients_ = matrix_coefficients;
+  }
+  uint64_t bits_per_channel() const { return bits_per_channel_; }
+  void set_bits_per_channel(uint64_t bits_per_channel) {
+    bits_per_channel_ = bits_per_channel;
+  }
+  uint64_t chroma_subsampling_horz() const { return chroma_subsampling_horz_; }
+  void set_chroma_subsampling_horz(uint64_t chroma_subsampling_horz) {
+    chroma_subsampling_horz_ = chroma_subsampling_horz;
+  }
+  uint64_t chroma_subsampling_vert() const { return chroma_subsampling_vert_; }
+  void set_chroma_subsampling_vert(uint64_t chroma_subsampling_vert) {
+    chroma_subsampling_vert_ = chroma_subsampling_vert;
+  }
+  uint64_t cb_subsampling_horz() const { return cb_subsampling_horz_; }
+  void set_cb_subsampling_horz(uint64_t cb_subsampling_horz) {
+    cb_subsampling_horz_ = cb_subsampling_horz;
+  }
+  uint64_t cb_subsampling_vert() const { return cb_subsampling_vert_; }
+  void set_cb_subsampling_vert(uint64_t cb_subsampling_vert) {
+    cb_subsampling_vert_ = cb_subsampling_vert;
+  }
+  uint64_t chroma_siting_horz() const { return chroma_siting_horz_; }
+  void set_chroma_siting_horz(uint64_t chroma_siting_horz) {
+    chroma_siting_horz_ = chroma_siting_horz;
+  }
+  uint64_t chroma_siting_vert() const { return chroma_siting_vert_; }
+  void set_chroma_siting_vert(uint64_t chroma_siting_vert) {
+    chroma_siting_vert_ = chroma_siting_vert;
+  }
+  uint64_t range() const { return range_; }
+  void set_range(uint64_t range) { range_ = range; }
+  uint64_t transfer_characteristics() const {
+    return transfer_characteristics_;
+  }
+  void set_transfer_characteristics(uint64_t transfer_characteristics) {
+    transfer_characteristics_ = transfer_characteristics;
+  }
+  uint64_t primaries() const { return primaries_; }
+  void set_primaries(uint64_t primaries) { primaries_ = primaries; }
+  uint64_t max_cll() const { return max_cll_; }
+  void set_max_cll(uint64_t max_cll) { max_cll_ = max_cll; }
+  uint64_t max_fall() const { return max_fall_; }
+  void set_max_fall(uint64_t max_fall) { max_fall_ = max_fall; }

 private:
  // Returns size of Colour child elements.
  uint64_t PayloadSize() const;

+  uint64_t matrix_coefficients_;
+  uint64_t bits_per_channel_;
+  uint64_t chroma_subsampling_horz_;
+  uint64_t chroma_subsampling_vert_;
+  uint64_t cb_subsampling_horz_;
+  uint64_t cb_subsampling_vert_;
+  uint64_t chroma_siting_horz_;
+  uint64_t chroma_siting_vert_;
+  uint64_t range_;
+  uint64_t transfer_characteristics_;
+  uint64_t primaries_;
+  uint64_t max_cll_;
+  uint64_t max_fall_;
+
  MasteringMetadata* mastering_metadata_;
 };

+///////////////////////////////////////////////////////////////
+// Projection element.
+class Projection {
+ public:
+  enum ProjectionType {
+    kTypeNotPresent = -1,
+    kRectangular = 0,
+    kEquirectangular = 1,
+    kCubeMap = 2,
+    kMesh = 3,
+  };
+  static const uint64_t kValueNotPresent;
+  Projection()
+      : type_(kRectangular),
+        pose_yaw_(0.0),
+        pose_pitch_(0.0),
+        pose_roll_(0.0),
+        private_data_(NULL),
+        private_data_length_(0) {}
+  ~Projection() { delete[] private_data_; }
+
+  uint64_t ProjectionSize() const;
+  bool Write(IMkvWriter* writer) const;
+
+  bool SetProjectionPrivate(const uint8_t* private_data,
+                            uint64_t private_data_length);
+
+  ProjectionType type() const { return type_; }
+  void set_type(ProjectionType type) { type_ = type; }
+  float pose_yaw() const { return pose_yaw_; }
+  void set_pose_yaw(float pose_yaw) { pose_yaw_ = pose_yaw; }
+  float pose_pitch() const { return pose_pitch_; }
+  void set_pose_pitch(float pose_pitch) { pose_pitch_ = pose_pitch; }
+  float pose_roll() const { return pose_roll_; }
+  void set_pose_roll(float pose_roll) { pose_roll_ = pose_roll; }
+  uint8_t* private_data() const { return private_data_; }
+  uint64_t private_data_length() const { return private_data_length_; }
+
+ private:
+  // Returns size of VideoProjection child elements.
+  uint64_t PayloadSize() const;
+
+  ProjectionType type_;
+  float pose_yaw_;
+  float pose_pitch_;
+  float pose_roll_;
+  uint8_t* private_data_;
+  uint64_t private_data_length_;
+};
+
 ///////////////////////////////////////////////////////////////
 // Track element.
 class Track {
@@ -581,6 +773,10 @@ class VideoTrack : public Track {
  uint64_t display_height() const { return display_height_; }
  void set_display_width(uint64_t width) { display_width_ = width; }
  uint64_t display_width() const { return display_width_; }
+  void set_pixel_height(uint64_t height) { pixel_height_ = height; }
+  uint64_t pixel_height() const { return pixel_height_; }
+  void set_pixel_width(uint64_t width) { pixel_width_ = width; }
+  uint64_t pixel_width() const { return pixel_width_; }

  void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; }
  uint64_t crop_left() const { return crop_left_; }
@@ -605,6 +801,11 @@ class VideoTrack : public Track {
  // Deep copies |colour|.
  bool SetColour(const Colour& colour);

+  Projection* projection() { return projection_; }
+
+  // Deep copies |projection|.
+  bool SetProjection(const Projection& projection);
+
 private:
  // Returns the size in bytes of the Video element.
  uint64_t VideoPayloadSize() const;
@@ -612,6 +813,8 @@ class VideoTrack : public Track {
  // Video track element names.
  uint64_t display_height_;
  uint64_t display_width_;
+  uint64_t pixel_height_;
+  uint64_t pixel_width_;
  uint64_t crop_left_;
  uint64_t crop_right_;
  uint64_t crop_top_;
@@ -623,6 +826,7 @@ class VideoTrack : public Track {
  uint64_t width_;

  Colour* colour_;
+  Projection* projection_;

  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack);
 };
@@ -670,6 +874,10 @@ class Tracks {
  static const char kVp8CodecId[];
  static const char kVp9CodecId[];
  static const char kVp10CodecId[];
+  static const char kWebVttCaptionsId[];
+  static const char kWebVttDescriptionsId[];
+  static const char kWebVttMetadataId[];
+  static const char kWebVttSubtitlesId[];

  Tracks();
  ~Tracks();
@@ -1294,8 +1502,8 @@ class Segment {
    kBeforeClusters = 0x1  // Position Cues before Clusters
  };

-  const static uint32_t kDefaultDocTypeVersion = 2;
-  const static uint64_t kDefaultMaxClusterDuration = 30000000000ULL;
+  static const uint32_t kDefaultDocTypeVersion = 4;
+  static const uint64_t kDefaultMaxClusterDuration = 30000000000ULL;

  Segment();
  ~Segment();
@@ -1481,7 +1689,16 @@ class Segment {
  Mode mode() const { return mode_; }
  CuesPosition cues_position() const { return cues_position_; }
  bool output_cues() const { return output_cues_; }
+  void set_estimate_file_duration(bool estimate_duration) {
+    estimate_file_duration_ = estimate_duration;
+  }
+  bool estimate_file_duration() const { return estimate_file_duration_; }
  const SegmentInfo* segment_info() const { return &segment_info_; }
+  void set_duration(double duration) { duration_ = duration; }
+  double duration() const { return duration_; }
+
+  // Returns true when codec IDs are valid for WebM.
+  bool DocTypeIsWebm() const;

 private:
  // Checks if header information has been output and initialized. If not it
@@ -1637,6 +1854,9 @@ class Segment {
  // Last timestamp in nanoseconds by track number added to a cluster.
  uint64_t last_track_timestamp_[kMaxTrackNumber];

+  // Number of frames written per track.
+  uint64_t track_frames_written_[kMaxTrackNumber];
+
  // Maximum time in nanoseconds for a cluster duration. This variable is a
  // guideline and some clusters may have a longer duration. Default is 30
  // seconds.
@@ -1665,6 +1885,9 @@ class Segment {
  // Flag whether or not to write the Cluster Timecode using exactly 8 bytes.
  bool fixed_size_cluster_timecode_;

+  // Flag whether or not to estimate the file duration.
+  bool estimate_file_duration_;
+
  // The size of the EBML header, used to validate the header if
  // WriteEbmlHeader() is called more than once.
  int32_t ebml_header_size_;
@@ -1682,6 +1905,9 @@ class Segment {
  uint32_t doc_type_version_;
  uint32_t doc_type_version_written_;

+  // If |duration_| is > 0, then explicitly set the duration of the segment.
+  double duration_;
+
  // Pointer to the writer objects. Not owned by this class.
  IMkvWriter* writer_cluster_;
  IMkvWriter* writer_cues_;
--- a/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
+++ b/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc
@@ -31,20 +31,20 @@ namespace {
 // Date elements are always 8 octets in size.
 const int kDateElementSize = 8;

-uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
-                    int64_t timecode, uint64_t timecode_scale) {
-  uint64_t block_additional_elem_size = 0;
-  uint64_t block_addid_elem_size = 0;
-  uint64_t block_more_payload_size = 0;
-  uint64_t block_more_elem_size = 0;
-  uint64_t block_additions_payload_size = 0;
-  uint64_t block_additions_elem_size = 0;
+uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode,
+                  uint64 timecode_scale) {
+  uint64 block_additional_elem_size = 0;
+  uint64 block_addid_elem_size = 0;
+  uint64 block_more_payload_size = 0;
+  uint64 block_more_elem_size = 0;
+  uint64 block_additions_payload_size = 0;
+  uint64 block_additions_elem_size = 0;
  if (frame->additional()) {
    block_additional_elem_size =
        EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(),
                        frame->additional_length());
-    block_addid_elem_size =
-        EbmlElementSize(libwebm::kMkvBlockAddID, frame->add_id());
+    block_addid_elem_size = EbmlElementSize(
+        libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id()));

    block_more_payload_size =
        block_addid_elem_size + block_additional_elem_size;
@@ -58,32 +58,33 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
        block_additions_payload_size;
  }

-  uint64_t discard_padding_elem_size = 0;
+  uint64 discard_padding_elem_size = 0;
  if (frame->discard_padding() != 0) {
    discard_padding_elem_size =
-        EbmlElementSize(libwebm::kMkvDiscardPadding, frame->discard_padding());
+        EbmlElementSize(libwebm::kMkvDiscardPadding,
+                        static_cast<int64>(frame->discard_padding()));
  }

-  const uint64_t reference_block_timestamp =
+  const uint64 reference_block_timestamp =
      frame->reference_block_timestamp() / timecode_scale;
-  uint64_t reference_block_elem_size = 0;
+  uint64 reference_block_elem_size = 0;
  if (!frame->is_key()) {
    reference_block_elem_size =
        EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp);
  }

-  const uint64_t duration = frame->duration() / timecode_scale;
-  uint64_t block_duration_elem_size = 0;
+  const uint64 duration = frame->duration() / timecode_scale;
+  uint64 block_duration_elem_size = 0;
  if (duration > 0)
    block_duration_elem_size =
        EbmlElementSize(libwebm::kMkvBlockDuration, duration);

-  const uint64_t block_payload_size = 4 + frame->length();
-  const uint64_t block_elem_size =
+  const uint64 block_payload_size = 4 + frame->length();
+  const uint64 block_elem_size =
      EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) +
      block_payload_size;

-  const uint64_t block_group_payload_size =
+  const uint64 block_group_payload_size =
      block_elem_size + block_additions_elem_size + block_duration_elem_size +
      discard_padding_elem_size + reference_block_elem_size;

@@ -105,7 +106,7 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
  if (SerializeInt(writer, 0, 1))
    return 0;

-  if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
+  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
    return 0;

  if (frame->additional()) {
@@ -118,7 +119,8 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
                                block_more_payload_size))
      return 0;

-    if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID, frame->add_id()))
+    if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID,
+                          static_cast<uint64>(frame->add_id())))
      return 0;

    if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional,
@@ -129,7 +131,7 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,

  if (frame->discard_padding() != 0 &&
      !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding,
-                        frame->discard_padding())) {
+                        static_cast<int64>(frame->discard_padding()))) {
    return false;
  }

@@ -148,38 +150,38 @@ uint64_t WriteBlock(IMkvWriter* writer, const Frame* const frame,
         block_group_payload_size;
 }

-uint64_t WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
-                          int64_t timecode) {
+uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame,
+                        int64 timecode) {
  if (WriteID(writer, libwebm::kMkvSimpleBlock))
    return 0;

-  const int32_t size = static_cast<int32_t>(frame->length()) + 4;
+  const int32 size = static_cast<int32>(frame->length()) + 4;
  if (WriteUInt(writer, size))
    return 0;

-  if (WriteUInt(writer, static_cast<uint64_t>(frame->track_number())))
+  if (WriteUInt(writer, static_cast<uint64>(frame->track_number())))
    return 0;

  if (SerializeInt(writer, timecode, 2))
    return 0;

-  uint64_t flags = 0;
+  uint64 flags = 0;
  if (frame->is_key())
    flags |= 0x80;

  if (SerializeInt(writer, flags, 1))
    return 0;

-  if (writer->Write(frame->frame(), static_cast<uint32_t>(frame->length())))
+  if (writer->Write(frame->frame(), static_cast<uint32>(frame->length())))
    return 0;

-  return static_cast<uint64_t>(GetUIntSize(libwebm::kMkvSimpleBlock) +
-                               GetCodedUIntSize(size) + 4 + frame->length());
+  return GetUIntSize(libwebm::kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 +
+         frame->length();
 }

 }  // namespace

-int32_t GetCodedUIntSize(uint64_t value) {
+int32 GetCodedUIntSize(uint64 value) {
  if (value < 0x000000000000007FULL)
    return 1;
  else if (value < 0x0000000000003FFFULL)
@@ -197,7 +199,7 @@ int32_t GetCodedUIntSize(uint64_t value) {
  return 8;
 }

-int32_t GetUIntSize(uint64_t value) {
+int32 GetUIntSize(uint64 value) {
  if (value < 0x0000000000000100ULL)
    return 1;
  else if (value < 0x0000000000010000ULL)
@@ -215,26 +217,26 @@ int32_t GetUIntSize(uint64_t value) {
  return 8;
 }

-int32_t GetIntSize(int64_t value) {
+int32 GetIntSize(int64 value) {
  // Doubling the requested value ensures positive values with their high bit
  // set are written with 0-padding to avoid flipping the signedness.
-  const uint64_t v = (value < 0) ? value ^ -1LL : value;
+  const uint64 v = (value < 0) ? value ^ -1LL : value;
  return GetUIntSize(2 * v);
 }

-uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value) {
+uint64 EbmlMasterElementSize(uint64 type, uint64 value) {
  // Size of EBML ID
-  int32_t ebml_size = GetUIntSize(type);
+  int32 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += GetCodedUIntSize(value);

-  return static_cast<uint64_t>(ebml_size);
+  return ebml_size;
 }

-uint64_t EbmlElementSize(uint64_t type, int64_t value) {
+uint64 EbmlElementSize(uint64 type, int64 value) {
  // Size of EBML ID
-  int32_t ebml_size = GetUIntSize(type);
+  int32 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += GetIntSize(value);
@@ -242,20 +244,19 @@ uint64_t EbmlElementSize(uint64_t type, int64_t value) {
  // Size of Datasize
  ebml_size++;

-  return static_cast<uint64_t>(ebml_size);
+  return ebml_size;
 }

-uint64_t EbmlElementSize(uint64_t type, uint64_t value) {
+uint64 EbmlElementSize(uint64 type, uint64 value) {
  return EbmlElementSize(type, value, 0);
 }

-uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) {
+uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size) {
  // Size of EBML ID
-  uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+  uint64 ebml_size = GetUIntSize(type);

  // Datasize
-  ebml_size +=
-      (fixed_size > 0) ? fixed_size : static_cast<uint64_t>(GetUIntSize(value));
+  ebml_size += (fixed_size > 0) ? fixed_size : GetUIntSize(value);

  // Size of Datasize
  ebml_size++;
@@ -263,9 +264,9 @@ uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size) {
  return ebml_size;
 }

-uint64_t EbmlElementSize(uint64_t type, float /* value */) {
+uint64 EbmlElementSize(uint64 type, float /* value */) {
  // Size of EBML ID
-  uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+  uint64 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += sizeof(float);
@@ -276,12 +277,12 @@ uint64_t EbmlElementSize(uint64_t type, float /* value */) {
  return ebml_size;
 }

-uint64_t EbmlElementSize(uint64_t type, const char* value) {
+uint64 EbmlElementSize(uint64 type, const char* value) {
  if (!value)
    return 0;

  // Size of EBML ID
-  uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+  uint64 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += strlen(value);
@@ -292,12 +293,12 @@ uint64_t EbmlElementSize(uint64_t type, const char* value) {
  return ebml_size;
 }

-uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) {
+uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) {
  if (!value)
    return 0;

  // Size of EBML ID
-  uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+  uint64 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += size;
@@ -308,9 +309,9 @@ uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size) {
  return ebml_size;
 }

-uint64_t EbmlDateElementSize(uint64_t type) {
+uint64 EbmlDateElementSize(uint64 type) {
  // Size of EBML ID
-  uint64_t ebml_size = static_cast<uint64_t>(GetUIntSize(type));
+  uint64 ebml_size = GetUIntSize(type);

  // Datasize
  ebml_size += kDateElementSize;
@@ -321,18 +322,18 @@ uint64_t EbmlDateElementSize(uint64_t type) {
  return ebml_size;
 }

-int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) {
+int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) {
  if (!writer || size < 1 || size > 8)
    return -1;

-  for (int32_t i = 1; i <= size; ++i) {
-    const int32_t byte_count = size - i;
-    const int32_t bit_count = byte_count * 8;
+  for (int32 i = 1; i <= size; ++i) {
+    const int32 byte_count = size - i;
+    const int32 bit_count = byte_count * 8;

-    const int64_t bb = value >> bit_count;
-    const uint8_t b = static_cast<uint8_t>(bb);
+    const int64 bb = value >> bit_count;
+    const uint8 b = static_cast<uint8>(bb);

-    const int32_t status = writer->Write(&b, 1);
+    const int32 status = writer->Write(&b, 1);

    if (status < 0)
      return status;
@@ -341,26 +342,26 @@ int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size) {
  return 0;
 }

-int32_t SerializeFloat(IMkvWriter* writer, float f) {
+int32 SerializeFloat(IMkvWriter* writer, float f) {
  if (!writer)
    return -1;

-  assert(sizeof(uint32_t) == sizeof(float));
+  assert(sizeof(uint32) == sizeof(float));
  // This union is merely used to avoid a reinterpret_cast from float& to
  // uint32& which will result in violation of strict aliasing.
  union U32 {
-    uint32_t u32;
+    uint32 u32;
    float f;
  } value;
  value.f = f;

-  for (int32_t i = 1; i <= 4; ++i) {
-    const int32_t byte_count = 4 - i;
-    const int32_t bit_count = byte_count * 8;
+  for (int32 i = 1; i <= 4; ++i) {
+    const int32 byte_count = 4 - i;
+    const int32 bit_count = byte_count * 8;

-    const uint8_t byte = static_cast<uint8_t>(value.u32 >> bit_count);
+    const uint8 byte = static_cast<uint8>(value.u32 >> bit_count);

-    const int32_t status = writer->Write(&byte, 1);
+    const int32 status = writer->Write(&byte, 1);

    if (status < 0)
      return status;
@@ -369,21 +370,21 @@ int32_t SerializeFloat(IMkvWriter* writer, float f) {
  return 0;
 }

-int32_t WriteUInt(IMkvWriter* writer, uint64_t value) {
+int32 WriteUInt(IMkvWriter* writer, uint64 value) {
  if (!writer)
    return -1;

-  int32_t size = GetCodedUIntSize(value);
+  int32 size = GetCodedUIntSize(value);

  return WriteUIntSize(writer, value, size);
 }

-int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
+int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) {
  if (!writer || size < 0 || size > 8)
    return -1;

  if (size > 0) {
-    const uint64_t bit = 1LL << (size * 7);
+    const uint64 bit = 1LL << (size * 7);

    if (value > (bit - 2))
      return -1;
@@ -391,11 +392,11 @@ int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
    value |= bit;
  } else {
    size = 1;
-    int64_t bit;
+    int64 bit;

    for (;;) {
      bit = 1LL << (size * 7);
-      const uint64_t max = bit - 2;
+      const uint64 max = bit - 2;

      if (value <= max)
        break;
@@ -412,18 +413,18 @@ int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size) {
  return SerializeInt(writer, value, size);
 }

-int32_t WriteID(IMkvWriter* writer, uint64_t type) {
+int32 WriteID(IMkvWriter* writer, uint64 type) {
  if (!writer)
    return -1;

  writer->ElementStartNotify(type, writer->Position());

-  const int32_t size = GetUIntSize(type);
+  const int32 size = GetUIntSize(type);

  return SerializeInt(writer, type, size);
 }

-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) {
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) {
  if (!writer)
    return false;

@@ -436,19 +437,19 @@ bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t type, uint64_t size) {
  return true;
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) {
  return WriteEbmlElement(writer, type, value, 0);
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
-                      uint64_t fixed_size) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
+                      uint64 fixed_size) {
  if (!writer)
    return false;

  if (WriteID(writer, type))
    return false;

-  uint64_t size = static_cast<uint64_t>(GetUIntSize(value));
+  uint64 size = GetUIntSize(value);
  if (fixed_size > 0) {
    if (size > fixed_size)
      return false;
@@ -457,30 +458,30 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
  if (WriteUInt(writer, size))
    return false;

-  if (SerializeInt(writer, value, static_cast<int32_t>(size)))
+  if (SerializeInt(writer, value, static_cast<int32>(size)))
    return false;

  return true;
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) {
  if (!writer)
    return false;

  if (WriteID(writer, type))
    return 0;

-  const uint64_t size = GetIntSize(value);
+  const uint64 size = GetIntSize(value);
  if (WriteUInt(writer, size))
    return false;

-  if (SerializeInt(writer, value, static_cast<int32_t>(size)))
+  if (SerializeInt(writer, value, static_cast<int32>(size)))
    return false;

  return true;
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) {
  if (!writer)
    return false;

@@ -496,25 +497,25 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value) {
  return true;
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) {
  if (!writer || !value)
    return false;

  if (WriteID(writer, type))
    return false;

-  const uint64_t length = strlen(value);
+  const uint64 length = strlen(value);
  if (WriteUInt(writer, length))
    return false;

-  if (writer->Write(value, static_cast<const uint32_t>(length)))
+  if (writer->Write(value, static_cast<const uint32>(length)))
    return false;

  return true;
 }

-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
-                      uint64_t size) {
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
+                      uint64 size) {
  if (!writer || !value || size < 1)
    return false;

@@ -524,13 +525,13 @@ bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
  if (WriteUInt(writer, size))
    return false;

-  if (writer->Write(value, static_cast<uint32_t>(size)))
+  if (writer->Write(value, static_cast<uint32>(size)))
    return false;

  return true;
 }

-bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) {
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) {
  if (!writer)
    return false;

@@ -546,8 +547,8 @@ bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value) {
  return true;
 }

-uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
-                    Cluster* cluster) {
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+                  Cluster* cluster) {
  if (!writer || !frame || !frame->IsValid() || !cluster ||
      !cluster->timecode_scale())
    return 0;
@@ -556,7 +557,7 @@ uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
  //  timecode for the cluster itself (remember that block timecode
  //  is a signed, 16-bit integer).  However, as a simplification we
  //  only permit non-negative cluster-relative timecodes for blocks.
-  const int64_t relative_timecode = cluster->GetRelativeTimecode(
+  const int64 relative_timecode = cluster->GetRelativeTimecode(
      frame->timestamp() / cluster->timecode_scale());
  if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode)
    return 0;
@@ -567,20 +568,19 @@ uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
                        cluster->timecode_scale());
 }

-uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) {
+uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) {
  if (!writer)
    return false;

  // Subtract one for the void ID and the coded size.
-  uint64_t void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
-  uint64_t void_size =
-      EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
-      void_entry_size;
+  uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1);
+  uint64 void_size = EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) +
+                     void_entry_size;

  if (void_size != size)
    return 0;

-  const int64_t payload_position = writer->Position();
+  const int64 payload_position = writer->Position();
  if (payload_position < 0)
    return 0;

@@ -590,30 +590,29 @@ uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size) {
  if (WriteUInt(writer, void_entry_size))
    return 0;

-  const uint8_t value = 0;
-  for (int32_t i = 0; i < static_cast<int32_t>(void_entry_size); ++i) {
+  const uint8 value = 0;
+  for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) {
    if (writer->Write(&value, 1))
      return 0;
  }

-  const int64_t stop_position = writer->Position();
+  const int64 stop_position = writer->Position();
  if (stop_position < 0 ||
-      stop_position - payload_position != static_cast<int64_t>(void_size))
+      stop_position - payload_position != static_cast<int64>(void_size))
    return 0;

  return void_size;
 }

-void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
-                int32_t* revision) {
+void GetVersion(int32* major, int32* minor, int32* build, int32* revision) {
  *major = 0;
  *minor = 2;
  *build = 1;
  *revision = 0;
 }

-uint64_t MakeUID(unsigned int* seed) {
-  uint64_t uid = 0;
+uint64 MakeUID(unsigned int* seed) {
+  uint64 uid = 0;

 #ifdef __MINGW32__
  srand(*seed);
@@ -625,21 +624,22 @@ uint64_t MakeUID(unsigned int* seed) {
 // TODO(fgalligan): Move random number generation to platform specific code.
 #ifdef _MSC_VER
    (void)seed;
-    const int32_t nn = rand();
+    const int32 nn = rand();
 #elif __ANDROID__
-    int32_t temp_num = 1;
+    (void)seed;
+    int32 temp_num = 1;
    int fd = open("/dev/urandom", O_RDONLY);
    if (fd != -1) {
      read(fd, &temp_num, sizeof(temp_num));
      close(fd);
    }
-    const int32_t nn = temp_num;
+    const int32 nn = temp_num;
 #elif defined __MINGW32__
-    const int32_t nn = rand();
+    const int32 nn = rand();
 #else
-    const int32_t nn = rand_r(seed);
+    const int32 nn = rand_r(seed);
 #endif
-    const int32_t n = 0xFF & (nn >> 4);  // throw away low-order bits
+    const int32 n = 0xFF & (nn >> 4);  // throw away low-order bits

    uid |= n;
  }
@@ -647,4 +647,97 @@ uint64_t MakeUID(unsigned int* seed) {
  return uid;
 }

+bool IsMatrixCoefficientsValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kGbr:
+    case mkvmuxer::Colour::kBt709:
+    case mkvmuxer::Colour::kUnspecifiedMc:
+    case mkvmuxer::Colour::kReserved:
+    case mkvmuxer::Colour::kFcc:
+    case mkvmuxer::Colour::kBt470bg:
+    case mkvmuxer::Colour::kSmpte170MMc:
+    case mkvmuxer::Colour::kSmpte240MMc:
+    case mkvmuxer::Colour::kYcocg:
+    case mkvmuxer::Colour::kBt2020NonConstantLuminance:
+    case mkvmuxer::Colour::kBt2020ConstantLuminance:
+      return true;
+  }
+  return false;
+}
+
+bool IsChromaSitingHorzValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kUnspecifiedCsh:
+    case mkvmuxer::Colour::kLeftCollocated:
+    case mkvmuxer::Colour::kHalfCsh:
+      return true;
+  }
+  return false;
+}
+
+bool IsChromaSitingVertValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kUnspecifiedCsv:
+    case mkvmuxer::Colour::kTopCollocated:
+    case mkvmuxer::Colour::kHalfCsv:
+      return true;
+  }
+  return false;
+}
+
+bool IsColourRangeValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kUnspecifiedCr:
+    case mkvmuxer::Colour::kBroadcastRange:
+    case mkvmuxer::Colour::kFullRange:
+    case mkvmuxer::Colour::kMcTcDefined:
+      return true;
+  }
+  return false;
+}
+
+bool IsTransferCharacteristicsValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kIturBt709Tc:
+    case mkvmuxer::Colour::kUnspecifiedTc:
+    case mkvmuxer::Colour::kReservedTc:
+    case mkvmuxer::Colour::kGamma22Curve:
+    case mkvmuxer::Colour::kGamma28Curve:
+    case mkvmuxer::Colour::kSmpte170MTc:
+    case mkvmuxer::Colour::kSmpte240MTc:
+    case mkvmuxer::Colour::kLinear:
+    case mkvmuxer::Colour::kLog:
+    case mkvmuxer::Colour::kLogSqrt:
+    case mkvmuxer::Colour::kIec6196624:
+    case mkvmuxer::Colour::kIturBt1361ExtendedColourGamut:
+    case mkvmuxer::Colour::kIec6196621:
+    case mkvmuxer::Colour::kIturBt202010bit:
+    case mkvmuxer::Colour::kIturBt202012bit:
+    case mkvmuxer::Colour::kSmpteSt2084:
+    case mkvmuxer::Colour::kSmpteSt4281Tc:
+    case mkvmuxer::Colour::kAribStdB67Hlg:
+      return true;
+  }
+  return false;
+}
+
+bool IsPrimariesValueValid(uint64_t value) {
+  switch (value) {
+    case mkvmuxer::Colour::kReservedP0:
+    case mkvmuxer::Colour::kIturBt709P:
+    case mkvmuxer::Colour::kUnspecifiedP:
+    case mkvmuxer::Colour::kReservedP3:
+    case mkvmuxer::Colour::kIturBt470M:
+    case mkvmuxer::Colour::kIturBt470Bg:
+    case mkvmuxer::Colour::kSmpte170MP:
+    case mkvmuxer::Colour::kSmpte240MP:
+    case mkvmuxer::Colour::kFilm:
+    case mkvmuxer::Colour::kIturBt2020:
+    case mkvmuxer::Colour::kSmpteSt4281P:
+    case mkvmuxer::Colour::kJedecP22Phosphors:
+      return true;
+  }
+  return false;
+}
+
 }  // namespace mkvmuxer
--- a/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
+++ b/third_party/libwebm/mkvmuxer/mkvmuxerutil.h
@@ -8,87 +8,104 @@
 #ifndef MKVMUXER_MKVMUXERUTIL_H_
 #define MKVMUXER_MKVMUXERUTIL_H_

-#include <stdint.h>
+#include "mkvmuxertypes.h"
+
+#include "stdint.h"

 namespace mkvmuxer {
 class Cluster;
 class Frame;
 class IMkvWriter;

-const uint64_t kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
-const int64_t kMaxBlockTimecode = 0x07FFFLL;
+// TODO(tomfinegan): mkvmuxer:: integer types continue to be used here because
+// changing them causes pain for downstream projects. It would be nice if a
+// solution that allows removal of the mkvmuxer:: integer types while avoiding
+// pain for downstream users of libwebm. Considering that mkvmuxerutil.{cc,h}
+// are really, for the great majority of cases, EBML size calculation and writer
+// functions, perhaps a more EBML focused utility would be the way to go as a
+// first step.
+
+const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL;
+const int64 kMaxBlockTimecode = 0x07FFFLL;

 // Writes out |value| in Big Endian order. Returns 0 on success.
-int32_t SerializeInt(IMkvWriter* writer, int64_t value, int32_t size);
+int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size);

 // Returns the size in bytes of the element.
-int32_t GetUIntSize(uint64_t value);
-int32_t GetIntSize(int64_t value);
-int32_t GetCodedUIntSize(uint64_t value);
-uint64_t EbmlMasterElementSize(uint64_t type, uint64_t value);
-uint64_t EbmlElementSize(uint64_t type, int64_t value);
-uint64_t EbmlElementSize(uint64_t type, uint64_t value);
-uint64_t EbmlElementSize(uint64_t type, float value);
-uint64_t EbmlElementSize(uint64_t type, const char* value);
-uint64_t EbmlElementSize(uint64_t type, const uint8_t* value, uint64_t size);
-uint64_t EbmlDateElementSize(uint64_t type);
+int32 GetUIntSize(uint64 value);
+int32 GetIntSize(int64 value);
+int32 GetCodedUIntSize(uint64 value);
+uint64 EbmlMasterElementSize(uint64 type, uint64 value);
+uint64 EbmlElementSize(uint64 type, int64 value);
+uint64 EbmlElementSize(uint64 type, uint64 value);
+uint64 EbmlElementSize(uint64 type, float value);
+uint64 EbmlElementSize(uint64 type, const char* value);
+uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size);
+uint64 EbmlDateElementSize(uint64 type);

 // Returns the size in bytes of the element assuming that the element was
 // written using |fixed_size| bytes. If |fixed_size| is set to zero, then it
 // computes the necessary number of bytes based on |value|.
-uint64_t EbmlElementSize(uint64_t type, uint64_t value, uint64_t fixed_size);
+uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size);

 // Creates an EBML coded number from |value| and writes it out. The size of
 // the coded number is determined by the value of |value|. |value| must not
 // be in a coded form. Returns 0 on success.
-int32_t WriteUInt(IMkvWriter* writer, uint64_t value);
+int32 WriteUInt(IMkvWriter* writer, uint64 value);

 // Creates an EBML coded number from |value| and writes it out. The size of
 // the coded number is determined by the value of |size|. |value| must not
 // be in a coded form. Returns 0 on success.
-int32_t WriteUIntSize(IMkvWriter* writer, uint64_t value, int32_t size);
+int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size);

 // Output an Mkv master element. Returns true if the element was written.
-bool WriteEbmlMasterElement(IMkvWriter* writer, uint64_t value, uint64_t size);
+bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size);

 // Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the
 // ID to |SerializeInt|. Returns 0 on success.
-int32_t WriteID(IMkvWriter* writer, uint64_t type);
+int32 WriteID(IMkvWriter* writer, uint64 type);

 // Output an Mkv non-master element. Returns true if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, int64_t value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, float value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const char* value);
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, const uint8_t* value,
-                      uint64_t size);
-bool WriteEbmlDateElement(IMkvWriter* writer, uint64_t type, int64_t value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value,
+                      uint64 size);
+bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value);

 // Output an Mkv non-master element using fixed size. The element will be
 // written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero
 // then it computes the necessary number of bytes based on |value|. Returns true
 // if the element was written.
-bool WriteEbmlElement(IMkvWriter* writer, uint64_t type, uint64_t value,
-                      uint64_t fixed_size);
+bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value,
+                      uint64 fixed_size);

 // Output a Mkv Frame. It decides the correct element to write (Block vs
 // SimpleBlock) based on the parameters of the Frame.
-uint64_t WriteFrame(IMkvWriter* writer, const Frame* const frame,
-                    Cluster* cluster);
+uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame,
+                  Cluster* cluster);

 // Output a void element. |size| must be the entire size in bytes that will be
 // void. The function will calculate the size of the void header and subtract
 // it from |size|.
-uint64_t WriteVoidElement(IMkvWriter* writer, uint64_t size);
+uint64 WriteVoidElement(IMkvWriter* writer, uint64 size);

 // Returns the version number of the muxer in |major|, |minor|, |build|,
 // and |revision|.
-void GetVersion(int32_t* major, int32_t* minor, int32_t* build,
-                int32_t* revision);
+void GetVersion(int32* major, int32* minor, int32* build, int32* revision);

 // Returns a random number to be used for UID, using |seed| to seed
 // the random-number generator (see POSIX rand_r() for semantics).
-uint64_t MakeUID(unsigned int* seed);
+uint64 MakeUID(unsigned int* seed);
+
+// Colour field validation helpers. All return true when |value| is valid.
+bool IsMatrixCoefficientsValueValid(uint64_t value);
+bool IsChromaSitingHorzValueValid(uint64_t value);
+bool IsChromaSitingVertValueValid(uint64_t value);
+bool IsColourRangeValueValid(uint64_t value);
+bool IsTransferCharacteristicsValueValid(uint64_t value);
+bool IsPrimariesValueValid(uint64_t value);

 }  // namespace mkvmuxer

--- a/third_party/libwebm/mkvmuxer/mkvwriter.cc
+++ b/third_party/libwebm/mkvmuxer/mkvwriter.cc
@@ -77,7 +77,7 @@ int32 MkvWriter::Position(int64 position) {
 #ifdef _MSC_VER
  return _fseeki64(file_, position, SEEK_SET);
 #else
-  return fseek(file_, position, SEEK_SET);
+  return fseeko(file_, static_cast<off_t>(position), SEEK_SET);
 #endif
 }

--- a/third_party/libwebm/mkvparser/mkvparser.cc
+++ b/third_party/libwebm/mkvparser/mkvparser.cc
@@ -25,6 +25,7 @@
 namespace mkvparser {
 const float MasteringMetadata::kValueNotPresent = FLT_MAX;
 const long long Colour::kValueNotPresent = LLONG_MAX;
+const float Projection::kValueNotPresent = FLT_MAX;

 #ifdef MSC_COMPAT
 inline bool isnan(double val) { return !!_isnan(val); }
@@ -1475,6 +1476,8 @@ long Segment::Load() {
  }
 }

+SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {}
+
 SeekHead::SeekHead(Segment* pSegment, long long start, long long size_,
                   long long element_start, long long element_size)
    : m_pSegment(pSegment),
@@ -1766,18 +1769,7 @@ bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_,
  if ((pos + seekIdSize) > stop)
    return false;

-  // Note that the SeekId payload really is serialized
-  // as a "Matroska integer", not as a plain binary value.
-  // In fact, Matroska requires that ID values in the
-  // stream exactly match the binary representation as listed
-  // in the Matroska specification.
-  //
-  // This parser is more liberal, and permits IDs to have
-  // any width.  (This could make the representation in the stream
-  // different from what's in the spec, but it doesn't matter here,
-  // since we always normalize "Matroska integer" values.)
-
-  pEntry->id = ReadUInt(pReader, pos, len);  // payload
+  pEntry->id = ReadID(pReader, pos, len);  // payload

  if (pEntry->id <= 0)
    return false;
@@ -4125,7 +4117,7 @@ ContentEncoding::~ContentEncoding() {
 }

 const ContentEncoding::ContentCompression*
-    ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
+ContentEncoding::GetCompressionByIndex(unsigned long idx) const {
  const ptrdiff_t count = compression_entries_end_ - compression_entries_;
  assert(count >= 0);

@@ -5188,11 +5180,92 @@ bool Colour::Parse(IMkvReader* reader, long long colour_start,
  return true;
 }

+bool Projection::Parse(IMkvReader* reader, long long start, long long size,
+                       Projection** projection) {
+  if (!reader || *projection)
+    return false;
+
+  std::auto_ptr<Projection> projection_ptr(new Projection());
+  if (!projection_ptr.get())
+    return false;
+
+  const long long end = start + size;
+  long long read_pos = start;
+
+  while (read_pos < end) {
+    long long child_id = 0;
+    long long child_size = 0;
+
+    const long long status =
+        ParseElementHeader(reader, read_pos, end, child_id, child_size);
+    if (status < 0)
+      return false;
+
+    if (child_id == libwebm::kMkvProjectionType) {
+      long long projection_type = kTypeNotPresent;
+      projection_type = UnserializeUInt(reader, read_pos, child_size);
+      if (projection_type < 0)
+        return false;
+
+      projection_ptr->type = static_cast<ProjectionType>(projection_type);
+    } else if (child_id == libwebm::kMkvProjectionPrivate) {
+      unsigned char* data = SafeArrayAlloc<unsigned char>(1, child_size);
+
+      if (data == NULL)
+        return false;
+
+      const int status =
+          reader->Read(read_pos, static_cast<long>(child_size), data);
+
+      if (status) {
+        delete[] data;
+        return false;
+      }
+
+      projection_ptr->private_data = data;
+      projection_ptr->private_data_length = static_cast<size_t>(child_size);
+    } else {
+      double value = 0;
+      const long long value_parse_status =
+          UnserializeFloat(reader, read_pos, child_size, value);
+      if (value_parse_status < 0) {
+        return false;
+      }
+
+      switch (child_id) {
+        case libwebm::kMkvProjectionPoseYaw:
+          projection_ptr->pose_yaw = static_cast<float>(value);
+          break;
+        case libwebm::kMkvProjectionPosePitch:
+          projection_ptr->pose_pitch = static_cast<float>(value);
+          break;
+        case libwebm::kMkvProjectionPoseRoll:
+          projection_ptr->pose_roll = static_cast<float>(value);
+          break;
+        default:
+          return false;
+      }
+    }
+
+    read_pos += child_size;
+    if (read_pos > end)
+      return false;
+  }
+
+  *projection = projection_ptr.release();
+  return true;
+}
+
 VideoTrack::VideoTrack(Segment* pSegment, long long element_start,
                       long long element_size)
-    : Track(pSegment, element_start, element_size), m_colour(NULL) {}
+    : Track(pSegment, element_start, element_size),
+      m_colour(NULL),
+      m_projection(NULL) {}

-VideoTrack::~VideoTrack() { delete m_colour; }
+VideoTrack::~VideoTrack() {
+  delete m_colour;
+  delete m_projection;
+}

 long VideoTrack::Parse(Segment* pSegment, const Info& info,
                       long long element_start, long long element_size,
@@ -5224,6 +5297,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
  const long long stop = pos + s.size;

  Colour* colour = NULL;
+  Projection* projection = NULL;

  while (pos < stop) {
    long long id, size;
@@ -5274,6 +5348,9 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
    } else if (id == libwebm::kMkvColour) {
      if (!Colour::Parse(pReader, pos, size, &colour))
        return E_FILE_FORMAT_INVALID;
+    } else if (id == libwebm::kMkvProjection) {
+      if (!Projection::Parse(pReader, pos, size, &projection))
+        return E_FILE_FORMAT_INVALID;
    }

    pos += size;  // consume payload
@@ -5305,6 +5382,7 @@ long VideoTrack::Parse(Segment* pSegment, const Info& info,
  pTrack->m_stereo_mode = stereo_mode;
  pTrack->m_rate = rate;
  pTrack->m_colour = colour;
+  pTrack->m_projection = projection;

  pResult = pTrack;
  return 0;  // success
@@ -5405,6 +5483,8 @@ long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const {

 Colour* VideoTrack::GetColour() const { return m_colour; }

+Projection* VideoTrack::GetProjection() const { return m_projection; }
+
 long long VideoTrack::GetWidth() const { return m_width; }

 long long VideoTrack::GetHeight() const { return m_height; }
@@ -6698,8 +6778,10 @@ Cluster::Cluster(Segment* pSegment, long idx, long long element_start
 {}

 Cluster::~Cluster() {
-  if (m_entries_count <= 0)
+  if (m_entries_count <= 0) {
+    delete[] m_entries;
    return;
+  }

  BlockEntry** i = m_entries;
  BlockEntry** const j = m_entries + m_entries_count;
--- a/third_party/libwebm/mkvparser/mkvparser.h
+++ b/third_party/libwebm/mkvparser/mkvparser.h
@@ -473,6 +473,34 @@ struct Colour {
  MasteringMetadata* mastering_metadata;
 };

+struct Projection {
+  enum ProjectionType {
+    kTypeNotPresent = -1,
+    kRectangular = 0,
+    kEquirectangular = 1,
+    kCubeMap = 2,
+    kMesh = 3,
+  };
+  static const float kValueNotPresent;
+  Projection()
+      : type(kTypeNotPresent),
+        private_data(NULL),
+        private_data_length(0),
+        pose_yaw(kValueNotPresent),
+        pose_pitch(kValueNotPresent),
+        pose_roll(kValueNotPresent) {}
+  ~Projection() { delete[] private_data; }
+  static bool Parse(IMkvReader* reader, long long element_start,
+                    long long element_size, Projection** projection);
+
+  ProjectionType type;
+  unsigned char* private_data;
+  size_t private_data_length;
+  float pose_yaw;
+  float pose_pitch;
+  float pose_roll;
+};
+
 class VideoTrack : public Track {
  VideoTrack(const VideoTrack&);
  VideoTrack& operator=(const VideoTrack&);
@@ -497,6 +525,8 @@ class VideoTrack : public Track {

  Colour* GetColour() const;

+  Projection* GetProjection() const;
+
 private:
  long long m_width;
  long long m_height;
@@ -508,6 +538,7 @@ class VideoTrack : public Track {
  double m_rate;

  Colour* m_colour;
+  Projection* m_projection;
 };

 class AudioTrack : public Track {
@@ -813,6 +844,8 @@ class SeekHead {
  long Parse();

  struct Entry {
+    Entry();
+
    // the SeekHead entry payload
    long long id;
    long long pos;
--- a/third_party/libwebm/mkvparser/mkvreader.cc
+++ b/third_party/libwebm/mkvparser/mkvreader.cc
@@ -117,7 +117,7 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
  if (status)
    return -1;  // error
 #else
-  fseek(m_file, offset, SEEK_SET);
+  fseeko(m_file, static_cast<off_t>(offset), SEEK_SET);
 #endif

  const size_t size = fread(buffer, 1, len, m_file);
@@ -128,4 +128,4 @@ int MkvReader::Read(long long offset, long len, unsigned char* buffer) {
  return 0;  // success
 }

-}  // namespace mkvparser
+}  // namespace mkvparser
--- a/third_party/libyuv/README.libvpx
+++ b/third_party/libyuv/README.libvpx
@@ -1,6 +1,6 @@
 Name: libyuv
-URL: http://code.google.com/p/libyuv/
-Version: 1456
+URL: https://chromium.googlesource.com/libyuv/libyuv
+Version: de944ed8c74909ea6fbd743a22efe1e55e851b83
 License: BSD
 License File: LICENSE

@@ -13,3 +13,10 @@ which down-samples the original input video (f.g. 1280x720) a number of times
 in order to encode multiple resolution bit streams.

 Local Modifications:
+rm -rf .gitignore .gn AUTHORS Android.mk BUILD.gn CMakeLists.txt DEPS LICENSE \
+  LICENSE_THIRD_PARTY OWNERS PATENTS PRESUBMIT.py README.chromium README.md \
+  all.gyp build_overrides/ chromium/ codereview.settings docs/ \
+  download_vs_toolchain.py gyp_libyuv gyp_libyuv.py include/libyuv.h \
+  include/libyuv/compare_row.h libyuv.gyp libyuv.gypi libyuv_nacl.gyp \
+  libyuv_test.gyp linux.mk public.mk setup_links.py sync_chromium.py \
+  third_party/ tools/ unit_test/ util/ winarm.mk
--- a/third_party/libyuv/include/libyuv/basic_types.h
+++ b/third_party/libyuv/include/libyuv/basic_types.h
@@ -13,7 +13,7 @@

 #include <stddef.h>  // for NULL, size_t

-#if defined(__ANDROID__) || (defined(_MSC_VER) && (_MSC_VER < 1600))
+#if defined(_MSC_VER) && (_MSC_VER < 1600)
 #include <sys/types.h>  // for uintptr_t on x86
 #else
 #include <stdint.h>  // for uintptr_t
--- a/third_party/libyuv/include/libyuv/convert.h
+++ b/third_party/libyuv/include/libyuv/convert.h
@@ -12,10 +12,13 @@
 #define INCLUDE_LIBYUV_CONVERT_H_

 #include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes.
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
+
+#include "libyuv/rotate.h"  // For enum RotationMode.
+
+// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
+#include "libyuv/convert_argb.h"  // For WebRTC I420ToARGB. b/620
+#include "libyuv/convert_from.h"  // For WebRTC ConvertFromI420. b/620
+#include "libyuv/planar_functions.h"  // For WebRTC I420Rect, CopyPlane. b/618

 #ifdef __cplusplus
 namespace libyuv {
@@ -115,6 +118,17 @@ int M420ToI420(const uint8* src_m420, int src_stride_m420,
               uint8* dst_v, int dst_stride_v,
               int width, int height);

+// Convert Android420 to I420.
+LIBYUV_API
+int Android420ToI420(const uint8* src_y, int src_stride_y,
+                     const uint8* src_u, int src_stride_u,
+                     const uint8* src_v, int src_stride_v,
+                     int pixel_stride_uv,
+                     uint8* dst_y, int dst_stride_y,
+                     uint8* dst_u, int dst_stride_u,
+                     uint8* dst_v, int dst_stride_v,
+                     int width, int height);
+
 // ARGB little endian (bgra in memory) to I420.
 LIBYUV_API
 int ARGBToI420(const uint8* src_frame, int src_stride_frame,
--- a/third_party/libyuv/include/libyuv/convert_argb.h
+++ b/third_party/libyuv/include/libyuv/convert_argb.h
@@ -12,10 +12,8 @@
 #define INCLUDE_LIBYUV_CONVERT_ARGB_H_

 #include "libyuv/basic_types.h"
-// TODO(fbarchard): Remove the following headers includes
-#include "libyuv/convert_from.h"
-#include "libyuv/planar_functions.h"
-#include "libyuv/rotate.h"
+
+#include "libyuv/rotate.h"  // For enum RotationMode.

 // TODO(fbarchard): This set of functions should exactly match convert.h
 // TODO(fbarchard): Add tests. Create random content of right size and convert
@@ -44,6 +42,14 @@ int I420ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

+// Duplicate prototype for function in convert_from.h for remoting.
+LIBYUV_API
+int I420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
 // Convert I422 to ARGB.
 LIBYUV_API
 int I422ToARGB(const uint8* src_y, int src_stride_y,
@@ -60,6 +66,22 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
 // Convert I411 to ARGB.
 LIBYUV_API
 int I411ToARGB(const uint8* src_y, int src_stride_y,
@@ -68,6 +90,24 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

+// Convert I420 with Alpha to preattenuated ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height, int attenuate);
+
+// Convert I420 with Alpha to preattenuated ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_abgr, int dst_stride_abgr,
+                    int width, int height, int attenuate);
+
 // Convert I400 (grey) to ARGB.  Reverse of ARGBToI400.
 LIBYUV_API
 int I400ToARGB(const uint8* src_y, int src_stride_y,
@@ -131,6 +171,54 @@ int J422ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height);

+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height);
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height);
+
 // BGRA little endian (argb in memory) to ARGB.
 LIBYUV_API
 int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
--- a/third_party/libyuv/include/libyuv/convert_from.h
+++ b/third_party/libyuv/include/libyuv/convert_from.h
@@ -56,8 +56,6 @@ int I400Copy(const uint8* src_y, int src_stride_y,
             uint8* dst_y, int dst_stride_y,
             int width, int height);

-// TODO(fbarchard): I420ToM420
-
 LIBYUV_API
 int I420ToNV12(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
--- a/third_party/libyuv/include/libyuv/cpu_id.h
+++ b/third_party/libyuv/include/libyuv/cpu_id.h
@@ -18,9 +18,8 @@ namespace libyuv {
 extern "C" {
 #endif

-// TODO(fbarchard): Consider overlapping bits for different architectures.
 // Internal flag to indicate cpuid requires initialization.
-#define kCpuInit 0x1
+static const int kCpuInitialized = 0x1;

 // These flags are only valid on ARM processors.
 static const int kCpuHasARM = 0x2;
@@ -37,12 +36,12 @@ static const int kCpuHasAVX = 0x200;
 static const int kCpuHasAVX2 = 0x400;
 static const int kCpuHasERMS = 0x800;
 static const int kCpuHasFMA3 = 0x1000;
+static const int kCpuHasAVX3 = 0x2000;
 // 0x2000, 0x4000, 0x8000 reserved for future X86 flags.

 // These flags are only valid on MIPS processors.
 static const int kCpuHasMIPS = 0x10000;
-static const int kCpuHasMIPS_DSP = 0x20000;
-static const int kCpuHasMIPS_DSPR2 = 0x40000;
+static const int kCpuHasDSPR2 = 0x20000;

 // Internal function used to auto-init.
 LIBYUV_API
@@ -57,13 +56,13 @@ int ArmCpuCaps(const char* cpuinfo_name);
 // returns non-zero if instruction set is detected
 static __inline int TestCpuFlag(int test_flag) {
  LIBYUV_API extern int cpu_info_;
-  return (cpu_info_ == kCpuInit ? InitCpuFlags() : cpu_info_) & test_flag;
+  return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
 }

 // For testing, allow CPU flags to be disabled.
 // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
 // MaskCpuFlags(-1) to enable all cpu specific optimizations.
-// MaskCpuFlags(0) to disable all cpu specific optimizations.
+// MaskCpuFlags(1) to disable all cpu specific optimizations.
 LIBYUV_API
 void MaskCpuFlags(int enable_flags);

--- a/third_party/libyuv/include/libyuv/planar_functions.h
+++ b/third_party/libyuv/include/libyuv/planar_functions.h
@@ -39,6 +39,20 @@ void SetPlane(uint8* dst_y, int dst_stride_y,
              int width, int height,
              uint32 value);

+// Split interleaved UV plane into separate U and V planes.
+LIBYUV_API
+void SplitUVPlane(const uint8* src_uv, int src_stride_uv,
+                  uint8* dst_u, int dst_stride_u,
+                  uint8* dst_v, int dst_stride_v,
+                  int width, int height);
+
+// Merge separate U and V planes into one interleaved UV plane.
+LIBYUV_API
+void MergeUVPlane(const uint8* src_u, int src_stride_u,
+                  const uint8* src_v, int src_stride_v,
+                  uint8* dst_uv, int dst_stride_uv,
+                  int width, int height);
+
 // Copy I400.  Supports inverting.
 LIBYUV_API
 int I400ToI400(const uint8* src_y, int src_stride_y,
@@ -145,13 +159,6 @@ int NV12ToRGB565(const uint8* src_y, int src_stride_y,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height);

-// Convert NV21 to RGB565.
-LIBYUV_API
-int NV21ToRGB565(const uint8* src_y, int src_stride_y,
-                 const uint8* src_uv, int src_stride_uv,
-                 uint8* dst_rgb565, int dst_stride_rgb565,
-                 int width, int height);
-
 // I422ToARGB is in convert_argb.h
 // Convert I422 to BGRA.
 LIBYUV_API
@@ -177,6 +184,14 @@ int I422ToRGBA(const uint8* src_y, int src_stride_y,
               uint8* dst_rgba, int dst_stride_rgba,
               int width, int height);

+// Alias
+#define RGB24ToRAW RAWToRGB24
+
+LIBYUV_API
+int RAWToRGB24(const uint8* src_raw, int src_stride_raw,
+               uint8* dst_rgb24, int dst_stride_rgb24,
+               int width, int height);
+
 // Draw a rectangle into I420.
 LIBYUV_API
 int I420Rect(uint8* dst_y, int dst_stride_y,
@@ -281,13 +296,19 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
             uint8* dst_argb, int dst_stride_argb,
             int width, int height);

-// Copy ARGB to ARGB.
+// Copy Alpha channel of ARGB to alpha of ARGB.
 LIBYUV_API
 int ARGBCopyAlpha(const uint8* src_argb, int src_stride_argb,
                  uint8* dst_argb, int dst_stride_argb,
                  int width, int height);

-// Copy ARGB to ARGB.
+// Extract the alpha channel from ARGB.
+LIBYUV_API
+int ARGBExtractAlpha(const uint8* src_argb, int src_stride_argb,
+                     uint8* dst_a, int dst_stride_a,
+                     int width, int height);
+
+// Copy Y channel to Alpha of ARGB.
 LIBYUV_API
 int ARGBCopyYToAlpha(const uint8* src_y, int src_stride_y,
                     uint8* dst_argb, int dst_stride_argb,
@@ -301,6 +322,7 @@ LIBYUV_API
 ARGBBlendRow GetARGBBlend();

 // Alpha Blend ARGB images and store to destination.
+// Source is pre-multiplied by alpha using ARGBAttenuate.
 // Alpha of destination is set to 255.
 LIBYUV_API
 int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
@@ -308,6 +330,31 @@ int ARGBBlend(const uint8* src_argb0, int src_stride_argb0,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height);

+// Alpha Blend plane and store to destination.
+// Source is not pre-multiplied by alpha.
+LIBYUV_API
+int BlendPlane(const uint8* src_y0, int src_stride_y0,
+               const uint8* src_y1, int src_stride_y1,
+               const uint8* alpha, int alpha_stride,
+               uint8* dst_y, int dst_stride_y,
+               int width, int height);
+
+// Alpha Blend YUV images and store to destination.
+// Source is not pre-multiplied by alpha.
+// Alpha is full width x height and subsampled to half size to apply to UV.
+LIBYUV_API
+int I420Blend(const uint8* src_y0, int src_stride_y0,
+              const uint8* src_u0, int src_stride_u0,
+              const uint8* src_v0, int src_stride_v0,
+              const uint8* src_y1, int src_stride_y1,
+              const uint8* src_u1, int src_stride_u1,
+              const uint8* src_v1, int src_stride_v1,
+              const uint8* alpha, int alpha_stride,
+              uint8* dst_y, int dst_stride_y,
+              uint8* dst_u, int dst_stride_u,
+              uint8* dst_v, int dst_stride_v,
+              int width, int height);
+
 // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255.
 LIBYUV_API
 int ARGBMultiply(const uint8* src_argb0, int src_stride_argb0,
@@ -357,12 +404,6 @@ int ARGBUnattenuate(const uint8* src_argb, int src_stride_argb,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height);

-// Convert MJPG to ARGB.
-LIBYUV_API
-int MJPGToARGB(const uint8* sample, size_t sample_size,
-               uint8* argb, int argb_stride,
-               int w, int h, int dw, int dh);
-
 // Internal function - do not call directly.
 // Computes table of cumulative sum for image where the value is the sum
 // of all values above and to the left of the entry. Used by ARGBBlur.
@@ -389,22 +430,49 @@ int ARGBShade(const uint8* src_argb, int src_stride_argb,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height, uint32 value);

-// Interpolate between two ARGB images using specified amount of interpolation
+// Interpolate between two images using specified amount of interpolation
 // (0 to 255) and store to destination.
-// 'interpolation' is specified as 8 bit fraction where 0 means 100% src_argb0
-// and 255 means 1% src_argb0 and 99% src_argb1.
-// Internally uses ARGBScale bilinear filtering.
-// Caveat: This function will write up to 16 bytes beyond the end of dst_argb.
+// 'interpolation' is specified as 8 bit fraction where 0 means 100% src0
+// and 255 means 1% src0 and 99% src1.
+LIBYUV_API
+int InterpolatePlane(const uint8* src0, int src_stride0,
+                     const uint8* src1, int src_stride1,
+                     uint8* dst, int dst_stride,
+                     int width, int height, int interpolation);
+
+// Interpolate between two ARGB images using specified amount of interpolation
+// Internally calls InterpolatePlane with width * 4 (bpp).
 LIBYUV_API
 int ARGBInterpolate(const uint8* src_argb0, int src_stride_argb0,
                    const uint8* src_argb1, int src_stride_argb1,
                    uint8* dst_argb, int dst_stride_argb,
                    int width, int height, int interpolation);

+// Interpolate between two YUV images using specified amount of interpolation
+// Internally calls InterpolatePlane on each plane where the U and V planes
+// are half width and half height.
+LIBYUV_API
+int I420Interpolate(const uint8* src0_y, int src0_stride_y,
+                    const uint8* src0_u, int src0_stride_u,
+                    const uint8* src0_v, int src0_stride_v,
+                    const uint8* src1_y, int src1_stride_y,
+                    const uint8* src1_u, int src1_stride_u,
+                    const uint8* src1_v, int src1_stride_v,
+                    uint8* dst_y, int dst_stride_y,
+                    uint8* dst_u, int dst_stride_u,
+                    uint8* dst_v, int dst_stride_v,
+                    int width, int height, int interpolation);
+
 #if defined(__pnacl__) || defined(__CLR_VER) || \
    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
 // The following are available on all x86 platforms:
 #if !defined(LIBYUV_DISABLE_X86) && \
    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
--- a/third_party/libyuv/include/libyuv/rotate_row.h
+++ b/third_party/libyuv/include/libyuv/rotate_row.h
@@ -22,53 +22,24 @@ extern "C" {
    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif
-
-// Visual C 2012 required for AVX2.
-#if defined(_M_IX86) && !defined(__clang__) && \
-    defined(_MSC_VER) && _MSC_VER >= 1700
-#define VISUALC_HAS_AVX2 1
-#endif  // VisualStudio >= 2012
-
-// TODO(fbarchard): switch to standard form of inline; fails on clangcl.
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#if defined(__APPLE__) && defined(__i386__)
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".private_extern _" #name "                \n"                             \
-    ".align 4,0x90                             \n"                             \
-"_" #name ":                                   \n"
-#elif defined(__MINGW32__) || defined(__CYGWIN__) && defined(__i386__)
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".align 4,0x90                             \n"                             \
-"_" #name ":                                   \n"
-#else
-#define DECLARE_FUNCTION(name)                                                 \
-    ".text                                     \n"                             \
-    ".align 4,0x90                             \n"                             \
-#name ":                                       \n"
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
 #endif
 #endif
-
-// The following are available for Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
-    defined(_MSC_VER) && !defined(__clang__)
+// The following are available for Visual C and clangcl 32 bit:
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
 #define HAS_TRANSPOSEWX8_SSSE3
 #define HAS_TRANSPOSEUVWX8_SSE2
 #endif

-// The following are available for GCC but not NaCL:
+// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
 #if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
 #define HAS_TRANSPOSEWX8_SSSE3
 #endif

-// The following are available for 32 bit GCC:
-#if !defined(LIBYUV_DISABLE_X86) && defined(__i386__)  && !defined(__clang__)
-#define HAS_TRANSPOSEUVWX8_SSE2
-#endif
-
 // The following are available for 64 bit GCC but not NaCL:
 #if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
    defined(__x86_64__)
@@ -85,8 +56,8 @@ extern "C" {
 #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
    defined(__mips__) && \
    defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_TRANSPOSEWX8_MIPS_DSPR2
-#define HAS_TRANSPOSEUVWx8_MIPS_DSPR2
+#define HAS_TRANSPOSEWX8_DSPR2
+#define HAS_TRANSPOSEUVWX8_DSPR2
 #endif  // defined(__mips__)

 void TransposeWxH_C(const uint8* src, int src_stride,
@@ -100,7 +71,9 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
                        uint8* dst, int dst_stride, int width);
 void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width);
-void TransposeWx8_MIPS_DSPR2(const uint8* src, int src_stride,
+void TransposeWx8_DSPR2(const uint8* src, int src_stride,
+                        uint8* dst, int dst_stride, int width);
+void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
                             uint8* dst, int dst_stride, int width);

 void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
@@ -109,8 +82,8 @@ void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
                            uint8* dst, int dst_stride, int width);
 void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
                                 uint8* dst, int dst_stride, int width);
-void TransposeWx8_Any_MIPS_DSPR2(const uint8* src, int src_stride,
-                                 uint8* dst, int dst_stride, int width);
+void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
+                            uint8* dst, int dst_stride, int width);

 void TransposeUVWxH_C(const uint8* src, int src_stride,
                      uint8* dst_a, int dst_stride_a,
@@ -126,9 +99,19 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
 void TransposeUVWx8_NEON(const uint8* src, int src_stride,
                         uint8* dst_a, int dst_stride_a,
                         uint8* dst_b, int dst_stride_b, int width);
-void TransposeUVWx8_MIPS_DSPR2(const uint8* src, int src_stride,
-                               uint8* dst_a, int dst_stride_a,
-                               uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
+                          uint8* dst_a, int dst_stride_a,
+                          uint8* dst_b, int dst_stride_b, int width);
+
+void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
+                             uint8* dst_a, int dst_stride_a,
+                             uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
+                             uint8* dst_a, int dst_stride_a,
+                             uint8* dst_b, int dst_stride_b, int width);
+void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
+                              uint8* dst_a, int dst_stride_a,
+                              uint8* dst_b, int dst_stride_b, int width);

 #ifdef __cplusplus
 }  // extern "C"
--- a/third_party/libyuv/include/libyuv/row.h
+++ b/third_party/libyuv/include/libyuv/row.h
--- a/third_party/libyuv/include/libyuv/scale_argb.h
+++ b/third_party/libyuv/include/libyuv/scale_argb.h
@@ -35,7 +35,6 @@ int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
                  int clip_x, int clip_y, int clip_width, int clip_height,
                  enum FilterMode filtering);

-// TODO(fbarchard): Implement this.
 // Scale with YUV conversion to ARGB and clipping.
 LIBYUV_API
 int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
--- a/third_party/libyuv/include/libyuv/scale_row.h
+++ b/third_party/libyuv/include/libyuv/scale_row.h
@@ -23,6 +23,26 @@ extern "C" {
    (defined(__i386__) && !defined(__SSE2__))
 #define LIBYUV_DISABLE_X86
 #endif
+// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
+#if defined(__has_feature)
+#if __has_feature(memory_sanitizer)
+#define LIBYUV_DISABLE_X86
+#endif
+#endif
+
+// GCC >= 4.7.0 required for AVX2.
+#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
+#if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7))
+#define GCC_HAS_AVX2 1
+#endif  // GNUC >= 4.7
+#endif  // __GNUC__
+
+// clang >= 3.4.0 required for AVX2.
+#if defined(__clang__) && (defined(__x86_64__) || defined(__i386__))
+#if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4))
+#define CLANG_HAS_AVX2 1
+#endif  // clang >= 3.4
+#endif  // __clang__

 // Visual C 2012 required for AVX2.
 #if defined(_M_IX86) && !defined(__clang__) && \
@@ -42,24 +62,23 @@ extern "C" {
 #define HAS_SCALEARGBROWDOWNEVEN_SSE2
 #define HAS_SCALECOLSUP2_SSE2
 #define HAS_SCALEFILTERCOLS_SSSE3
-#define HAS_SCALEROWDOWN2_SSE2
+#define HAS_SCALEROWDOWN2_SSSE3
 #define HAS_SCALEROWDOWN34_SSSE3
 #define HAS_SCALEROWDOWN38_SSSE3
-#define HAS_SCALEROWDOWN4_SSE2
+#define HAS_SCALEROWDOWN4_SSSE3
+#define HAS_SCALEADDROW_SSE2
 #endif

-// The following are available on VS2012:
-#if !defined(LIBYUV_DISABLE_X86) && defined(VISUALC_HAS_AVX2)
+// The following are available on all x86 platforms, but
+// require VS2012, clang 3.4 or gcc 4.7.
+// The code supports NaCL but requires a new compiler and validator.
+#if !defined(LIBYUV_DISABLE_X86) && (defined(VISUALC_HAS_AVX2) || \
+    defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2))
 #define HAS_SCALEADDROW_AVX2
 #define HAS_SCALEROWDOWN2_AVX2
 #define HAS_SCALEROWDOWN4_AVX2
 #endif

-// The following are available on Visual C:
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && !defined(__clang__)
-#define HAS_SCALEADDROW_SSE2
-#endif
-
 // The following are available on Neon platforms:
 #if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
@@ -77,10 +96,10 @@ extern "C" {
 // The following are available on Mips platforms:
 #if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
    defined(__mips__) && defined(__mips_dsp) && (__mips_dsp_rev >= 2)
-#define HAS_SCALEROWDOWN2_MIPS_DSPR2
-#define HAS_SCALEROWDOWN4_MIPS_DSPR2
-#define HAS_SCALEROWDOWN34_MIPS_DSPR2
-#define HAS_SCALEROWDOWN38_MIPS_DSPR2
+#define HAS_SCALEROWDOWN2_DSPR2
+#define HAS_SCALEROWDOWN4_DSPR2
+#define HAS_SCALEROWDOWN34_DSPR2
+#define HAS_SCALEROWDOWN38_DSPR2
 #endif

 // Scale ARGB vertically with bilinear interpolation.
@@ -133,6 +152,8 @@ void ScaleRowDown2Linear_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                              uint16* dst, int dst_width);
 void ScaleRowDown2Box_C(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst, int dst_width);
+void ScaleRowDown2Box_Odd_C(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
 void ScaleRowDown2Box_16_C(const uint16* src_ptr, ptrdiff_t src_stride,
                           uint16* dst, int dst_width);
 void ScaleRowDown4_C(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -214,22 +235,22 @@ void ScaleARGBFilterCols64_C(uint8* dst_argb, const uint8* src_argb,
                             int dst_width, int x, int dx);

 // Specialized scalers for x86.
-void ScaleRowDown2_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown2_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Linear_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                              uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                           uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                        uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                         uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown4_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                        uint8* dst_ptr, int dst_width);
 void ScaleRowDown4Box_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -251,22 +272,26 @@ void ScaleRowDown38_3_Box_SSSE3(const uint8* src_ptr,
 void ScaleRowDown38_2_Box_SSSE3(const uint8* src_ptr,
                                ptrdiff_t src_stride,
                                uint8* dst_ptr, int dst_width);
-void ScaleRowDown2_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Linear_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                  uint8* dst_ptr, int dst_width);
-void ScaleRowDown2Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                             uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Linear_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                                   uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Odd_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
 void ScaleRowDown2_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Linear_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                                  uint8* dst_ptr, int dst_width);
 void ScaleRowDown2Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
-                           uint8* dst_ptr, int dst_width);
-void ScaleRowDown4_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
-                            uint8* dst_ptr, int dst_width);
-void ScaleRowDown4Box_Any_SSE2(const uint8* src_ptr, ptrdiff_t src_stride,
                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown2Box_Odd_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst_ptr, int dst_width);
+void ScaleRowDown4_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                             uint8* dst_ptr, int dst_width);
+void ScaleRowDown4Box_Any_SSSE3(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
 void ScaleRowDown4_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown4Box_Any_AVX2(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -418,6 +443,8 @@ void ScaleRowDown2Linear_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                                  uint8* dst, int dst_width);
 void ScaleRowDown2Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                               uint8* dst, int dst_width);
+void ScaleRowDown2Box_Odd_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
+                               uint8* dst, int dst_width);
 void ScaleRowDown4_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
                            uint8* dst_ptr, int dst_width);
 void ScaleRowDown4Box_Any_NEON(const uint8* src_ptr, ptrdiff_t src_stride,
@@ -447,28 +474,26 @@ void ScaleFilterCols_NEON(uint8* dst_ptr, const uint8* src_ptr,
 void ScaleFilterCols_Any_NEON(uint8* dst_ptr, const uint8* src_ptr,
                              int dst_width, int x, int dx);

-
-void ScaleRowDown2_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width);
-void ScaleRowDown2Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width);
-void ScaleRowDown4_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                              uint8* dst, int dst_width);
-void ScaleRowDown4Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                 uint8* dst, int dst_width);
-void ScaleRowDown34_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
-void ScaleRowDown34_0_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width);
-void ScaleRowDown34_1_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* d, int dst_width);
-void ScaleRowDown38_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                               uint8* dst, int dst_width);
-void ScaleRowDown38_2_Box_MIPS_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width);
-void ScaleRowDown38_3_Box_MIPS_DSPR2(const uint8* src_ptr,
-                                     ptrdiff_t src_stride,
-                                     uint8* dst_ptr, int dst_width);
+void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                         uint8* dst, int dst_width);
+void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
+void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                         uint8* dst, int dst_width);
+void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                            uint8* dst, int dst_width);
+void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst, int dst_width);
+void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* d, int dst_width);
+void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* d, int dst_width);
+void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                          uint8* dst, int dst_width);
+void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);
+void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
+                                uint8* dst_ptr, int dst_width);

 #ifdef __cplusplus
 }  // extern "C"
--- a/third_party/libyuv/include/libyuv/version.h
+++ b/third_party/libyuv/include/libyuv/version.h
@@ -11,6 +11,6 @@
 #ifndef INCLUDE_LIBYUV_VERSION_H_  // NOLINT
 #define INCLUDE_LIBYUV_VERSION_H_

-#define LIBYUV_VERSION 1456
+#define LIBYUV_VERSION 1616

 #endif  // INCLUDE_LIBYUV_VERSION_H_  NOLINT
--- a/third_party/libyuv/include/libyuv/video_common.h
+++ b/third_party/libyuv/include/libyuv/video_common.h
@@ -62,7 +62,7 @@ enum FourCC {

  // 2 Secondary YUV formats: row biplanar.
  FOURCC_M420 = FOURCC('M', '4', '2', '0'),
-  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
+  FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),  // deprecated.

  // 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
  FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
@@ -90,7 +90,8 @@ enum FourCC {
  FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
  FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'),  // Linux version of I420.
  FOURCC_J420 = FOURCC('J', '4', '2', '0'),
-  FOURCC_J400 = FOURCC('J', '4', '0', '0'),
+  FOURCC_J400 = FOURCC('J', '4', '0', '0'),  // unofficial fourcc
+  FOURCC_H420 = FOURCC('H', '4', '2', '0'),  // unofficial fourcc

  // 14 Auxiliary aliases.  CanonicalFourCC() maps these to canonical fourcc.
  FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'),  // Alias for I420.
@@ -150,6 +151,7 @@ enum FourCCBpp {
  FOURCC_BPP_YU12 = 12,
  FOURCC_BPP_J420 = 12,
  FOURCC_BPP_J400 = 8,
+  FOURCC_BPP_H420 = 12,
  FOURCC_BPP_MJPG = 0,  // 0 means unknown.
  FOURCC_BPP_H264 = 0,
  FOURCC_BPP_IYUV = 12,
--- a/third_party/libyuv/source/compare.cc
+++ b/third_party/libyuv/source/compare.cc
@@ -17,6 +17,7 @@
 #endif

 #include "libyuv/basic_types.h"
+#include "libyuv/compare_row.h"
 #include "libyuv/cpu_id.h"
 #include "libyuv/row.h"
 #include "libyuv/video_common.h"
@@ -26,30 +27,13 @@ namespace libyuv {
 extern "C" {
 #endif

-// hash seed of 5381 recommended.
-// Internal C version of HashDjb2 with int sized count for efficiency.
-uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
-
-// This module is for Visual C x86
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || \
-    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))))
-#define HAS_HASHDJB2_SSE41
-uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
-
-#ifdef VISUALC_HAS_AVX2
-#define HAS_HASHDJB2_AVX2
-uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
-#endif
-
-#endif  // HAS_HASHDJB2_SSE41
-
 // hash seed of 5381 recommended.
 LIBYUV_API
 uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
  const int kBlockSize = 1 << 15;  // 32768;
  int remainder;
-  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) = HashDjb2_C;
+  uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
+      HashDjb2_C;
 #if defined(HAS_HASHDJB2_SSE41)
  if (TestCpuFlag(kCpuHasSSE41)) {
    HashDjb2_SSE = HashDjb2_SSE41;
@@ -127,23 +111,6 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
  return fourcc;
 }

-uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
-#if !defined(LIBYUV_DISABLE_NEON) && \
-    (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
-#define HAS_SUMSQUAREERROR_NEON
-uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
-#endif
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__))
-#define HAS_SUMSQUAREERROR_SSE2
-uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
-#ifdef VISUALC_HAS_AVX2
-#define HAS_SUMSQUAREERROR_AVX2
-uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
-#endif
-
 // TODO(fbarchard): Refactor into row function.
 LIBYUV_API
 uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
--- a/third_party/libyuv/source/compare_common.cc
+++ b/third_party/libyuv/source/compare_common.cc
@@ -10,6 +10,8 @@

 #include "libyuv/basic_types.h"

+#include "libyuv/compare_row.h"
+
 #ifdef __cplusplus
 namespace libyuv {
 extern "C" {
--- a/third_party/libyuv/source/compare_gcc.cc
+++ b/third_party/libyuv/source/compare_gcc.cc
@@ -9,6 +9,8 @@
 */

 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"

 #ifdef __cplusplus
@@ -16,11 +18,13 @@ namespace libyuv {
 extern "C" {
 #endif

-#if !defined(LIBYUV_DISABLE_X86) && (defined(__x86_64__) || defined(__i386__))
+// This module is for GCC x86 and x64.
+#if !defined(LIBYUV_DISABLE_X86) && \
+    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
  uint32 sse;
-  asm volatile (  // NOLINT
+  asm volatile (
    "pxor      %%xmm0,%%xmm0                   \n"
    "pxor      %%xmm5,%%xmm5                   \n"
    LABELALIGN
@@ -54,15 +58,10 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
    "+r"(count),      // %2
    "=g"(sse)         // %3
  :: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
-  );  // NOLINT
+  );
  return sse;
 }

-#endif  // defined(__x86_64__) || defined(__i386__)
-
-#if !defined(LIBYUV_DISABLE_X86) && \
-    (defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
-#define HAS_HASHDJB2_SSE41
 static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
 static uvec32 kHashMul0 = {
  0x0c3525e1,  // 33 ^ 15
@@ -91,7 +90,7 @@ static uvec32 kHashMul3 = {

 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
  uint32 hash;
-  asm volatile (  // NOLINT
+  asm volatile (
    "movd      %2,%%xmm0                       \n"
    "pxor      %%xmm7,%%xmm7                   \n"
    "movdqa    %4,%%xmm6                       \n"
@@ -140,7 +139,7 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    "m"(kHashMul3)    // %8
  : "memory", "cc"
    , "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
-  );  // NOLINT
+  );
  return hash;
 }
 #endif  // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
--- a/third_party/libyuv/source/compare_neon.cc
+++ b/third_party/libyuv/source/compare_neon.cc
@@ -9,6 +9,8 @@
 */

 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"

 #ifdef __cplusplus
@@ -27,7 +29,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
    "vmov.u8    q9, #0                         \n"
    "vmov.u8    q11, #0                        \n"

-    ".p2align  2                               \n"
  "1:                                          \n"
    MEMACCESS(0)
    "vld1.8     {q0}, [%0]!                    \n"
--- a/third_party/libyuv/source/compare_neon64.cc
+++ b/third_party/libyuv/source/compare_neon64.cc
@@ -9,6 +9,8 @@
 */

 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"

 #ifdef __cplusplus
@@ -26,7 +28,6 @@ uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
    "eor        v17.16b, v17.16b, v17.16b      \n"
    "eor        v19.16b, v19.16b, v19.16b      \n"

-    ".p2align  2                               \n"
  "1:                                          \n"
    MEMACCESS(0)
    "ld1        {v0.16b}, [%0], #16            \n"
--- a/third_party/libyuv/source/compare_win.cc
+++ b/third_party/libyuv/source/compare_win.cc
@@ -9,6 +9,8 @@
 */

 #include "libyuv/basic_types.h"
+
+#include "libyuv/compare_row.h"
 #include "libyuv/row.h"

 #ifdef __cplusplus
@@ -16,9 +18,8 @@ namespace libyuv {
 extern "C" {
 #endif

-// This module is for Visual C x86.
-#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
-    defined(_MSC_VER) && !defined(__clang__)
+// This module is for 32 bit Visual C x86 and clangcl
+#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

 __declspec(naked)
 uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
@@ -100,41 +101,32 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
 }
 #endif  // _MSC_VER >= 1700

-#define HAS_HASHDJB2_SSE41
-static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
-static uvec32 kHashMul0 = {
+uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 };  // 33 ^ 16
+uvec32 kHashMul0 = {
  0x0c3525e1,  // 33 ^ 15
  0xa3476dc1,  // 33 ^ 14
  0x3b4039a1,  // 33 ^ 13
  0x4f5f0981,  // 33 ^ 12
 };
-static uvec32 kHashMul1 = {
+uvec32 kHashMul1 = {
  0x30f35d61,  // 33 ^ 11
  0x855cb541,  // 33 ^ 10
  0x040a9121,  // 33 ^ 9
  0x747c7101,  // 33 ^ 8
 };
-static uvec32 kHashMul2 = {
+uvec32 kHashMul2 = {
  0xec41d4e1,  // 33 ^ 7
  0x4cfa3cc1,  // 33 ^ 6
  0x025528a1,  // 33 ^ 5
  0x00121881,  // 33 ^ 4
 };
-static uvec32 kHashMul3 = {
+uvec32 kHashMul3 = {
  0x00008c61,  // 33 ^ 3
  0x00000441,  // 33 ^ 2
  0x00000021,  // 33 ^ 1
  0x00000001,  // 33 ^ 0
 };

-// 27: 66 0F 38 40 C6     pmulld      xmm0,xmm6
-// 44: 66 0F 38 40 DD     pmulld      xmm3,xmm5
-// 59: 66 0F 38 40 E5     pmulld      xmm4,xmm5
-// 72: 66 0F 38 40 D5     pmulld      xmm2,xmm5
-// 83: 66 0F 38 40 CD     pmulld      xmm1,xmm5
-#define pmulld(reg) _asm _emit 0x66 _asm _emit 0x0F _asm _emit 0x38 \
-    _asm _emit 0x40 _asm _emit reg
-
 __declspec(naked)
 uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
  __asm {
@@ -143,30 +135,30 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
    movd       xmm0, [esp + 12]  // seed

    pxor       xmm7, xmm7        // constant 0 for unpck
-    movdqa     xmm6, kHash16x33
+    movdqa     xmm6, xmmword ptr kHash16x33

  wloop:
    movdqu     xmm1, [eax]       // src[0-15]
    lea        eax, [eax + 16]
-    pmulld(0xc6)                 // pmulld      xmm0,xmm6  hash *= 33 ^ 16
-    movdqa     xmm5, kHashMul0
+    pmulld     xmm0, xmm6        // hash *= 33 ^ 16
+    movdqa     xmm5, xmmword ptr kHashMul0
    movdqa     xmm2, xmm1
    punpcklbw  xmm2, xmm7        // src[0-7]
    movdqa     xmm3, xmm2
    punpcklwd  xmm3, xmm7        // src[0-3]
-    pmulld(0xdd)                 // pmulld     xmm3, xmm5
-    movdqa     xmm5, kHashMul1
+    pmulld     xmm3, xmm5
+    movdqa     xmm5, xmmword ptr kHashMul1
    movdqa     xmm4, xmm2
    punpckhwd  xmm4, xmm7        // src[4-7]
-    pmulld(0xe5)                 // pmulld     xmm4, xmm5
-    movdqa     xmm5, kHashMul2
+    pmulld     xmm4, xmm5
+    movdqa     xmm5, xmmword ptr kHashMul2
    punpckhbw  xmm1, xmm7        // src[8-15]
    movdqa     xmm2, xmm1
    punpcklwd  xmm2, xmm7        // src[8-11]
-    pmulld(0xd5)                 // pmulld     xmm2, xmm5
-    movdqa     xmm5, kHashMul3
+    pmulld     xmm2, xmm5
+    movdqa     xmm5, xmmword ptr kHashMul3
    punpckhwd  xmm1, xmm7        // src[12-15]
-    pmulld(0xcd)                 // pmulld     xmm1, xmm5
+    pmulld     xmm1, xmm5
    paddd      xmm3, xmm4        // add 16 results
    paddd      xmm1, xmm2
    paddd      xmm1, xmm3
@@ -191,36 +183,37 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
  __asm {
    mov        eax, [esp + 4]    // src
    mov        ecx, [esp + 8]    // count
-    movd       xmm0, [esp + 12]  // seed
-    movdqa     xmm6, kHash16x33
+    vmovd      xmm0, [esp + 12]  // seed

  wloop:
-    vpmovzxbd  xmm3, dword ptr [eax]  // src[0-3]
-    pmulld     xmm0, xmm6  // hash *= 33 ^ 16
-    vpmovzxbd  xmm4, dword ptr [eax + 4]  // src[4-7]
-    pmulld     xmm3, kHashMul0
-    vpmovzxbd  xmm2, dword ptr [eax + 8]  // src[8-11]
-    pmulld     xmm4, kHashMul1
-    vpmovzxbd  xmm1, dword ptr [eax + 12]  // src[12-15]
-    pmulld     xmm2, kHashMul2
+    vpmovzxbd  xmm3, [eax]  // src[0-3]
+    vpmulld    xmm0, xmm0, xmmword ptr kHash16x33  // hash *= 33 ^ 16
+    vpmovzxbd  xmm4, [eax + 4]  // src[4-7]
+    vpmulld    xmm3, xmm3, xmmword ptr kHashMul0
+    vpmovzxbd  xmm2, [eax + 8]  // src[8-11]
+    vpmulld    xmm4, xmm4, xmmword ptr kHashMul1
+    vpmovzxbd  xmm1, [eax + 12]  // src[12-15]
+    vpmulld    xmm2, xmm2, xmmword ptr kHashMul2
    lea        eax, [eax + 16]
-    pmulld     xmm1, kHashMul3
-    paddd      xmm3, xmm4        // add 16 results
-    paddd      xmm1, xmm2
-    paddd      xmm1, xmm3
-    pshufd     xmm2, xmm1, 0x0e  // upper 2 dwords
-    paddd      xmm1, xmm2
-    pshufd     xmm2, xmm1, 0x01
-    paddd      xmm1, xmm2
-    paddd      xmm0, xmm1
+    vpmulld    xmm1, xmm1, xmmword ptr kHashMul3
+    vpaddd     xmm3, xmm3, xmm4        // add 16 results
+    vpaddd     xmm1, xmm1, xmm2
+    vpaddd     xmm1, xmm1, xmm3
+    vpshufd    xmm2, xmm1, 0x0e  // upper 2 dwords
+    vpaddd     xmm1, xmm1,xmm2
+    vpshufd    xmm2, xmm1, 0x01
+    vpaddd     xmm1, xmm1, xmm2
+    vpaddd     xmm0, xmm0, xmm1
    sub        ecx, 16
    jg         wloop

-    movd       eax, xmm0         // return hash
+    vmovd      eax, xmm0         // return hash
+    vzeroupper
    ret
  }
 }
 #endif  // _MSC_VER >= 1700
+
 #endif  // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)

 #ifdef __cplusplus
--- a/third_party/libyuv/source/convert.cc
+++ b/third_party/libyuv/source/convert.cc
@@ -40,13 +40,14 @@ static int I4xxToI420(const uint8* src_y, int src_stride_y,
  const int dst_y_height = Abs(src_y_height);
  const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1);
  const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1);
-  if (src_y_width == 0 || src_y_height == 0 ||
-      src_uv_width == 0 || src_uv_height == 0) {
+  if (src_uv_width == 0 || src_uv_height == 0) {
    return -1;
  }
-  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
-             dst_y, dst_stride_y, dst_y_width, dst_y_height,
-             kFilterBilinear);
+  if (dst_y) {
+    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
+               dst_y, dst_stride_y, dst_y_width, dst_y_height,
+               kFilterBilinear);
+  }
  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
             kFilterBilinear);
@@ -69,8 +70,8 @@ int I420Copy(const uint8* src_y, int src_stride_y,
             int width, int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
-  if (!src_y || !src_u || !src_v ||
-      !dst_y || !dst_u || !dst_v ||
+  if (!src_u || !src_v ||
+      !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
@@ -166,7 +167,7 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
               int width, int height) {
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
-  if (!src_y || !dst_y || !dst_u || !dst_v ||
+  if (!dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
@@ -177,7 +178,9 @@ int I400ToI420(const uint8* src_y, int src_stride_y,
    src_y = src_y + (height - 1) * src_stride_y;
    src_stride_y = -src_stride_y;
  }
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
  SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128);
  SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128);
  return 0;
@@ -242,13 +245,9 @@ static int X420ToI420(const uint8* src_y,
                      uint8* dst_u, int dst_stride_u,
                      uint8* dst_v, int dst_stride_v,
                      int width, int height) {
-  int y;
  int halfwidth = (width + 1) >> 1;
  int halfheight = (height + 1) >> 1;
-  void (*SplitUVRow)(const uint8* src_uv, uint8* dst_u, uint8* dst_v, int pix) =
-      SplitUVRow_C;
-  if (!src_y || !src_uv ||
-      !dst_y || !dst_u || !dst_v ||
+  if (!src_uv || !dst_u || !dst_v ||
      width <= 0 || height == 0) {
    return -1;
  }
@@ -256,7 +255,9 @@ static int X420ToI420(const uint8* src_y,
  if (height < 0) {
    height = -height;
    halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
+    if (dst_y) {
+      dst_y = dst_y + (height - 1) * dst_stride_y;
+    }
    dst_u = dst_u + (halfheight - 1) * dst_stride_u;
    dst_v = dst_v + (halfheight - 1) * dst_stride_v;
    dst_stride_y = -dst_stride_y;
@@ -279,41 +280,6 @@ static int X420ToI420(const uint8* src_y,
    halfheight = 1;
    src_stride_uv = dst_stride_u = dst_stride_v = 0;
  }
-#if defined(HAS_SPLITUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    SplitUVRow = SplitUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    SplitUVRow = SplitUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      SplitUVRow = SplitUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    SplitUVRow = SplitUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_SPLITUVROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) &&
-      IS_ALIGNED(src_uv, 4) && IS_ALIGNED(src_stride_uv, 4) &&
-      IS_ALIGNED(dst_u, 4) && IS_ALIGNED(dst_stride_u, 4) &&
-      IS_ALIGNED(dst_v, 4) && IS_ALIGNED(dst_stride_v, 4)) {
-    SplitUVRow = SplitUVRow_Any_MIPS_DSPR2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      SplitUVRow = SplitUVRow_MIPS_DSPR2;
-    }
-  }
-#endif

  if (dst_y) {
    if (src_stride_y0 == src_stride_y1) {
@@ -324,13 +290,10 @@ static int X420ToI420(const uint8* src_y,
    }
  }

-  for (y = 0; y < halfheight; ++y) {
-    // Copy a row of UV.
-    SplitUVRow(src_uv, dst_u, dst_v, halfwidth);
-    dst_u += dst_stride_u;
-    dst_v += dst_stride_v;
-    src_uv += src_stride_uv;
-  }
+  // Split UV plane - NV12 / NV21
+  SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v,
+               halfwidth, halfheight);
+
  return 0;
 }

@@ -390,9 +353,9 @@ int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
               int width, int height) {
  int y;
  void (*YUY2ToUVRow)(const uint8* src_yuy2, int src_stride_yuy2,
-      uint8* dst_u, uint8* dst_v, int pix) = YUY2ToUVRow_C;
+      uint8* dst_u, uint8* dst_v, int width) = YUY2ToUVRow_C;
  void (*YUY2ToYRow)(const uint8* src_yuy2,
-      uint8* dst_y, int pix) = YUY2ToYRow_C;
+      uint8* dst_y, int width) = YUY2ToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
@@ -455,9 +418,9 @@ int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
               int width, int height) {
  int y;
  void (*UYVYToUVRow)(const uint8* src_uyvy, int src_stride_uyvy,
-      uint8* dst_u, uint8* dst_v, int pix) = UYVYToUVRow_C;
+      uint8* dst_u, uint8* dst_v, int width) = UYVYToUVRow_C;
  void (*UYVYToYRow)(const uint8* src_uyvy,
-      uint8* dst_y, int pix) = UYVYToYRow_C;
+      uint8* dst_y, int width) = UYVYToYRow_C;
  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
@@ -521,7 +484,7 @@ int ARGBToI420(const uint8* src_argb, int src_stride_argb,
  int y;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  if (!src_argb ||
      !dst_y || !dst_u || !dst_v ||
@@ -597,7 +560,7 @@ int BGRAToI420(const uint8* src_bgra, int src_stride_bgra,
  int y;
  void (*BGRAToUVRow)(const uint8* src_bgra0, int src_stride_bgra,
      uint8* dst_u, uint8* dst_v, int width) = BGRAToUVRow_C;
-  void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int pix) =
+  void (*BGRAToYRow)(const uint8* src_bgra, uint8* dst_y, int width) =
      BGRAToYRow_C;
  if (!src_bgra ||
      !dst_y || !dst_u || !dst_v ||
@@ -663,7 +626,7 @@ int ABGRToI420(const uint8* src_abgr, int src_stride_abgr,
  int y;
  void (*ABGRToUVRow)(const uint8* src_abgr0, int src_stride_abgr,
      uint8* dst_u, uint8* dst_v, int width) = ABGRToUVRow_C;
-  void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int pix) =
+  void (*ABGRToYRow)(const uint8* src_abgr, uint8* dst_y, int width) =
      ABGRToYRow_C;
  if (!src_abgr ||
      !dst_y || !dst_u || !dst_v ||
@@ -729,7 +692,7 @@ int RGBAToI420(const uint8* src_rgba, int src_stride_rgba,
  int y;
  void (*RGBAToUVRow)(const uint8* src_rgba0, int src_stride_rgba,
      uint8* dst_u, uint8* dst_v, int width) = RGBAToUVRow_C;
-  void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int pix) =
+  void (*RGBAToYRow)(const uint8* src_rgba, uint8* dst_y, int width) =
      RGBAToYRow_C;
  if (!src_rgba ||
      !dst_y || !dst_u || !dst_v ||
@@ -796,14 +759,14 @@ int RGB24ToI420(const uint8* src_rgb24, int src_stride_rgb24,
 #if defined(HAS_RGB24TOYROW_NEON)
  void (*RGB24ToUVRow)(const uint8* src_rgb24, int src_stride_rgb24,
      uint8* dst_u, uint8* dst_v, int width) = RGB24ToUVRow_C;
-  void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int pix) =
+  void (*RGB24ToYRow)(const uint8* src_rgb24, uint8* dst_y, int width) =
      RGB24ToYRow_C;
 #else
-  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RGB24ToARGBRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
 #endif
  if (!src_rgb24 || !dst_y || !dst_u || !dst_v ||
@@ -910,14 +873,14 @@ int RAWToI420(const uint8* src_raw, int src_stride_raw,
 #if defined(HAS_RAWTOYROW_NEON)
  void (*RAWToUVRow)(const uint8* src_raw, int src_stride_raw,
      uint8* dst_u, uint8* dst_v, int width) = RAWToUVRow_C;
-  void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int pix) =
+  void (*RAWToYRow)(const uint8* src_raw, uint8* dst_y, int width) =
      RAWToYRow_C;
 #else
-  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RAWToARGBRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
 #endif
  if (!src_raw || !dst_y || !dst_u || !dst_v ||
@@ -1024,14 +987,14 @@ int RGB565ToI420(const uint8* src_rgb565, int src_stride_rgb565,
 #if defined(HAS_RGB565TOYROW_NEON)
  void (*RGB565ToUVRow)(const uint8* src_rgb565, int src_stride_rgb565,
      uint8* dst_u, uint8* dst_v, int width) = RGB565ToUVRow_C;
-  void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int pix) =
+  void (*RGB565ToYRow)(const uint8* src_rgb565, uint8* dst_y, int width) =
      RGB565ToYRow_C;
 #else
-  void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RGB565ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RGB565ToARGBRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
 #endif
  if (!src_rgb565 || !dst_y || !dst_u || !dst_v ||
@@ -1146,14 +1109,14 @@ int ARGB1555ToI420(const uint8* src_argb1555, int src_stride_argb1555,
 #if defined(HAS_ARGB1555TOYROW_NEON)
  void (*ARGB1555ToUVRow)(const uint8* src_argb1555, int src_stride_argb1555,
      uint8* dst_u, uint8* dst_v, int width) = ARGB1555ToUVRow_C;
-  void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int pix) =
+  void (*ARGB1555ToYRow)(const uint8* src_argb1555, uint8* dst_y, int width) =
      ARGB1555ToYRow_C;
 #else
-  void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*ARGB1555ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      ARGB1555ToARGBRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
 #endif
  if (!src_argb1555 || !dst_y || !dst_u || !dst_v ||
@@ -1270,14 +1233,14 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
 #if defined(HAS_ARGB4444TOYROW_NEON)
  void (*ARGB4444ToUVRow)(const uint8* src_argb4444, int src_stride_argb4444,
      uint8* dst_u, uint8* dst_v, int width) = ARGB4444ToUVRow_C;
-  void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int pix) =
+  void (*ARGB4444ToYRow)(const uint8* src_argb4444, uint8* dst_y, int width) =
      ARGB4444ToYRow_C;
 #else
-  void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*ARGB4444ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      ARGB4444ToARGBRow_C;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
 #endif
  if (!src_argb4444 || !dst_y || !dst_u || !dst_v ||
@@ -1383,6 +1346,81 @@ int ARGB4444ToI420(const uint8* src_argb4444, int src_stride_argb4444,
  return 0;
 }

+static void SplitPixels(const uint8* src_u, int src_pixel_stride_uv,
+                        uint8* dst_u, int width) {
+  int i;
+  for (i = 0; i < width; ++i) {
+    *dst_u = *src_u;
+    ++dst_u;
+    src_u += src_pixel_stride_uv;
+  }
+}
+
+// Convert Android420 to I420.
+LIBYUV_API
+int Android420ToI420(const uint8* src_y, int src_stride_y,
+                     const uint8* src_u, int src_stride_u,
+                     const uint8* src_v, int src_stride_v,
+                     int src_pixel_stride_uv,
+                     uint8* dst_y, int dst_stride_y,
+                     uint8* dst_u, int dst_stride_u,
+                     uint8* dst_v, int dst_stride_v,
+                     int width, int height) {
+  int y;
+  const int vu_off = src_v - src_u;
+  int halfwidth = (width + 1) >> 1;
+  int halfheight = (height + 1) >> 1;
+  if (!src_u || !src_v ||
+      !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    halfheight = (height + 1) >> 1;
+    src_y = src_y + (height - 1) * src_stride_y;
+    src_u = src_u + (halfheight - 1) * src_stride_u;
+    src_v = src_v + (halfheight - 1) * src_stride_v;
+    src_stride_y = -src_stride_y;
+    src_stride_u = -src_stride_u;
+    src_stride_v = -src_stride_v;
+  }
+
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
+  }
+
+  // Copy UV planes as is - I420
+  if (src_pixel_stride_uv == 1) {
+    CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight);
+    CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight);
+    return 0;
+  // Split UV planes - NV21
+  } else if (src_pixel_stride_uv == 2 && vu_off == -1 &&
+             src_stride_u == src_stride_v) {
+    SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u,
+                 halfwidth, halfheight);
+    return 0;
+  // Split UV planes - NV12
+  } else if (src_pixel_stride_uv == 2 && vu_off == 1 &&
+             src_stride_u == src_stride_v) {
+    SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v,
+                 halfwidth, halfheight);
+    return 0;
+  }
+
+  for (y = 0; y < halfheight; ++y) {
+    SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth);
+    SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth);
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+    dst_u += dst_stride_u;
+    dst_v += dst_stride_v;
+  }
+  return 0;
+}
+
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
--- a/third_party/libyuv/source/convert_argb.cc
+++ b/third_party/libyuv/source/convert_argb.cc
@@ -14,6 +14,7 @@
 #ifdef HAVE_JPEG
 #include "libyuv/mjpeg_decoder.h"
 #endif
+#include "libyuv/planar_functions.h"  // For CopyPlane and ARGBShuffle.
 #include "libyuv/rotate_argb.h"
 #include "libyuv/row.h"
 #include "libyuv/video_common.h"
@@ -44,18 +45,347 @@ int ARGBCopy(const uint8* src_argb, int src_stride_argb,
  return 0;
 }

-// Convert I444 to ARGB.
+// Convert I422 to ARGB with matrix
+static int I420ToARGBMatrix(const uint8* src_y, int src_stride_y,
+                            const uint8* src_u, int src_stride_u,
+                            const uint8* src_v, int src_stride_v,
+                            uint8* dst_argb, int dst_stride_argb,
+                            const struct YuvConstants* yuvconstants,
+                            int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
+  if (!src_y || !src_u || !src_v || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_DSPR2)
+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 to ARGB.
 LIBYUV_API
-int I444ToARGB(const uint8* src_y, int src_stride_y,
+int I420ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvI601Constants,
+                          width, height);
+}
+
+// Convert I420 to ABGR.
+LIBYUV_API
+int I420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuI601Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert J420 to ARGB.
+LIBYUV_API
+int J420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvJPEGConstants,
+                          width, height);
+}
+
+// Convert J420 to ABGR.
+LIBYUV_API
+int J420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuJPEGConstants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert H420 to ARGB.
+LIBYUV_API
+int H420ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvH709Constants,
+                          width, height);
+}
+
+// Convert H420 to ABGR.
+LIBYUV_API
+int H420ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I420ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuH709Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert I422 to ARGB with matrix
+static int I422ToARGBMatrix(const uint8* src_y, int src_stride_y,
+                            const uint8* src_u, int src_stride_u,
+                            const uint8* src_v, int src_stride_v,
+                            uint8* dst_argb, int dst_stride_argb,
+                            const struct YuvConstants* yuvconstants,
+                            int width, int height) {
+  int y;
+  void (*I422ToARGBRow)(const uint8* y_buf,
+                        const uint8* u_buf,
+                        const uint8* v_buf,
+                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
+                        int width) = I422ToARGBRow_C;
+  if (!src_y || !src_u || !src_v ||
+      !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+  // Coalesce rows.
+  if (src_stride_y == width &&
+      src_stride_u * 2 == width &&
+      src_stride_v * 2 == width &&
+      dst_stride_argb == width * 4) {
+    width *= height;
+    height = 1;
+    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
+  }
+#if defined(HAS_I422TOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422ToARGBRow = I422ToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422ToARGBRow = I422ToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422ToARGBRow = I422ToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422TOARGBROW_DSPR2)
+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422ToARGBRow = I422ToARGBRow_DSPR2;
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
+    dst_argb += dst_stride_argb;
+    src_y += src_stride_y;
+    src_u += src_stride_u;
+    src_v += src_stride_v;
+  }
+  return 0;
+}
+
+// Convert I422 to ARGB.
+LIBYUV_API
+int I422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvI601Constants,
+                          width, height);
+}
+
+// Convert I422 to ABGR.
+LIBYUV_API
+int I422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuI601Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert J422 to ARGB.
+LIBYUV_API
+int J422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvJPEGConstants,
+                          width, height);
+}
+
+// Convert J422 to ABGR.
+LIBYUV_API
+int J422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuJPEGConstants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert H422 to ARGB.
+LIBYUV_API
+int H422ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvH709Constants,
+                          width, height);
+}
+
+// Convert H422 to ABGR.
+LIBYUV_API
+int H422ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I422ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuH709Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert I444 to ARGB with matrix
+static int I444ToARGBMatrix(const uint8* src_y, int src_stride_y,
+                            const uint8* src_u, int src_stride_u,
+                            const uint8* src_v, int src_stride_v,
+                            uint8* dst_argb, int dst_stride_argb,
+                            const struct YuvConstants* yuvconstants,
+                            int width, int height) {
  int y;
  void (*I444ToARGBRow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = I444ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
@@ -103,7 +433,7 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I444ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@@ -112,81 +442,49 @@ int I444ToARGB(const uint8* src_y, int src_stride_y,
  return 0;
 }

-// Convert I422 to ARGB.
+// Convert I444 to ARGB.
 LIBYUV_API
-int I422ToARGB(const uint8* src_y, int src_stride_y,
+int I444ToARGB(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
               const uint8* src_v, int src_stride_v,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
-  int y;
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
+  return I444ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvI601Constants,
+                          width, height);
+}

-  for (y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
+// Convert I444 to ABGR.
+LIBYUV_API
+int I444ToABGR(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_abgr, int dst_stride_abgr,
+               int width, int height) {
+  return I444ToARGBMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_abgr, dst_stride_abgr,
+                          &kYvuI601Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert J444 to ARGB.
+LIBYUV_API
+int J444ToARGB(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_argb, int dst_stride_argb,
+               int width, int height) {
+  return I444ToARGBMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_argb, dst_stride_argb,
+                          &kYuvJPEGConstants,
+                          width, height);
 }

 // Convert I411 to ARGB.
@@ -201,6 +499,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = I411ToARGBRow_C;
  if (!src_y || !src_u || !src_v ||
      !dst_argb ||
@@ -248,7 +547,7 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I411ToARGBRow(src_y, src_u, src_v, dst_argb, width);
+    I411ToARGBRow(src_y, src_u, src_v, dst_argb, &kYuvI601Constants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    src_u += src_stride_u;
@@ -257,6 +556,143 @@ int I411ToARGB(const uint8* src_y, int src_stride_y,
  return 0;
 }

+// Convert I420 with Alpha to preattenuated ARGB.
+static int I420AlphaToARGBMatrix(const uint8* src_y, int src_stride_y,
+                                 const uint8* src_u, int src_stride_u,
+                                 const uint8* src_v, int src_stride_v,
+                                 const uint8* src_a, int src_stride_a,
+                                 uint8* dst_argb, int dst_stride_argb,
+                                 const struct YuvConstants* yuvconstants,
+                                 int width, int height, int attenuate) {
+  int y;
+  void (*I422AlphaToARGBRow)(const uint8* y_buf,
+                             const uint8* u_buf,
+                             const uint8* v_buf,
+                             const uint8* a_buf,
+                             uint8* dst_argb,
+                             const struct YuvConstants* yuvconstants,
+                             int width) = I422AlphaToARGBRow_C;
+  void (*ARGBAttenuateRow)(const uint8* src_argb, uint8* dst_argb,
+                           int width) = ARGBAttenuateRow_C;
+  if (!src_y || !src_u || !src_v || !dst_argb ||
+      width <= 0 || height == 0) {
+    return -1;
+  }
+  // Negative height means invert the image.
+  if (height < 0) {
+    height = -height;
+    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
+    dst_stride_argb = -dst_stride_argb;
+  }
+#if defined(HAS_I422ALPHATOARGBROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 8)) {
+      I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2;
+    if (IS_ALIGNED(width, 16)) {
+      I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      I422AlphaToARGBRow = I422AlphaToARGBRow_NEON;
+    }
+  }
+#endif
+#if defined(HAS_I422ALPHATOARGBROW_DSPR2)
+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
+    I422AlphaToARGBRow = I422AlphaToARGBRow_DSPR2;
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_SSSE3)
+  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3;
+    if (IS_ALIGNED(width, 4)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_SSSE3;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_AVX2)
+  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_AVX2;
+    }
+  }
+#endif
+#if defined(HAS_ARGBATTENUATEROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON;
+    if (IS_ALIGNED(width, 8)) {
+      ARGBAttenuateRow = ARGBAttenuateRow_NEON;
+    }
+  }
+#endif
+
+  for (y = 0; y < height; ++y) {
+    I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants,
+                       width);
+    if (attenuate) {
+      ARGBAttenuateRow(dst_argb, dst_argb, width);
+    }
+    dst_argb += dst_stride_argb;
+    src_a += src_stride_a;
+    src_y += src_stride_y;
+    if (y & 1) {
+      src_u += src_stride_u;
+      src_v += src_stride_v;
+    }
+  }
+  return 0;
+}
+
+// Convert I420 with Alpha to ARGB.
+LIBYUV_API
+int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_argb, int dst_stride_argb,
+                    int width, int height, int attenuate) {
+  return I420AlphaToARGBMatrix(src_y, src_stride_y,
+                               src_u, src_stride_u,
+                               src_v, src_stride_v,
+                               src_a, src_stride_a,
+                               dst_argb, dst_stride_argb,
+                               &kYuvI601Constants,
+                               width, height, attenuate);
+}
+
+// Convert I420 with Alpha to ABGR.
+LIBYUV_API
+int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
+                    const uint8* src_u, int src_stride_u,
+                    const uint8* src_v, int src_stride_v,
+                    const uint8* src_a, int src_stride_a,
+                    uint8* dst_abgr, int dst_stride_abgr,
+                    int width, int height, int attenuate) {
+  return I420AlphaToARGBMatrix(src_y, src_stride_y,
+                               src_v, src_stride_v,  // Swap U and V
+                               src_u, src_stride_u,
+                               src_a, src_stride_a,
+                               dst_abgr, dst_stride_abgr,
+                               &kYvuI601Constants,  // Use Yvu matrix
+                               width, height, attenuate);
+}
+
 // Convert I400 to ARGB.
 LIBYUV_API
 int I400ToARGB(const uint8* src_y, int src_stride_y,
@@ -322,7 +758,7 @@ int J400ToARGB(const uint8* src_y, int src_stride_y,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int pix) =
+  void (*J400ToARGBRow)(const uint8* src_y, uint8* dst_argb, int width) =
      J400ToARGBRow_C;
  if (!src_y || !dst_argb ||
      width <= 0 || height == 0) {
@@ -449,7 +885,7 @@ int RGB24ToARGB(const uint8* src_rgb24, int src_stride_rgb24,
                uint8* dst_argb, int dst_stride_argb,
                int width, int height) {
  int y;
-  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RGB24ToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RGB24ToARGBRow_C;
  if (!src_rgb24 || !dst_argb ||
      width <= 0 || height == 0) {
@@ -499,7 +935,7 @@ int RAWToARGB(const uint8* src_raw, int src_stride_raw,
              uint8* dst_argb, int dst_stride_argb,
              int width, int height) {
  int y;
-  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int pix) =
+  void (*RAWToARGBRow)(const uint8* src_rgb, uint8* dst_argb, int width) =
      RAWToARGBRow_C;
  if (!src_raw || !dst_argb ||
      width <= 0 || height == 0) {
@@ -549,7 +985,7 @@ int RGB565ToARGB(const uint8* src_rgb565, int src_stride_rgb565,
                 uint8* dst_argb, int dst_stride_argb,
                 int width, int height) {
  int y;
-  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int pix) =
+  void (*RGB565ToARGBRow)(const uint8* src_rgb565, uint8* dst_argb, int width) =
      RGB565ToARGBRow_C;
  if (!src_rgb565 || !dst_argb ||
      width <= 0 || height == 0) {
@@ -608,7 +1044,7 @@ int ARGB1555ToARGB(const uint8* src_argb1555, int src_stride_argb1555,
                   int width, int height) {
  int y;
  void (*ARGB1555ToARGBRow)(const uint8* src_argb1555, uint8* dst_argb,
-      int pix) = ARGB1555ToARGBRow_C;
+      int width) = ARGB1555ToARGBRow_C;
  if (!src_argb1555 || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@@ -666,7 +1102,7 @@ int ARGB4444ToARGB(const uint8* src_argb4444, int src_stride_argb4444,
                   int width, int height) {
  int y;
  void (*ARGB4444ToARGBRow)(const uint8* src_argb4444, uint8* dst_argb,
-      int pix) = ARGB4444ToARGBRow_C;
+      int width) = ARGB4444ToARGBRow_C;
  if (!src_argb4444 || !dst_argb ||
      width <= 0 || height == 0) {
    return -1;
@@ -727,6 +1163,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = NV12ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb ||
      width <= 0 || height == 0) {
@@ -764,7 +1201,7 @@ int NV12ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    NV12ToARGBRow(src_y, src_uv, dst_argb, width);
+    NV12ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@@ -784,6 +1221,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
  void (*NV21ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = NV21ToARGBRow_C;
  if (!src_y || !src_uv || !dst_argb ||
      width <= 0 || height == 0) {
@@ -821,7 +1259,7 @@ int NV21ToARGB(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    NV21ToARGBRow(src_y, src_uv, dst_argb, width);
+    NV21ToARGBRow(src_y, src_uv, dst_argb, &kYuvI601Constants, width);
    dst_argb += dst_stride_argb;
    src_y += src_stride_y;
    if (y & 1) {
@@ -840,6 +1278,7 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
  void (*NV12ToARGBRow)(const uint8* y_buf,
                        const uint8* uv_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = NV12ToARGBRow_C;
  if (!src_m420 || !dst_argb ||
      width <= 0 || height == 0) {
@@ -877,14 +1316,16 @@ int M420ToARGB(const uint8* src_m420, int src_stride_m420,
 #endif

  for (y = 0; y < height - 1; y += 2) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+                  &kYuvI601Constants, width);
    NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2,
-                  dst_argb + dst_stride_argb, width);
+                  dst_argb + dst_stride_argb, &kYuvI601Constants, width);
    dst_argb += dst_stride_argb * 2;
    src_m420 += src_stride_m420 * 3;
  }
  if (height & 1) {
-    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, width);
+    NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb,
+                  &kYuvI601Constants, width);
  }
  return 0;
 }
@@ -895,7 +1336,10 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*YUY2ToARGBRow)(const uint8* src_yuy2, uint8* dst_argb, int pix) =
+  void (*YUY2ToARGBRow)(const uint8* src_yuy2,
+                        uint8* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int width) =
      YUY2ToARGBRow_C;
  if (!src_yuy2 || !dst_argb ||
      width <= 0 || height == 0) {
@@ -939,7 +1383,7 @@ int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
  }
 #endif
  for (y = 0; y < height; ++y) {
-    YUY2ToARGBRow(src_yuy2, dst_argb, width);
+    YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width);
    src_yuy2 += src_stride_yuy2;
    dst_argb += dst_stride_argb;
  }
@@ -952,7 +1396,10 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
               uint8* dst_argb, int dst_stride_argb,
               int width, int height) {
  int y;
-  void (*UYVYToARGBRow)(const uint8* src_uyvy, uint8* dst_argb, int pix) =
+  void (*UYVYToARGBRow)(const uint8* src_uyvy,
+                        uint8* dst_argb,
+                        const struct YuvConstants* yuvconstants,
+                        int width) =
      UYVYToARGBRow_C;
  if (!src_uyvy || !dst_argb ||
      width <= 0 || height == 0) {
@@ -996,159 +1443,13 @@ int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
  }
 #endif
  for (y = 0; y < height; ++y) {
-    UYVYToARGBRow(src_uyvy, dst_argb, width);
+    UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width);
    src_uyvy += src_stride_uyvy;
    dst_argb += dst_stride_argb;
  }
  return 0;
 }

-// Convert J420 to ARGB.
-LIBYUV_API
-int J420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert J422 to ARGB.
-LIBYUV_API
-int J422ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*J422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = J422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v ||
-      !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-  // Coalesce rows.
-  if (src_stride_y == width &&
-      src_stride_u * 2 == width &&
-      src_stride_v * 2 == width &&
-      dst_stride_argb == width * 4) {
-    width *= height;
-    height = 1;
-    src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0;
-  }
-#if defined(HAS_J422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    J422ToARGBRow = J422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    J422ToARGBRow = J422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      J422ToARGBRow = J422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    J422ToARGBRow = J422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      J422ToARGBRow = J422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_J422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    J422ToARGBRow = J422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    J422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-  }
-  return 0;
-}
-
 #ifdef __cplusplus
 }  // extern "C"
 }  // namespace libyuv
--- a/third_party/libyuv/source/convert_from.cc
+++ b/third_party/libyuv/source/convert_from.cc
@@ -46,9 +46,11 @@ static int I420ToI4xx(const uint8* src_y, int src_stride_y,
      dst_uv_width <= 0 || dst_uv_height <= 0) {
    return -1;
  }
-  ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
-             dst_y, dst_stride_y, dst_y_width, dst_y_height,
-             kFilterBilinear);
+  if (dst_y) {
+    ScalePlane(src_y, src_stride_y, src_y_width, src_y_height,
+               dst_y, dst_stride_y, dst_y_width, dst_y_height,
+               kFilterBilinear);
+  }
  ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height,
             dst_u, dst_stride_u, dst_uv_width, dst_uv_height,
             kFilterBilinear);
@@ -359,6 +361,7 @@ int I420ToUYVY(const uint8* src_y, int src_stride_y,
  return 0;
 }

+// TODO(fbarchard): test negative height for invert.
 LIBYUV_API
 int I420ToNV12(const uint8* src_y, int src_stride_y,
               const uint8* src_u, int src_stride_u,
@@ -366,72 +369,19 @@ int I420ToNV12(const uint8* src_y, int src_stride_y,
               uint8* dst_y, int dst_stride_y,
               uint8* dst_uv, int dst_stride_uv,
               int width, int height) {
-  int y;
-  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
-      int width) = MergeUVRow_C;
-  // Coalesce rows.
-  int halfwidth = (width + 1) >> 1;
-  int halfheight = (height + 1) >> 1;
  if (!src_y || !src_u || !src_v || !dst_y || !dst_uv ||
      width <= 0 || height == 0) {
    return -1;
  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    halfheight = (height + 1) >> 1;
-    dst_y = dst_y + (height - 1) * dst_stride_y;
-    dst_uv = dst_uv + (halfheight - 1) * dst_stride_uv;
-    dst_stride_y = -dst_stride_y;
-    dst_stride_uv = -dst_stride_uv;
-  }
-  if (src_stride_y == width &&
-      dst_stride_y == width) {
-    width *= height;
-    height = 1;
-    src_stride_y = dst_stride_y = 0;
-  }
-  // Coalesce rows.
-  if (src_stride_u == halfwidth &&
-      src_stride_v == halfwidth &&
-      dst_stride_uv == halfwidth * 2) {
-    halfwidth *= halfheight;
-    halfheight = 1;
-    src_stride_u = src_stride_v = dst_stride_uv = 0;
-  }
-#if defined(HAS_MERGEUVROW_SSE2)
-  if (TestCpuFlag(kCpuHasSSE2)) {
-    MergeUVRow_ = MergeUVRow_Any_SSE2;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_SSE2;
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    MergeUVRow_ = MergeUVRow_Any_AVX2;
-    if (IS_ALIGNED(halfwidth, 32)) {
-      MergeUVRow_ = MergeUVRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_MERGEUVROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    MergeUVRow_ = MergeUVRow_Any_NEON;
-    if (IS_ALIGNED(halfwidth, 16)) {
-      MergeUVRow_ = MergeUVRow_NEON;
-    }
-  }
-#endif
-
-  CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
-  for (y = 0; y < halfheight; ++y) {
-    // Merge a row of U and V into a row of UV.
-    MergeUVRow_(src_u, src_v, dst_uv, halfwidth);
-    src_u += src_stride_u;
-    src_v += src_stride_v;
-    dst_uv += dst_stride_uv;
+  int halfwidth = (width + 1) / 2;
+  int halfheight = height > 0 ? (height + 1) / 2 : (height - 1) / 2;
+  if (dst_y) {
+    CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
  }
+  MergeUVPlane(src_u, src_stride_u,
+               src_v, src_stride_v,
+               dst_uv, dst_stride_uv,
+               halfwidth, halfheight);
  return 0;
 }

@@ -445,221 +395,24 @@ int I420ToNV21(const uint8* src_y, int src_stride_y,
  return I420ToNV12(src_y, src_stride_y,
                    src_v, src_stride_v,
                    src_u, src_stride_u,
-                    dst_y, src_stride_y,
+                    dst_y, dst_stride_y,
                    dst_vu, dst_stride_vu,
                    width, height);
 }

-// Convert I420 to ARGB.
-LIBYUV_API
-int I420ToARGB(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_argb, int dst_stride_argb,
-               int width, int height) {
-  int y;
-  void (*I422ToARGBRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToARGBRow_C;
-  if (!src_y || !src_u || !src_v || !dst_argb ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_argb = dst_argb + (height - 1) * dst_stride_argb;
-    dst_stride_argb = -dst_stride_argb;
-  }
-#if defined(HAS_I422TOARGBROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToARGBRow = I422ToARGBRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToARGBRow = I422ToARGBRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToARGBRow = I422ToARGBRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToARGBRow = I422ToARGBRow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToARGBRow(src_y, src_u, src_v, dst_argb, width);
-    dst_argb += dst_stride_argb;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to BGRA.
-LIBYUV_API
-int I420ToBGRA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_bgra, int dst_stride_bgra,
-               int width, int height) {
-  int y;
-  void (*I422ToBGRARow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToBGRARow_C;
-  if (!src_y || !src_u || !src_v || !dst_bgra ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_bgra = dst_bgra + (height - 1) * dst_stride_bgra;
-    dst_stride_bgra = -dst_stride_bgra;
-  }
-#if defined(HAS_I422TOBGRAROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToBGRARow = I422ToBGRARow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToBGRARow = I422ToBGRARow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToBGRARow = I422ToBGRARow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToBGRARow = I422ToBGRARow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToBGRARow = I422ToBGRARow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToBGRARow = I422ToBGRARow_NEON;
-    }
-  }
-#endif
-#if defined(HAS_I422TOBGRAROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
-      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
-      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
-      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
-      IS_ALIGNED(dst_bgra, 4) && IS_ALIGNED(dst_stride_bgra, 4)) {
-    I422ToBGRARow = I422ToBGRARow_MIPS_DSPR2;
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToBGRARow(src_y, src_u, src_v, dst_bgra, width);
-    dst_bgra += dst_stride_bgra;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to ABGR.
-LIBYUV_API
-int I420ToABGR(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_abgr, int dst_stride_abgr,
-               int width, int height) {
-  int y;
-  void (*I422ToABGRRow)(const uint8* y_buf,
-                        const uint8* u_buf,
-                        const uint8* v_buf,
-                        uint8* rgb_buf,
-                        int width) = I422ToABGRRow_C;
-  if (!src_y || !src_u || !src_v || !dst_abgr ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_abgr = dst_abgr + (height - 1) * dst_stride_abgr;
-    dst_stride_abgr = -dst_stride_abgr;
-  }
-#if defined(HAS_I422TOABGRROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToABGRRow = I422ToABGRRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToABGRRow = I422ToABGRRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TOABGRROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToABGRRow = I422ToABGRRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToABGRRow = I422ToABGRRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TOABGRROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToABGRRow = I422ToABGRRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToABGRRow = I422ToABGRRow_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToABGRRow(src_y, src_u, src_v, dst_abgr, width);
-    dst_abgr += dst_stride_abgr;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
-}
-
-// Convert I420 to RGBA.
-LIBYUV_API
-int I420ToRGBA(const uint8* src_y, int src_stride_y,
-               const uint8* src_u, int src_stride_u,
-               const uint8* src_v, int src_stride_v,
-               uint8* dst_rgba, int dst_stride_rgba,
-               int width, int height) {
+// Convert I422 to RGBA with matrix
+static int I420ToRGBAMatrix(const uint8* src_y, int src_stride_y,
+                            const uint8* src_u, int src_stride_u,
+                            const uint8* src_v, int src_stride_v,
+                            uint8* dst_rgba, int dst_stride_rgba,
+                            const struct YuvConstants* yuvconstants,
+                            int width, int height) {
  int y;
  void (*I422ToRGBARow)(const uint8* y_buf,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = I422ToRGBARow_C;
  if (!src_y || !src_u || !src_v || !dst_rgba ||
      width <= 0 || height == 0) {
@@ -695,9 +448,18 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
+#if defined(HAS_I422TORGBAROW_DSPR2)
+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
+      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
+      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
+      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
+      IS_ALIGNED(dst_rgba, 4) && IS_ALIGNED(dst_stride_rgba, 4)) {
+    I422ToRGBARow = I422ToRGBARow_DSPR2;
+  }
+#endif

  for (y = 0; y < height; ++y) {
-    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, width);
+    I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width);
    dst_rgba += dst_stride_rgba;
    src_y += src_stride_y;
    if (y & 1) {
@@ -708,18 +470,49 @@ int I420ToRGBA(const uint8* src_y, int src_stride_y,
  return 0;
 }

-// Convert I420 to RGB24.
+// Convert I420 to RGBA.
 LIBYUV_API
-int I420ToRGB24(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_rgb24, int dst_stride_rgb24,
-                int width, int height) {
+int I420ToRGBA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_rgba, int dst_stride_rgba,
+               int width, int height) {
+  return I420ToRGBAMatrix(src_y, src_stride_y,
+                          src_u, src_stride_u,
+                          src_v, src_stride_v,
+                          dst_rgba, dst_stride_rgba,
+                          &kYuvI601Constants,
+                          width, height);
+}
+
+// Convert I420 to BGRA.
+LIBYUV_API
+int I420ToBGRA(const uint8* src_y, int src_stride_y,
+               const uint8* src_u, int src_stride_u,
+               const uint8* src_v, int src_stride_v,
+               uint8* dst_bgra, int dst_stride_bgra,
+               int width, int height) {
+  return I420ToRGBAMatrix(src_y, src_stride_y,
+                          src_v, src_stride_v,  // Swap U and V
+                          src_u, src_stride_u,
+                          dst_bgra, dst_stride_bgra,
+                          &kYvuI601Constants,  // Use Yvu matrix
+                          width, height);
+}
+
+// Convert I420 to RGB24 with matrix
+static int I420ToRGB24Matrix(const uint8* src_y, int src_stride_y,
+                             const uint8* src_u, int src_stride_u,
+                             const uint8* src_v, int src_stride_v,
+                             uint8* dst_rgb24, int dst_stride_rgb24,
+                             const struct YuvConstants* yuvconstants,
+                             int width, int height) {
  int y;
  void (*I422ToRGB24Row)(const uint8* y_buf,
                         const uint8* u_buf,
                         const uint8* v_buf,
                         uint8* rgb_buf,
+                         const struct YuvConstants* yuvconstants,
                         int width) = I422ToRGB24Row_C;
  if (!src_y || !src_u || !src_v || !dst_rgb24 ||
      width <= 0 || height == 0) {
@@ -757,7 +550,7 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, width);
+    I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width);
    dst_rgb24 += dst_stride_rgb24;
    src_y += src_stride_y;
    if (y & 1) {
@@ -768,64 +561,34 @@ int I420ToRGB24(const uint8* src_y, int src_stride_y,
  return 0;
 }

+// Convert I420 to RGB24.
+LIBYUV_API
+int I420ToRGB24(const uint8* src_y, int src_stride_y,
+                const uint8* src_u, int src_stride_u,
+                const uint8* src_v, int src_stride_v,
+                uint8* dst_rgb24, int dst_stride_rgb24,
+                int width, int height) {
+  return I420ToRGB24Matrix(src_y, src_stride_y,
+                           src_u, src_stride_u,
+                           src_v, src_stride_v,
+                           dst_rgb24, dst_stride_rgb24,
+                           &kYuvI601Constants,
+                           width, height);
+}
+
 // Convert I420 to RAW.
 LIBYUV_API
 int I420ToRAW(const uint8* src_y, int src_stride_y,
-                const uint8* src_u, int src_stride_u,
-                const uint8* src_v, int src_stride_v,
-                uint8* dst_raw, int dst_stride_raw,
-                int width, int height) {
-  int y;
-  void (*I422ToRAWRow)(const uint8* y_buf,
-                       const uint8* u_buf,
-                       const uint8* v_buf,
-                       uint8* rgb_buf,
-                       int width) = I422ToRAWRow_C;
-  if (!src_y || !src_u || !src_v || !dst_raw ||
-      width <= 0 || height == 0) {
-    return -1;
-  }
-  // Negative height means invert the image.
-  if (height < 0) {
-    height = -height;
-    dst_raw = dst_raw + (height - 1) * dst_stride_raw;
-    dst_stride_raw = -dst_stride_raw;
-  }
-#if defined(HAS_I422TORAWROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    I422ToRAWRow = I422ToRAWRow_Any_SSSE3;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRAWRow = I422ToRAWRow_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_I422TORAWROW_AVX2)
-  if (TestCpuFlag(kCpuHasAVX2)) {
-    I422ToRAWRow = I422ToRAWRow_Any_AVX2;
-    if (IS_ALIGNED(width, 16)) {
-      I422ToRAWRow = I422ToRAWRow_AVX2;
-    }
-  }
-#endif
-#if defined(HAS_I422TORAWROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    I422ToRAWRow = I422ToRAWRow_Any_NEON;
-    if (IS_ALIGNED(width, 8)) {
-      I422ToRAWRow = I422ToRAWRow_NEON;
-    }
-  }
-#endif
-
-  for (y = 0; y < height; ++y) {
-    I422ToRAWRow(src_y, src_u, src_v, dst_raw, width);
-    dst_raw += dst_stride_raw;
-    src_y += src_stride_y;
-    if (y & 1) {
-      src_u += src_stride_u;
-      src_v += src_stride_v;
-    }
-  }
-  return 0;
+              const uint8* src_u, int src_stride_u,
+              const uint8* src_v, int src_stride_v,
+              uint8* dst_raw, int dst_stride_raw,
+              int width, int height) {
+  return I420ToRGB24Matrix(src_y, src_stride_y,
+                           src_v, src_stride_v,  // Swap U and V
+                           src_u, src_stride_u,
+                           dst_raw, dst_stride_raw,
+                           &kYvuI601Constants,  // Use Yvu matrix
+                           width, height);
 }

 // Convert I420 to ARGB1555.
@@ -840,6 +603,7 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
+                            const struct YuvConstants* yuvconstants,
                            int width) = I422ToARGB1555Row_C;
  if (!src_y || !src_u || !src_v || !dst_argb1555 ||
      width <= 0 || height == 0) {
@@ -877,7 +641,8 @@ int I420ToARGB1555(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, width);
+    I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants,
+                      width);
    dst_argb1555 += dst_stride_argb1555;
    src_y += src_stride_y;
    if (y & 1) {
@@ -901,6 +666,7 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
                            const uint8* u_buf,
                            const uint8* v_buf,
                            uint8* rgb_buf,
+                            const struct YuvConstants* yuvconstants,
                            int width) = I422ToARGB4444Row_C;
  if (!src_y || !src_u || !src_v || !dst_argb4444 ||
      width <= 0 || height == 0) {
@@ -938,7 +704,8 @@ int I420ToARGB4444(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, width);
+    I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants,
+                      width);
    dst_argb4444 += dst_stride_argb4444;
    src_y += src_stride_y;
    if (y & 1) {
@@ -961,6 +728,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
                          const uint8* u_buf,
                          const uint8* v_buf,
                          uint8* rgb_buf,
+                          const struct YuvConstants* yuvconstants,
                          int width) = I422ToRGB565Row_C;
  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
      width <= 0 || height == 0) {
@@ -998,7 +766,7 @@ int I420ToRGB565(const uint8* src_y, int src_stride_y,
 #endif

  for (y = 0; y < height; ++y) {
-    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, width);
+    I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width);
    dst_rgb565 += dst_stride_rgb565;
    src_y += src_stride_y;
    if (y & 1) {
@@ -1029,9 +797,10 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
                        const uint8* u_buf,
                        const uint8* v_buf,
                        uint8* rgb_buf,
+                        const struct YuvConstants* yuvconstants,
                        int width) = I422ToARGBRow_C;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
-      const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
+      const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
  if (!src_y || !src_u || !src_v || !dst_rgb565 ||
      width <= 0 || height == 0) {
    return -1;
@@ -1069,12 +838,12 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
    }
  }
 #endif
-#if defined(HAS_I422TOARGBROW_MIPS_DSPR2)
-  if (TestCpuFlag(kCpuHasMIPS_DSPR2) && IS_ALIGNED(width, 4) &&
+#if defined(HAS_I422TOARGBROW_DSPR2)
+  if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 4) &&
      IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
      IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
      IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2)) {
-    I422ToARGBRow = I422ToARGBRow_MIPS_DSPR2;
+    I422ToARGBRow = I422ToARGBRow_DSPR2;
  }
 #endif
 #if defined(HAS_ARGBTORGB565DITHERROW_SSE2)
@@ -1105,7 +874,7 @@ int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
    // Allocate a row of argb.
    align_buffer_64(row_argb, width * 4);
    for (y = 0; y < height; ++y) {
-      I422ToARGBRow(src_y, src_u, src_v, row_argb, width);
+      I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width);
      ARGBToRGB565DitherRow(row_argb, dst_rgb565,
                            *(uint32*)(dither4x4 + ((y & 3) << 2)), width);
      dst_rgb565 += dst_stride_rgb565;
@@ -1258,7 +1027,6 @@ int ConvertFromI420(const uint8* y, int y_stride,
    // Triplanar formats
    // TODO(fbarchard): halfstride instead of halfwidth
    case FOURCC_I420:
-    case FOURCC_YU12:
    case FOURCC_YV12: {
      int halfwidth = (width + 1) / 2;
      int halfheight = (height + 1) / 2;
--- a/third_party/libyuv/source/convert_from_argb.cc
+++ b/third_party/libyuv/source/convert_from_argb.cc
@@ -28,10 +28,10 @@ int ARGBToI444(const uint8* src_argb, int src_stride_argb,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  void (*ARGBToUV444Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUV444Row_C;
+      int width) = ARGBToUV444Row_C;
  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
  }
@@ -109,13 +109,16 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUV422Row_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+  if (!src_argb ||
+      !dst_y || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
    return -1;
  }
+  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
@@ -130,34 +133,22 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
    height = 1;
    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
  }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_NEON;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_ARGBTOYROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
    ARGBToYRow = ARGBToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
      ARGBToYRow = ARGBToYRow_AVX2;
    }
  }
@@ -170,9 +161,17 @@ int ARGBToI422(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif

  for (y = 0; y < height; ++y) {
-    ARGBToUV422Row(src_argb, dst_u, dst_v, width);
+    ARGBToUVRow(src_argb, 0, dst_u, dst_v, width);
    ARGBToYRow(src_argb, dst_y, width);
    src_argb += src_stride_argb;
    dst_y += dst_stride_y;
@@ -191,8 +190,8 @@ int ARGBToI411(const uint8* src_argb, int src_stride_argb,
               int width, int height) {
  int y;
  void (*ARGBToUV411Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUV411Row_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+      int width) = ARGBToUV411Row_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
    return -1;
@@ -264,7 +263,7 @@ int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
  int halfwidth = (width + 1) >> 1;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                      int width) = MergeUVRow_C;
@@ -373,7 +372,7 @@ int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
  int halfwidth = (width + 1) >> 1;
  void (*ARGBToUVRow)(const uint8* src_argb0, int src_stride_argb,
                      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  void (*MergeUVRow_)(const uint8* src_u, const uint8* src_v, uint8* dst_uv,
                      int width) = MergeUVRow_C;
@@ -478,9 +477,9 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
               uint8* dst_yuy2, int dst_stride_yuy2,
               int width, int height) {
  int y;
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUV422Row_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  void (*I422ToYUY2Row)(const uint8* src_y, const uint8* src_u,
      const uint8* src_v, uint8* dst_yuy2, int width) = I422ToYUY2Row_C;
@@ -502,34 +501,22 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
    height = 1;
    src_stride_argb = dst_stride_yuy2 = 0;
  }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_NEON;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_ARGBTOYROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
    ARGBToYRow = ARGBToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
      ARGBToYRow = ARGBToYRow_AVX2;
    }
  }
@@ -542,7 +529,14 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
-
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I422TOYUY2ROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToYUY2Row = I422ToYUY2Row_Any_SSE2;
@@ -567,7 +561,7 @@ int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
    uint8* row_v = row_u + ((width + 63) & ~63) / 2;

    for (y = 0; y < height; ++y) {
-      ARGBToUV422Row(src_argb, row_u, row_v, width);
+      ARGBToUVRow(src_argb, 0, row_u, row_v, width);
      ARGBToYRow(src_argb, row_y, width);
      I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width);
      src_argb += src_stride_argb;
@@ -585,9 +579,9 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
               uint8* dst_uyvy, int dst_stride_uyvy,
               int width, int height) {
  int y;
-  void (*ARGBToUV422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUV422Row_C;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToUVRow)(const uint8* src_argb, int src_stride_argb,
+      uint8* dst_u, uint8* dst_v, int width) = ARGBToUVRow_C;
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  void (*I422ToUYVYRow)(const uint8* src_y, const uint8* src_u,
      const uint8* src_v, uint8* dst_uyvy, int width) = I422ToUYVYRow_C;
@@ -609,34 +603,22 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
    height = 1;
    src_stride_argb = dst_stride_uyvy = 0;
  }
-#if defined(HAS_ARGBTOUV422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUV422ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToUV422Row = ARGBToUV422Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUV422Row = ARGBToUV422Row_NEON;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOYROW_SSSE3)
+#if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVRow = ARGBToUVRow_Any_SSSE3;
    ARGBToYRow = ARGBToYRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_SSSE3;
      ARGBToYRow = ARGBToYRow_SSSE3;
    }
  }
 #endif
-#if defined(HAS_ARGBTOYROW_AVX2)
+#if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
+    ARGBToUVRow = ARGBToUVRow_Any_AVX2;
    ARGBToYRow = ARGBToYRow_Any_AVX2;
    if (IS_ALIGNED(width, 32)) {
+      ARGBToUVRow = ARGBToUVRow_AVX2;
      ARGBToYRow = ARGBToYRow_AVX2;
    }
  }
@@ -649,7 +631,14 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
-
+#if defined(HAS_ARGBTOUVROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVRow = ARGBToUVRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVRow = ARGBToUVRow_NEON;
+    }
+  }
+#endif
 #if defined(HAS_I422TOUYVYROW_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    I422ToUYVYRow = I422ToUYVYRow_Any_SSE2;
@@ -674,7 +663,7 @@ int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
    uint8* row_v = row_u + ((width + 63) & ~63) / 2;

    for (y = 0; y < height; ++y) {
-      ARGBToUV422Row(src_argb, row_u, row_v, width);
+      ARGBToUVRow(src_argb, 0, row_u, row_v, width);
      ARGBToYRow(src_argb, row_y, width);
      I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width);
      src_argb += src_stride_argb;
@@ -692,7 +681,7 @@ int ARGBToI400(const uint8* src_argb, int src_stride_argb,
               uint8* dst_y, int dst_stride_y,
               int width, int height) {
  int y;
-  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToYRow)(const uint8* src_argb, uint8* dst_y, int width) =
      ARGBToYRow_C;
  if (!src_argb || !dst_y || width <= 0 || height == 0) {
    return -1;
@@ -764,7 +753,7 @@ int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
                uint8* dst_rgb24, int dst_stride_rgb24,
                int width, int height) {
  int y;
-  void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+  void (*ARGBToRGB24Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToRGB24Row_C;
  if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) {
    return -1;
@@ -812,7 +801,7 @@ int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
              uint8* dst_raw, int dst_stride_raw,
              int width, int height) {
  int y;
-  void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+  void (*ARGBToRAWRow)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToRAWRow_C;
  if (!src_argb || !dst_raw || width <= 0 || height == 0) {
    return -1;
@@ -869,7 +858,7 @@ int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
                       const uint8* dither4x4, int width, int height) {
  int y;
  void (*ARGBToRGB565DitherRow)(const uint8* src_argb, uint8* dst_rgb,
-      const uint32 dither4, int pix) = ARGBToRGB565DitherRow_C;
+      const uint32 dither4, int width) = ARGBToRGB565DitherRow_C;
  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
  }
@@ -921,7 +910,7 @@ int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
                 uint8* dst_rgb565, int dst_stride_rgb565,
                 int width, int height) {
  int y;
-  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+  void (*ARGBToRGB565Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToRGB565Row_C;
  if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) {
    return -1;
@@ -977,7 +966,7 @@ int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb1555, int dst_stride_argb1555,
                   int width, int height) {
  int y;
-  void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+  void (*ARGBToARGB1555Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToARGB1555Row_C;
  if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) {
    return -1;
@@ -1033,7 +1022,7 @@ int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
                   uint8* dst_argb4444, int dst_stride_argb4444,
                   int width, int height) {
  int y;
-  void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int pix) =
+  void (*ARGBToARGB4444Row)(const uint8* src_argb, uint8* dst_rgb, int width) =
      ARGBToARGB4444Row_C;
  if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) {
    return -1;
@@ -1093,7 +1082,7 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
  int y;
  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
      ARGBToYJRow_C;
  if (!src_argb ||
      !dst_yj || !dst_u || !dst_v ||
@@ -1157,21 +1146,24 @@ int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
  return 0;
 }

-// ARGB little endian (bgra in memory) to J422
+// Convert ARGB to J422. (JPeg full range I422).
 LIBYUV_API
 int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
-               uint8* dst_y, int dst_stride_y,
+               uint8* dst_yj, int dst_stride_yj,
               uint8* dst_u, int dst_stride_u,
               uint8* dst_v, int dst_stride_v,
               int width, int height) {
  int y;
-  void (*ARGBToUVJ422Row)(const uint8* src_argb, uint8* dst_u, uint8* dst_v,
-      int pix) = ARGBToUVJ422Row_C;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_y, int pix) =
+  void (*ARGBToUVJRow)(const uint8* src_argb0, int src_stride_argb,
+                       uint8* dst_u, uint8* dst_v, int width) = ARGBToUVJRow_C;
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
      ARGBToYJRow_C;
-  if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) {
+  if (!src_argb ||
+      !dst_yj || !dst_u || !dst_v ||
+      width <= 0 || height == 0) {
    return -1;
  }
+  // Negative height means invert the image.
  if (height < 0) {
    height = -height;
    src_argb = src_argb + (height - 1) * src_stride_argb;
@@ -1179,34 +1171,19 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
  }
  // Coalesce rows.
  if (src_stride_argb == width * 4 &&
-      dst_stride_y == width &&
+      dst_stride_yj == width &&
      dst_stride_u * 2 == width &&
      dst_stride_v * 2 == width) {
    width *= height;
    height = 1;
-    src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0;
+    src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0;
  }
-#if defined(HAS_ARGBTOUVJ422ROW_SSSE3)
-  if (TestCpuFlag(kCpuHasSSSE3)) {
-    ARGBToUVJ422Row = ARGBToUVJ422Row_Any_SSSE3;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVJ422Row = ARGBToUVJ422Row_SSSE3;
-    }
-  }
-#endif
-#if defined(HAS_ARGBTOUVJ422ROW_NEON)
-  if (TestCpuFlag(kCpuHasNEON)) {
-    ARGBToUVJ422Row = ARGBToUVJ422Row_Any_NEON;
-    if (IS_ALIGNED(width, 16)) {
-      ARGBToUVJ422Row = ARGBToUVJ422Row_NEON;
-    }
-  }
-#endif
-
-#if defined(HAS_ARGBTOYJROW_SSSE3)
+#if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3;
    ARGBToYJRow = ARGBToYJRow_Any_SSSE3;
    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVJRow = ARGBToUVJRow_SSSE3;
      ARGBToYJRow = ARGBToYJRow_SSSE3;
    }
  }
@@ -1227,12 +1204,20 @@ int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
    }
  }
 #endif
+#if defined(HAS_ARGBTOUVJROW_NEON)
+  if (TestCpuFlag(kCpuHasNEON)) {
+    ARGBToUVJRow = ARGBToUVJRow_Any_NEON;
+    if (IS_ALIGNED(width, 16)) {
+      ARGBToUVJRow = ARGBToUVJRow_NEON;
+    }
+  }
+#endif

  for (y = 0; y < height; ++y) {
-    ARGBToUVJ422Row(src_argb, dst_u, dst_v, width);
-    ARGBToYJRow(src_argb, dst_y, width);
+    ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width);
+    ARGBToYJRow(src_argb, dst_yj, width);
    src_argb += src_stride_argb;
-    dst_y += dst_stride_y;
+    dst_yj += dst_stride_yj;
    dst_u += dst_stride_u;
    dst_v += dst_stride_v;
  }
@@ -1245,7 +1230,7 @@ int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
               uint8* dst_yj, int dst_stride_yj,
               int width, int height) {
  int y;
-  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int pix) =
+  void (*ARGBToYJRow)(const uint8* src_argb, uint8* dst_yj, int width) =
      ARGBToYJRow_C;
  if (!src_argb || !dst_yj || width <= 0 || height == 0) {
    return -1;
--- a/third_party/libyuv/source/convert_jpeg.cc
+++ b/third_party/libyuv/source/convert_jpeg.cc
@@ -9,6 +9,7 @@
 */

 #include "libyuv/convert.h"
+#include "libyuv/convert_argb.h"

 #ifdef HAVE_JPEG
 #include "libyuv/mjpeg_decoder.h"
--- a/third_party/libyuv/source/convert_to_argb.cc
+++ b/third_party/libyuv/source/convert_to_argb.cc
@@ -23,7 +23,7 @@ namespace libyuv {
 extern "C" {
 #endif

-// Convert camera sample to I420 with cropping, rotation and vertical flip.
+// Convert camera sample to ARGB with cropping, rotation and vertical flip.
 // src_width is used for source stride computation
 // src_height is used to compute location of planes, and indicate inversion
 // sample_size is measured in bytes and is the size of the frame.
@@ -51,8 +51,8 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
  // also enable temporary buffer.
  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
      crop_argb == sample;
-  uint8* tmp_argb = crop_argb;
-  int tmp_argb_stride = argb_stride;
+  uint8* dest_argb = crop_argb;
+  int dest_argb_stride = argb_stride;
  uint8* rotate_buffer = NULL;
  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;

@@ -66,13 +66,13 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
  }

  if (need_buf) {
-    int argb_size = crop_width * abs_crop_height * 4;
+    int argb_size = crop_width * 4 * abs_crop_height;
    rotate_buffer = (uint8*)malloc(argb_size);
    if (!rotate_buffer) {
      return 1;  // Out of memory runtime error.
    }
    crop_argb = rotate_buffer;
-    argb_stride = crop_width;
+    argb_stride = crop_width * 4;
  }

  switch (format) {
@@ -176,7 +176,6 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
      break;
    // Triplanar formats
    case FOURCC_I420:
-    case FOURCC_YU12:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (src_width * crop_y + crop_x);
      const uint8* src_u;
@@ -291,7 +290,7 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
  if (need_buf) {
    if (!r) {
      r = ARGBRotate(crop_argb, argb_stride,
-                     tmp_argb, tmp_argb_stride,
+                     dest_argb, dest_argb_stride,
                     crop_width, abs_crop_height, rotation);
    }
    free(rotate_buffer);
--- a/third_party/libyuv/source/convert_to_i420.cc
+++ b/third_party/libyuv/source/convert_to_i420.cc
@@ -39,12 +39,13 @@ int ConvertToI420(const uint8* sample,
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8* src;
  const uint8* src_uv;
-  int abs_src_height = (src_height < 0) ? -src_height : src_height;
-  int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+  const int abs_src_height = (src_height < 0) ? -src_height : src_height;
+  // TODO(nisse): Why allow crop_height < 0?
+  const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
      format != FOURCC_NV12 && format != FOURCC_NV21 &&
-      format != FOURCC_YU12 && format != FOURCC_YV12) || y == sample;
+      format != FOURCC_YV12) || y == sample;
  uint8* tmp_y = y;
  uint8* tmp_u = u;
  uint8* tmp_v = v;
@@ -52,16 +53,14 @@ int ConvertToI420(const uint8* sample,
  int tmp_u_stride = u_stride;
  int tmp_v_stride = v_stride;
  uint8* rotate_buffer = NULL;
-  int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
+  const int inv_crop_height =
+      (src_height < 0) ? -abs_crop_height : abs_crop_height;

  if (!y || !u || !v || !sample ||
      src_width <= 0 || crop_width <= 0  ||
      src_height == 0 || crop_height == 0) {
    return -1;
  }
-  if (src_height < 0) {
-    inv_crop_height = -inv_crop_height;
-  }

  // One pass rotation is available for some formats. For the rest, convert
  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
@@ -214,7 +213,6 @@ int ConvertToI420(const uint8* sample,
      break;
    // Triplanar formats
    case FOURCC_I420:
-    case FOURCC_YU12:
    case FOURCC_YV12: {
      const uint8* src_y = sample + (src_width * crop_y + crop_x);
      const uint8* src_u;
--- a/third_party/libyuv/source/cpu_id.cc
+++ b/third_party/libyuv/source/cpu_id.cc
@@ -10,12 +10,12 @@

 #include "libyuv/cpu_id.h"

-#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
+#if defined(_MSC_VER)
 #include <intrin.h>  // For __cpuidex()
 #endif
 #if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
-    defined(_MSC_VER) && !defined(__clang__) && (_MSC_FULL_VER >= 160040219)
+    defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
 #include <immintrin.h>  // For _xgetbv()
 #endif

@@ -36,7 +36,8 @@ extern "C" {

 // For functions that use the stack and have runtime checks for overflow,
 // use SAFEBUFFERS to avoid additional check.
-#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
+    !defined(__clang__)
 #define SAFEBUFFERS __declspec(safebuffers)
 #else
 #define SAFEBUFFERS
@@ -48,9 +49,9 @@ extern "C" {
    !defined(__pnacl__) && !defined(__CLR_VER)
 LIBYUV_API
 void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
-#if (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
+#if defined(_MSC_VER)
 // Visual C version uses intrinsic or inline x86 assembly.
-#if (_MSC_FULL_VER >= 160040219)
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  __cpuidex((int*)(cpu_info), info_eax, info_ecx);
 #elif defined(_M_IX86)
  __asm {
@@ -63,7 +64,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
    mov        [edi + 8], ecx
    mov        [edi + 12], edx
  }
-#else
+#else  // Visual C but not x86
  if (info_ecx == 0) {
    __cpuid((int*)(cpu_info), info_eax);
  } else {
@@ -71,9 +72,9 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
  }
 #endif
 // GCC version uses inline x86 assembly.
-#else  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
+#else  // defined(_MSC_VER)
  uint32 info_ebx, info_edx;
-  asm volatile (  // NOLINT
+  asm volatile (
 #if defined( __i386__) && defined(__PIC__)
    // Preserve ebx for fpic 32 bit.
    "mov %%ebx, %%edi                          \n"
@@ -89,7 +90,7 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
  cpu_info[1] = info_ebx;
  cpu_info[2] = info_ecx;
  cpu_info[3] = info_edx;
-#endif  // (defined(_MSC_VER) && !defined(__clang__)) && !defined(__clang__)
+#endif  // defined(_MSC_VER)
 }
 #else  // (defined(_M_IX86) || defined(_M_X64) ...
 LIBYUV_API
@@ -98,28 +99,37 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
 }
 #endif

-// TODO(fbarchard): Enable xgetbv when validator supports it.
+// For VS2010 and earlier emit can be used:
+//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
+//  __asm {
+//    xor        ecx, ecx    // xcr 0
+//    xgetbv
+//    mov        xcr0, eax
+//  }
+// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
+// https://code.google.com/p/libyuv/issues/detail?id=529
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", off)
+#endif
 #if (defined(_M_IX86) || defined(_M_X64) || \
    defined(__i386__) || defined(__x86_64__)) && \
    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
 #define HAS_XGETBV
 // X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
-int TestOsSaveYmm() {
+int GetXCR0() {
  uint32 xcr0 = 0u;
-#if (defined(_MSC_VER) && !defined(__clang__)) && (_MSC_FULL_VER >= 160040219)
+#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  xcr0 = (uint32)(_xgetbv(0));  // VS2010 SP1 required.
-#elif defined(_M_IX86) && defined(_MSC_VER) && !defined(__clang__)
-  __asm {
-    xor        ecx, ecx    // xcr 0
-    _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
-    mov        xcr0, eax
-  }
 #elif defined(__i386__) || defined(__x86_64__)
  asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
 #endif  // defined(__i386__) || defined(__x86_64__)
-  return((xcr0 & 6) == 6);  // Is ymm saved?
+  return xcr0;
 }
 #endif  // defined(_M_IX86) || defined(_M_X64) ..
+// Return optimization to previous setting.
+#if defined(_M_IX86) && (_MSC_VER < 1900)
+#pragma optimize("g", on)
+#endif

 // based on libvpx arm_cpudetect.c
 // For Arm, but public to allow testing on any CPU
@@ -151,30 +161,9 @@ int ArmCpuCaps(const char* cpuinfo_name) {
  return 0;
 }

-#if defined(__mips__) && defined(__linux__)
-static int MipsCpuCaps(const char* search_string) {
-  char cpuinfo_line[512];
-  const char* file_name = "/proc/cpuinfo";
-  FILE* f = fopen(file_name, "r");
-  if (!f) {
-    // Assume DSP if /proc/cpuinfo is unavailable.
-    // This will occur for Chrome sandbox for Pepper or Render process.
-    return kCpuHasMIPS_DSP;
-  }
-  while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f) != NULL) {
-    if (strstr(cpuinfo_line, search_string) != NULL) {
-      fclose(f);
-      return kCpuHasMIPS_DSP;
-    }
-  }
-  fclose(f);
-  return 0;
-}
-#endif
-
 // CPU detect function for SIMD instruction sets.
 LIBYUV_API
-int cpu_info_ = kCpuInit;  // cpu_info is not initialized yet.
+int cpu_info_ = 0;  // cpu_info is not initialized yet.

 // Test environment variable for disabling CPU features. Any non-zero value
 // to disable. Zero ignored to make it easy to set the variable on/off.
@@ -197,8 +186,9 @@ static LIBYUV_BOOL TestEnv(const char*) {

 LIBYUV_API SAFEBUFFERS
 int InitCpuFlags(void) {
+  // TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
+  int cpu_info = 0;
 #if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
-
  uint32 cpu_info0[4] = { 0, 0, 0, 0 };
  uint32 cpu_info1[4] = { 0, 0, 0, 0 };
  uint32 cpu_info7[4] = { 0, 0, 0, 0 };
@@ -207,66 +197,66 @@ int InitCpuFlags(void) {
  if (cpu_info0[0] >= 7) {
    CpuId(7, 0, cpu_info7);
  }
-  cpu_info_ = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
-              ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
-              ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
-              ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
-              ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
-              ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
-              kCpuHasX86;
+  cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
+             ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
+             ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
+             ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
+             ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
+             ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
+             kCpuHasX86;

 #ifdef HAS_XGETBV
-  if ((cpu_info1[2] & 0x18000000) == 0x18000000 &&  // AVX and OSSave
-      TestOsSaveYmm()) {  // Saves YMM.
-    cpu_info_ |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
-                 kCpuHasAVX;
+  // AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
+  if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) &&  // AVX and OSXSave
+      ((GetXCR0() & 6) == 6)) {  // Test OS saves YMM registers
+    cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
+
+    // Detect AVX512bw
+    if ((GetXCR0() & 0xe0) == 0xe0) {
+      cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
+    }
  }
 #endif
+
  // Environment variable overrides for testing.
  if (TestEnv("LIBYUV_DISABLE_X86")) {
-    cpu_info_ &= ~kCpuHasX86;
+    cpu_info &= ~kCpuHasX86;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE2")) {
-    cpu_info_ &= ~kCpuHasSSE2;
+    cpu_info &= ~kCpuHasSSE2;
  }
  if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
-    cpu_info_ &= ~kCpuHasSSSE3;
+    cpu_info &= ~kCpuHasSSSE3;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE41")) {
-    cpu_info_ &= ~kCpuHasSSE41;
+    cpu_info &= ~kCpuHasSSE41;
  }
  if (TestEnv("LIBYUV_DISABLE_SSE42")) {
-    cpu_info_ &= ~kCpuHasSSE42;
+    cpu_info &= ~kCpuHasSSE42;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX")) {
-    cpu_info_ &= ~kCpuHasAVX;
+    cpu_info &= ~kCpuHasAVX;
  }
  if (TestEnv("LIBYUV_DISABLE_AVX2")) {
-    cpu_info_ &= ~kCpuHasAVX2;
+    cpu_info &= ~kCpuHasAVX2;
  }
  if (TestEnv("LIBYUV_DISABLE_ERMS")) {
-    cpu_info_ &= ~kCpuHasERMS;
+    cpu_info &= ~kCpuHasERMS;
  }
  if (TestEnv("LIBYUV_DISABLE_FMA3")) {
-    cpu_info_ &= ~kCpuHasFMA3;
+    cpu_info &= ~kCpuHasFMA3;
+  }
+  if (TestEnv("LIBYUV_DISABLE_AVX3")) {
+    cpu_info &= ~kCpuHasAVX3;
  }
 #endif
 #if defined(__mips__) && defined(__linux__)
-  // Linux mips parse text file for dsp detect.
-  cpu_info_ = MipsCpuCaps("dsp");  // set kCpuHasMIPS_DSP.
 #if defined(__mips_dspr2)
-  cpu_info_ |= kCpuHasMIPS_DSPR2;
+  cpu_info |= kCpuHasDSPR2;
 #endif
-  cpu_info_ |= kCpuHasMIPS;
-
-  if (getenv("LIBYUV_DISABLE_MIPS")) {
-    cpu_info_ &= ~kCpuHasMIPS;
-  }
-  if (getenv("LIBYUV_DISABLE_MIPS_DSP")) {
-    cpu_info_ &= ~kCpuHasMIPS_DSP;
-  }
-  if (getenv("LIBYUV_DISABLE_MIPS_DSPR2")) {
-    cpu_info_ &= ~kCpuHasMIPS_DSPR2;
+  cpu_info |= kCpuHasMIPS;
+  if (getenv("LIBYUV_DISABLE_DSPR2")) {
+    cpu_info &= ~kCpuHasDSPR2;
  }
 #endif
 #if defined(__arm__) || defined(__aarch64__)
@@ -274,28 +264,31 @@ int InitCpuFlags(void) {
 // __ARM_NEON__ generates code that requires Neon.  NaCL also requires Neon.
 // For Linux, /proc/cpuinfo can be tested but without that assume Neon.
 #if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
-  cpu_info_ = kCpuHasNEON;
+  cpu_info = kCpuHasNEON;
 // For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
 // flag in it.
 // So for aarch64, neon enabling is hard coded here.
 #endif
 #if defined(__aarch64__)
-  cpu_info_ = kCpuHasNEON;
+  cpu_info = kCpuHasNEON;
 #else
  // Linux arm parse text file for neon detect.
-  cpu_info_ = ArmCpuCaps("/proc/cpuinfo");
+  cpu_info = ArmCpuCaps("/proc/cpuinfo");
 #endif
-  cpu_info_ |= kCpuHasARM;
+  cpu_info |= kCpuHasARM;
  if (TestEnv("LIBYUV_DISABLE_NEON")) {
-    cpu_info_ &= ~kCpuHasNEON;
+    cpu_info &= ~kCpuHasNEON;
  }
 #endif  // __arm__
  if (TestEnv("LIBYUV_DISABLE_ASM")) {
-    cpu_info_ = 0;
+    cpu_info = 0;
  }
-  return cpu_info_;
+  cpu_info  |= kCpuInitialized;
+  cpu_info_ = cpu_info;
+  return cpu_info;
 }

+// Note that use of this function is not thread safe.
 LIBYUV_API
 void MaskCpuFlags(int enable_flags) {
  cpu_info_ = InitCpuFlags() & enable_flags;
--- a/third_party/libyuv/source/mjpeg_decoder.cc
+++ b/third_party/libyuv/source/mjpeg_decoder.cc
@@ -59,10 +59,10 @@ const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;
 // Methods that are passed to jpeglib.
 boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
 void init_source(jpeg_decompress_struct* cinfo);
-void skip_input_data(jpeg_decompress_struct* cinfo,
-                     long num_bytes);  // NOLINT
+void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes);  // NOLINT
 void term_source(jpeg_decompress_struct* cinfo);
 void ErrorHandler(jpeg_common_struct* cinfo);
+void OutputHandler(jpeg_common_struct* cinfo);

 MJpegDecoder::MJpegDecoder()
    : has_scanline_padding_(LIBYUV_FALSE),
@@ -78,6 +78,7 @@ MJpegDecoder::MJpegDecoder()
  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
  // Override standard exit()-based error handler.
  error_mgr_->base.error_exit = &ErrorHandler;
+  error_mgr_->base.output_message = &OutputHandler;
 #endif
  decompress_struct_->client_data = NULL;
  source_mgr_->init_source = &init_source;
@@ -429,8 +430,7 @@ boolean fill_input_buffer(j_decompress_ptr cinfo) {
  return TRUE;
 }

-void skip_input_data(j_decompress_ptr cinfo,
-                     long num_bytes) {  // NOLINT
+void skip_input_data(j_decompress_ptr cinfo, long num_bytes) {  // NOLINT
  cinfo->src->next_input_byte += num_bytes;
 }

@@ -458,7 +458,12 @@ void ErrorHandler(j_common_ptr cinfo) {
  // and causes it to return (for a second time) with value 1.
  longjmp(mgr->setjmp_buffer, 1);
 }
-#endif
+
+void OutputHandler(j_common_ptr cinfo) {
+  // Suppress fprintf warnings.
+}
+
+#endif  // HAVE_SETJMP

 void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
  if (num_outbufs != num_outbufs_) {
--- a/Show More
+++ b/Show More