vpx_dsp/get_prob: relocate den == 0 test

Move the den == 0 test to get_binary_prob(). The only other caller, mode_mv_merge_probs(), already does its own test for 0. BUG=chromium:639712 Change-Id: I1178688706baeca2883f7aadbc254abb219a44ce (cherry picked from commit 93c823e24b)
vpx_dsp/get_prob: make clip_prob branchless
2016-10-04 15:18:58 -07:00 · 2016-10-04 15:18:58 -07:00
696 changed files with 76344 additions and 113453 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,11 +1,10 @@
 ---
 Language:        Cpp
 # BasedOnStyle:  Google
-# Generated with clang-format 4.0.1
+# Generated with clang-format 3.7.1
 AccessModifierOffset: -1
-AlignAfterOpenBracket: Align
+AlignAfterOpenBracket: true
 AlignConsecutiveAssignments: false
-AlignConsecutiveDeclarations: false
 AlignEscapedNewlinesLeft: true
 AlignOperands:   true
 AlignTrailingComments: true
@@ -16,29 +15,14 @@ AllowShortFunctionsOnASingleLine: All
 AllowShortIfStatementsOnASingleLine: true
 AllowShortLoopsOnASingleLine: true
 AlwaysBreakAfterDefinitionReturnType: None
-AlwaysBreakAfterReturnType: None
 AlwaysBreakBeforeMultilineStrings: true
 AlwaysBreakTemplateDeclarations: true
 BinPackArguments: true
 BinPackParameters: true
-BraceWrapping:
-  AfterClass:      false
-  AfterControlStatement: false
-  AfterEnum:       false
-  AfterFunction:   false
-  AfterNamespace:  false
-  AfterObjCDeclaration: false
-  AfterStruct:     false
-  AfterUnion:      false
-  BeforeCatch:     false
-  BeforeElse:      false
-  IndentBraces:    false
 BreakBeforeBinaryOperators: None
 BreakBeforeBraces: Attach
 BreakBeforeTernaryOperators: true
 BreakConstructorInitializersBeforeComma: false
-BreakAfterJavaFieldAnnotations: false
-BreakStringLiterals: true
 ColumnLimit:     80
 CommentPragmas:  '^ IWYU pragma:'
 ConstructorInitializerAllOnOneLineOrOnePerLine: false
@@ -49,19 +33,9 @@ DerivePointerAlignment: false
 DisableFormat:   false
 ExperimentalAutoDetectBinPacking: false
 ForEachMacros:   [ foreach, Q_FOREACH, BOOST_FOREACH ]
-IncludeCategories:
-  - Regex:           '^<.*\.h>'
-    Priority:        1
-  - Regex:           '^<.*'
-    Priority:        2
-  - Regex:           '.*'
-    Priority:        3
-IncludeIsMainRegex: '([-_](test|unittest))?$'
 IndentCaseLabels: true
 IndentWidth:     2
 IndentWrappedFunctionNames: false
-JavaScriptQuotes: Leave
-JavaScriptWrapImports: true
 KeepEmptyLinesAtTheStartOfBlocks: false
 MacroBlockBegin: ''
 MacroBlockEnd:   ''
@@ -77,10 +51,7 @@ PenaltyBreakString: 1000
 PenaltyExcessCharacter: 1000000
 PenaltyReturnTypeOnItsOwnLine: 200
 PointerAlignment: Right
-ReflowComments:  true
-SortIncludes:    false
 SpaceAfterCStyleCast: false
-SpaceAfterTemplateKeyword: true
 SpaceBeforeAssignmentOperators: true
 SpaceBeforeParens: ControlStatements
 SpaceInEmptyParentheses: false
--- a/.gitignore
+++ b/.gitignore
@@ -37,9 +37,9 @@
 /examples/twopass_encoder
 /examples/vp8_multi_resolution_encoder
 /examples/vp8cx_set_ref
-/examples/vp9cx_set_ref
 /examples/vp9_lossless_encoder
-/examples/vp9_spatial_svc_encoder
+/examples/vp9_spatial_scalable_encoder
+/examples/vpx_temporal_scalable_patterns
 /examples/vpx_temporal_svc_encoder
 /ivfdec
 /ivfdec.dox
@@ -50,9 +50,6 @@
 /samples.dox
 /test_intra_pred_speed
 /test_libvpx
-/tools.dox
-/tools/*.dox
-/tools/tiny_ssim
 /vp8_api1_migration.dox
 /vp[89x]_rtcd.h
 /vpx.pc
--- a/.mailmap
+++ b/.mailmap
@@ -3,7 +3,6 @@ Aℓex Converse <aconverse@google.com>
 Aℓex Converse <aconverse@google.com> <alex.converse@gmail.com>
 Alexis Ballier <aballier@gentoo.org> <alexis.ballier@gmail.com>
 Alpha Lam <hclam@google.com> <hclam@chromium.org>
-Chris Cunningham <chcunningham@chromium.org>
 Daniele Castagna <dcastagna@chromium.org> <dcastagna@google.com>
 Deb Mukherjee <debargha@google.com>
 Erik Niemeyer <erik.a.niemeyer@intel.com> <erik.a.niemeyer@gmail.com>
@@ -22,21 +21,17 @@ Marco Paniconi <marpan@google.com>
 Marco Paniconi <marpan@google.com> <marpan@chromium.org>
 Pascal Massimino <pascal.massimino@gmail.com>
 Paul Wilkins <paulwilkins@google.com>
-Peter Boström <pbos@chromium.org> <pbos@google.com>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Peter de Rivaz <peter.derivaz@gmail.com> <peter.derivaz@argondesign.com>
 Ralph Giles <giles@xiph.org> <giles@entropywave.com>
 Ralph Giles <giles@xiph.org> <giles@mozilla.com>
 Ronald S. Bultje <rsbultje@gmail.com> <rbultje@google.com>
 Sami Pietilä <samipietila@google.com>
-Shiyou Yin <yinshiyou-hf@loongson.cn>
 Tamar Levy <tamar.levy@intel.com>
 Tamar Levy <tamar.levy@intel.com> <levytamar82@gmail.com>
 Tero Rintaluoma <teror@google.com> <tero.rintaluoma@on2.com>
 Timothy B. Terriberry <tterribe@xiph.org> <tterriberry@mozilla.com>
 Tom Finegan <tomfinegan@google.com>
 Tom Finegan <tomfinegan@google.com> <tomfinegan@chromium.org>
-Urvang Joshi <urvang@google.com> <urvang@chromium.org>
-Yaowu Xu <yaowu@google.com> <adam@xuyaowu.com>
 Yaowu Xu <yaowu@google.com> <yaowu@xuyaowu.com>
 Yaowu Xu <yaowu@google.com> <Yaowu Xu>
--- a/AUTHORS
+++ b/AUTHORS
@@ -3,13 +3,11 @@

 Aaron Watry <awatry@gmail.com>
 Abo Talib Mahfoodh <ab.mahfoodh@gmail.com>
+Adam Xu <adam@xuyaowu.com>
 Adrian Grange <agrange@google.com>
 Aℓex Converse <aconverse@google.com>
 Ahmad Sharif <asharif@google.com>
-Aleksey Vasenev <margtu-fivt@ya.ru>
-Alexander Potapenko <glider@google.com>
 Alexander Voronov <avoronov@graphics.cs.msu.ru>
-Alexandra Hájková <alexandra.khirnova@gmail.com>
 Alexis Ballier <aballier@gentoo.org>
 Alok Ahuja <waveletcoeff@gmail.com>
 Alpha Lam <hclam@google.com>
@@ -17,7 +15,6 @@ A.Mahfoodh <ab.mahfoodh@gmail.com>
 Ami Fischman <fischman@chromium.org>
 Andoni Morales Alastruey <ylatuya@gmail.com>
 Andres Mejia <mcitadel@gmail.com>
-Andrew Lewis <andrewlewis@google.com>
 Andrew Russell <anrussell@google.com>
 Angie Chiang <angiebird@google.com>
 Aron Rosenberg <arosenberg@logitech.com>
@@ -25,14 +22,11 @@ Attila Nagy <attilanagy@google.com>
 Brion Vibber <bvibber@wikimedia.org>
 changjun.yang <changjun.yang@intel.com>
 Charles 'Buck' Krasic <ckrasic@google.com>
-Cheng Chen <chengchen@google.com>
 chm <chm@rock-chips.com>
-Chris Cunningham <chcunningham@chromium.org>
 Christian Duvivier <cduvivier@google.com>
 Daniele Castagna <dcastagna@chromium.org>
 Daniel Kang <ddkang@google.com>
 Deb Mukherjee <debargha@google.com>
-Deepa K G <deepa.kg@ittiam.com>
 Dim Temp <dimtemp0@gmail.com>
 Dmitry Kovalev <dkovalev@google.com>
 Dragan Mrdjan <dmrdjan@mips.com>
@@ -43,21 +37,17 @@ Fabio Pedretti <fabio.ped@libero.it>
 Frank Galligan <fgalligan@google.com>
 Fredrik Söderquist <fs@opera.com>
 Fritz Koenig <frkoenig@google.com>
-Gabriel Marin <gmx@chromium.org>
 Gaute Strokkenes <gaute.strokkenes@broadcom.com>
 Geza Lore <gezalore@gmail.com>
 Ghislain MARY <ghislainmary2@gmail.com>
 Giuseppe Scrivano <gscrivano@gnu.org>
 Gordana Cmiljanovic <gordana.cmiljanovic@imgtec.com>
-Gregor Jasny <gjasny@gmail.com>
 Guillaume Martres <gmartres@google.com>
 Guillermo Ballester Valor <gbvalor@gmail.com>
 Hangyu Kuang <hkuang@google.com>
 Hanno Böck <hanno@hboeck.de>
-Han Shen <shenhan@google.com>
 Henrik Lundin <hlundin@google.com>
 Hui Su <huisu@google.com>
-Ivan Krasin <krasin@chromium.org>
 Ivan Maltz <ivanmaltz@google.com>
 Jacek Caban <cjacek@gmail.com>
 Jacky Chen <jackychen@google.com>
@@ -71,7 +61,6 @@ Jean-Yves Avenard <jyavenard@mozilla.com>
 Jeff Faust <jfaust@google.com>
 Jeff Muizelaar <jmuizelaar@mozilla.com>
 Jeff Petkau <jpet@chromium.org>
-Jerome Jiang <jianj@google.com>
 Jia Jia <jia.jia@linaro.org>
 Jian Zhou <zhoujian@google.com>
 Jim Bankoski <jimbankoski@google.com>
@@ -86,9 +75,7 @@ Joshua Litt <joshualitt@google.com>
 Julia Robson <juliamrobson@gmail.com>
 Justin Clift <justin@salasaga.org>
 Justin Lebar <justin.lebar@gmail.com>
-Kaustubh Raste <kaustubh.raste@imgtec.com>
 KO Myung-Hun <komh@chollian.net>
-Kyle Siefring <kylesiefring@gmail.com>
 Lawrence Velázquez <larryv@macports.org>
 Linfeng Zhang <linfengz@google.com>
 Lou Quillio <louquillio@google.com>
@@ -104,12 +91,8 @@ Michael Kohler <michaelkohler@live.com>
 Mike Frysinger <vapier@chromium.org>
 Mike Hommey <mhommey@mozilla.com>
 Mikhal Shemer <mikhal@google.com>
-Min Chen <chenm003@gmail.com>
 Minghai Shang <minghai@google.com>
-Min Ye <yeemmi@google.com>
-Moriyoshi Koizumi <mozo@mozo.jp>
 Morton Jonuschat <yabawock@gmail.com>
-Nathan E. Egge <negge@mozilla.com>
 Nico Weber <thakis@chromium.org>
 Parag Salasakar <img.mips1@gmail.com>
 Pascal Massimino <pascal.massimino@gmail.com>
@@ -118,22 +101,16 @@ Paul Wilkins <paulwilkins@google.com>
 Pavol Rusnak <stick@gk2.sk>
 Paweł Hajdan <phajdan@google.com>
 Pengchong Jin <pengchong@google.com>
-Peter Boström <pbos@chromium.org>
-Peter Collingbourne <pcc@chromium.org>
 Peter de Rivaz <peter.derivaz@gmail.com>
 Philip Jägenstedt <philipj@opera.com>
 Priit Laes <plaes@plaes.org>
 Rafael Ávila de Espíndola <rafael.espindola@gmail.com>
 Rafaël Carré <funman@videolan.org>
-Rafael de Lucena Valle <rafaeldelucena@gmail.com>
-Rahul Chaudhry <rahulchaudhry@google.com>
 Ralph Giles <giles@xiph.org>
-Ranjit Kumar Tulabandu <ranjit.tulabandu@ittiam.com>
 Rob Bradford <rob@linux.intel.com>
 Ronald S. Bultje <rsbultje@gmail.com>
 Rui Ueyama <ruiu@google.com>
 Sami Pietilä <samipietila@google.com>
-Sarah Parker <sarahparker@google.com>
 Sasi Inguva <isasi@google.com>
 Scott Graham <scottmg@chromium.org>
 Scott LaVarnway <slavarnway@google.com>
@@ -141,11 +118,9 @@ Sean McGovern <gseanmcg@gmail.com>
 Sergey Kolomenkin <kolomenkin@gmail.com>
 Sergey Ulanov <sergeyu@chromium.org>
 Shimon Doodkin <helpmepro1@gmail.com>
-Shiyou Yin <yinshiyou-hf@loongson.cn>
 Shunyao Li <shunyaoli@google.com>
 Stefan Holmer <holmer@google.com>
 Suman Sunkara <sunkaras@google.com>
-Sylvestre Ledru <sylvestre@mozilla.com>
 Taekhyun Kim <takim@nvidia.com>
 Takanori MATSUURA <t.matsuu@gmail.com>
 Tamar Levy <tamar.levy@intel.com>
@@ -155,10 +130,7 @@ Thijs Vermeir <thijsvermeir@gmail.com>
 Tim Kopp <tkopp@google.com>
 Timothy B. Terriberry <tterribe@xiph.org>
 Tom Finegan <tomfinegan@google.com>
-Tristan Matthews <le.businessman@gmail.com>
-Urvang Joshi <urvang@google.com>
 Vignesh Venkatasubramanian <vigneshv@google.com>
-Vlad Tsyrklevich <vtsyrklevich@chromium.org>
 Yaowu Xu <yaowu@google.com>
 Yi Luo <luoyi@google.com>
 Yongzhe Wang <yongzhe@google.com>
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,44 +1,3 @@
-2017-01-04 v1.7.0 "Mandarin Duck"
-  This release focused on high bit depth performance (10/12 bit) and vp9
-  encoding improvements.
-
-  - Upgrading:
-    This release is ABI incompatible due to new vp9 encoder features.
-
-    Frame parallel decoding for vp9 has been removed.
-
-  - Enhancements:
-    vp9 encoding supports additional threads with --row-mt. This can be greater
-    than the number of tiles.
-
-    Two new vp9 encoder options have been added:
-      --corpus-complexity
-      --tune-content=film
-
-    Additional tooling for respecting the vp9 "level" profiles has been added.
-
-  - Bug fixes:
-    A variety of fuzzing issues.
-    vp8 threading fix for ARM.
-    Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
-    Reject invalid multi resolution configurations.
-
-2017-01-09 v1.6.1 "Long Tailed Duck"
-  This release improves upon the VP9 encoder and speeds up the encoding and
-  decoding processes.
-
-  - Upgrading:
-    This release is ABI compatible with 1.6.0.
-
-  - Enhancements:
-    Faster VP9 encoding and decoding.
-    High bit depth builds now provide similar speed for 8 bit encode and decode
-    for x86 targets. Other platforms and higher bit depth improvements are in
-    progress.
-
-  - Bug Fixes:
-    A variety of fuzzing issues.
-
 2016-07-20 v1.6.0 "Khaki Campbell Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.
--- a/README
+++ b/README
@@ -1,4 +1,4 @@
-README - 24 January 2018
+README - 20 July 2016

 Welcome to the WebM VP8/VP9 Codec SDK!

@@ -47,7 +47,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
  --help output of the configure script. As of this writing, the list of
  available targets is:

-    arm64-android-gcc
    arm64-darwin-gcc
    arm64-linux-gcc
    armv7-android-gcc
@@ -58,13 +57,10 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    armv7-win32-vs11
    armv7-win32-vs12
    armv7-win32-vs14
-    armv7-win32-vs15
    armv7s-darwin-gcc
    armv8-linux-gcc
    mips32-linux-gcc
    mips64-linux-gcc
-    ppc64-linux-gcc
-    ppc64le-linux-gcc
    sparc-solaris-gcc
    x86-android-gcc
    x86-darwin8-gcc
@@ -77,7 +73,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-darwin13-gcc
    x86-darwin14-gcc
    x86-darwin15-gcc
-    x86-darwin16-gcc
    x86-iphonesimulator-gcc
    x86-linux-gcc
    x86-linux-icc
@@ -88,7 +83,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86-win32-vs11
    x86-win32-vs12
    x86-win32-vs14
-    x86-win32-vs15
    x86_64-android-gcc
    x86_64-darwin9-gcc
    x86_64-darwin10-gcc
@@ -97,7 +91,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-darwin13-gcc
    x86_64-darwin14-gcc
    x86_64-darwin15-gcc
-    x86_64-darwin16-gcc
    x86_64-iphonesimulator-gcc
    x86_64-linux-gcc
    x86_64-linux-icc
@@ -107,7 +100,6 @@ COMPILING THE APPLICATIONS/LIBRARIES:
    x86_64-win64-vs11
    x86_64-win64-vs12
    x86_64-win64-vs14
-    x86_64-win64-vs15
    generic-gnu

  The generic-gnu target, in conjunction with the CROSS environment variable,
--- a/args.c
+++ b/args.c
@@ -13,7 +13,6 @@
 #include <limits.h>
 #include "args.h"

-#include "vpx/vpx_integer.h"
 #include "vpx_ports/msvc.h"

 #if defined(__GNUC__) && __GNUC__
@@ -119,13 +118,13 @@ void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
 }

 unsigned int arg_parse_uint(const struct arg *arg) {
-  uint32_t rawval;
+  long int rawval;
  char *endptr;

-  rawval = (uint32_t)strtoul(arg->val, &endptr, 10);
+  rawval = strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
-    if (rawval <= UINT_MAX) return rawval;
+    if (rawval >= 0 && rawval <= UINT_MAX) return (unsigned int)rawval;

    die("Option %s: Value %ld out of range for unsigned int\n", arg->name,
        rawval);
@@ -136,10 +135,10 @@ unsigned int arg_parse_uint(const struct arg *arg) {
 }

 int arg_parse_int(const struct arg *arg) {
-  int32_t rawval;
+  long int rawval;
  char *endptr;

-  rawval = (int32_t)strtol(arg->val, &endptr, 10);
+  rawval = strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;
--- a/build/make/Android.mk
+++ b/build/make/Android.mk
@@ -41,32 +41,10 @@
 # Running ndk-build will build libvpx and include it in your project.
 #

-# Alternatively, building the examples and unit tests can be accomplished in the
-# following way:
-#
-# Create a standalone toolchain from the NDK:
-# https://developer.android.com/ndk/guides/standalone_toolchain.html
-#
-# For example - to test on arm64 devices with clang:
-# $NDK/build/tools/make_standalone_toolchain.py \
-#   --arch arm64 --install-dir=/tmp/my-android-toolchain
-# export PATH=/tmp/my-android-toolchain/bin:$PATH
-# CROSS=aarch64-linux-android- CC=clang CXX=clang++ /path/to/libvpx/configure \
-#   --target=arm64-android-gcc
-#
-# Push the resulting binaries to a device and run them:
-# adb push test_libvpx /data/tmp/test_libvpx
-# adb shell /data/tmp/test_libvpx --gtest_filter=\*Sixtap\*
-#
-# Make sure to push the test data as well and set LIBVPX_TEST_DATA
-
 CONFIG_DIR := $(LOCAL_PATH)/
 LIBVPX_PATH := $(LOCAL_PATH)/libvpx
 ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
 ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
-ifneq ($(V),1)
-  qexec := @
-endif

 # Use the makefiles generated by upstream configure to determine which files to
 # build. Also set any architecture-specific flags.
@@ -74,7 +52,7 @@ ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq  ($(TARGET_ARCH_ABI),arm64-v8a)
-  include $(CONFIG_DIR)libs-arm64-android-gcc.mk
+  include $(CONFIG_DIR)libs-armv8-android-gcc.mk
  LOCAL_ARM_MODE := arm
 else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
@@ -104,10 +82,10 @@ LOCAL_CFLAGS := -O3
 # like x86inc.asm and x86_abi_support.asm
 LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

-.PRECIOUS: %.asm.S
-$(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
-	$(qexec)mkdir -p $(dir $@)
-	$(qexec)$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@
+.PRECIOUS: %.asm.s
+$(ASM_CNV_PATH)/libvpx/%.asm.s: $(LIBVPX_PATH)/%.asm
+	@mkdir -p $(dir $@)
+	@$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

 # For building *_rtcd.h, which have rules in libs.mk
 TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
@@ -135,7 +113,7 @@ endif

 # Pull out assembly files, splitting NEON from the rest.  This is
 # done to specify that the NEON assembly files use NEON assembler flags.
-# x86 assembly matches %.asm, arm matches %.asm.S
+# x86 assembly matches %.asm, arm matches %.asm.s

 # x86:

@@ -143,44 +121,31 @@ CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE))
 LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file))

 # arm:
-CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.S, $(CODEC_SRCS_UNIQUE))
+CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.s, $(CODEC_SRCS_UNIQUE))
 CODEC_SRCS_ASM_ARM = $(foreach v, \
                     $(CODEC_SRCS_ASM_ARM_ALL), \
                     $(if $(findstring neon,$(v)),,$(v)))
-CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \
-                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
+CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.s, \
+                         $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
                         $(CODEC_SRCS_ASM_ARM))
 LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS)

 ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
-  ASM_INCLUDES := vpx_dsp/arm/idct_neon.asm.S
  CODEC_SRCS_ASM_NEON = $(foreach v, \
                        $(CODEC_SRCS_ASM_ARM_ALL),\
                        $(if $(findstring neon,$(v)),$(v),))
-  CODEC_SRCS_ASM_NEON := $(filter-out $(addprefix %, $(ASM_INCLUDES)), \
-                         $(CODEC_SRCS_ASM_NEON))
-  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \
-                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \
+  CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.s, \
+                                $(ASM_CNV_PATH_LOCAL)/libvpx/%.s, \
                                $(CODEC_SRCS_ASM_NEON))
-  LOCAL_SRC_FILES += $(patsubst %.S, \
-                     %.S.neon, \
+  LOCAL_SRC_FILES += $(patsubst %.s, \
+                     %.s.neon, \
                     $(CODEC_SRCS_ASM_NEON_ADS2GAS))
-
-  NEON_ASM_TARGETS = $(patsubst %.S, \
-                     $(ASM_CNV_PATH)/libvpx/%.S, \
-                     $(CODEC_SRCS_ASM_NEON))
-# add a dependency to the full path to the ads2gas output to ensure the
-# includes are converted first.
-ifneq ($(strip $(NEON_ASM_TARGETS)),)
-$(NEON_ASM_TARGETS): $(addprefix $(ASM_CNV_PATH)/libvpx/, $(ASM_INCLUDES))
-endif
 endif

 LOCAL_CFLAGS += \
    -DHAVE_CONFIG_H=vpx_config.h \
    -I$(LIBVPX_PATH) \
-    -I$(ASM_CNV_PATH) \
-    -I$(ASM_CNV_PATH)/libvpx
+    -I$(ASM_CNV_PATH)

 LOCAL_MODULE := libvpx

@@ -201,8 +166,7 @@ endif
 $$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h
 $$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h

-rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a
-ifneq ($$(findstring $(TARGET_ARCH_ABI),$$(rtcd_dep_template_CONFIG_ASM_ABIS)),)
+ifneq ($(findstring $(TARGET_ARCH_ABI),x86 x86_64),)
 $$(rtcd_dep_template_SRCS): vpx_config.asm
 endif
 endef
@@ -212,17 +176,16 @@ $(eval $(call rtcd_dep_template))
 .PHONY: clean
 clean:
 	@echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]"
-	$(qexec)$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
-	$(qexec)$(RM) -r $(ASM_CNV_PATH)
-	$(qexec)$(RM) $(CLEAN-OBJS)
+	@$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS)
+	@$(RM) -r $(ASM_CNV_PATH)
+	@$(RM) $(CLEAN-OBJS)

 ifeq ($(ENABLE_SHARED),1)
-  LOCAL_CFLAGS += -fPIC
  include $(BUILD_SHARED_LIBRARY)
 else
  include $(BUILD_STATIC_LIBRARY)
 endif

 ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes)
-$(call import-module,android/cpufeatures)
+$(call import-module,cpufeatures)
 endif
--- a/build/make/Makefile
+++ b/build/make/Makefile
@@ -90,7 +90,7 @@ all:

 .PHONY: clean
 clean::
-	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.S.o=.asm.S)
+	rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.s.o=.asm.s)
 	rm -f $(CLEAN-OBJS)

 .PHONY: clean
@@ -124,7 +124,6 @@ ifeq ($(TOOLCHAIN), x86-os2-gcc)
 CFLAGS += -mstackrealign
 endif

-# x86[_64]
 $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx
 $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx
 $(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2
@@ -139,12 +138,6 @@ $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx
 $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx
 $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2
 $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2
-$(BUILD_PFX)%_avx512.c.d: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
-$(BUILD_PFX)%_avx512.c.o: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl
-
-# POWER
-$(BUILD_PFX)%_vsx.c.d: CFLAGS += -maltivec -mvsx
-$(BUILD_PFX)%_vsx.c.o: CFLAGS += -maltivec -mvsx

 $(BUILD_PFX)%.c.d: %.c
 	$(if $(quiet),@echo "    [DEP] $@")
@@ -187,13 +180,13 @@ $(BUILD_PFX)%.asm.o: %.asm
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<

-$(BUILD_PFX)%.S.d: %.S
+$(BUILD_PFX)%.s.d: %.s
 	$(if $(quiet),@echo "    [DEP] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \
            --build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@

-$(BUILD_PFX)%.S.o: %.S
+$(BUILD_PFX)%.s.o: %.s
 	$(if $(quiet),@echo "    [AS] $@")
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(AS) $(ASFLAGS) -o $@ $<
@@ -205,8 +198,8 @@ $(BUILD_PFX)%.c.S: %.c
 	$(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@))
 	$(qexec)$(CC) -S $(CFLAGS) -o $@ $<

-.PRECIOUS: %.asm.S
-$(BUILD_PFX)%.asm.S: %.asm
+.PRECIOUS: %.asm.s
+$(BUILD_PFX)%.asm.s: %.asm
 	$(if $(quiet),@echo "    [ASM CONVERSION] $@")
 	$(qexec)mkdir -p $(dir $@)
 	$(qexec)$(ASM_CONVERSION) <$< >$@
--- a/build/make/ads2gas.pl
+++ b/build/make/ads2gas.pl
@@ -138,6 +138,14 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

+    # RN to .req
+    if (s/RN\s+([Rr]\d+|lr)/.req $1/)
+    {
+        print;
+        print "$comment_sub$comment\n" if defined $comment;
+        next;
+    }
+
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/;
--- a/build/make/ads2gas_apple.pl
+++ b/build/make/ads2gas_apple.pl
@@ -120,6 +120,18 @@ while (<STDIN>)
    s/DCD(.*)/.long $1/;
    s/DCB(.*)/.byte $1/;

+    # Build a hash of all the register - alias pairs.
+    if (s/(.*)RN(.*)/$1 .req $2/g)
+    {
+        $register_aliases{trim($1)} = trim($2);
+        next;
+    }
+
+    while (($key, $value) = each(%register_aliases))
+    {
+        s/\b$key\b/$value/g;
+    }
+
    # Make function visible to linker, and make additional symbol with
    # prepended underscore
    s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/;
--- a/build/make/configure.sh
+++ b/build/make/configure.sh
@@ -403,23 +403,6 @@ check_gcc_machine_option() {
  fi
 }

-# tests for -m$2, -m$3, -m$4... toggling the feature given in $1.
-check_gcc_machine_options() {
-  feature="$1"
-  shift
-  flags="-m$1"
-  shift
-  for opt in $*; do
-    flags="$flags -m$opt"
-  done
-
-  if enabled gcc && ! disabled "$feature" && ! check_cflags $flags; then
-    RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature "
-  else
-    soft_enable "$feature"
-  fi
-}
-
 write_common_config_banner() {
  print_webm_license config.mk "##" ""
  echo '# This file automatically generated by configure. Do not edit!' >> config.mk
@@ -652,7 +635,7 @@ setup_gnu_toolchain() {
  AS=${AS:-${CROSS}as}
  STRIP=${STRIP:-${CROSS}strip}
  NM=${NM:-${CROSS}nm}
-  AS_SFX=.S
+  AS_SFX=.s
  EXE_SFX=
 }

@@ -691,6 +674,7 @@ check_xcode_minimum_version() {
 process_common_toolchain() {
  if [ -z "$toolchain" ]; then
    gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}"
+
    # detect tgt_isa
    case "$gcctarget" in
      aarch64*)
@@ -713,18 +697,6 @@ process_common_toolchain() {
      *sparc*)
        tgt_isa=sparc
        ;;
-      power*64*-*)
-        tgt_isa=ppc64
-        ;;
-      power*)
-        tgt_isa=ppc
-        ;;
-      *mips64el*)
-        tgt_isa=mips64
-        ;;
-      *mips32el*)
-        tgt_isa=mips32
-        ;;
    esac

    # detect tgt_os
@@ -753,16 +725,9 @@ process_common_toolchain() {
        tgt_isa=x86_64
        tgt_os=darwin15
        ;;
-      *darwin16*)
-        tgt_isa=x86_64
-        tgt_os=darwin16
-        ;;
      x86_64*mingw32*)
        tgt_os=win64
        ;;
-      x86_64*cygwin*)
-        tgt_os=win64
-        ;;
      *mingw32*|*cygwin*)
        [ -z "$tgt_isa" ] && tgt_isa=x86
        tgt_os=win32
@@ -810,9 +775,6 @@ process_common_toolchain() {
    mips*)
      enable_feature mips
      ;;
-    ppc*)
-      enable_feature ppc
-      ;;
  esac

  # PIC is probably what we want when building shared libs
@@ -881,10 +843,6 @@ process_common_toolchain() {
      add_cflags  "-mmacosx-version-min=10.11"
      add_ldflags "-mmacosx-version-min=10.11"
      ;;
-    *-darwin16-*)
-      add_cflags  "-mmacosx-version-min=10.12"
-      add_ldflags "-mmacosx-version-min=10.12"
-      ;;
    *-iphonesimulator-*)
      add_cflags  "-miphoneos-version-min=${IOS_VERSION_MIN}"
      add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}"
@@ -968,7 +926,7 @@ EOF
          ;;
        vs*)
          asm_conversion_cmd="${source_path}/build/make/ads2armasm_ms.pl"
-          AS_SFX=.S
+          AS_SFX=.s
          msvs_arch_dir=arm-msvs
          disable_feature multithread
          disable_feature unit_tests
@@ -978,7 +936,6 @@ EOF
            # only "AppContainerApplication" which requires an AppxManifest.
            # Therefore disable the examples, just build the library.
            disable_feature examples
-            disable_feature tools
          fi
          ;;
        rvct)
@@ -1021,50 +978,47 @@ EOF
          ;;

        android*)
-          if [ -n "${sdk_path}" ]; then
-            SDK_PATH=${sdk_path}
-            COMPILER_LOCATION=`find "${SDK_PATH}" \
-              -name "arm-linux-androideabi-gcc*" -print -quit`
-            TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
-            CC=${TOOLCHAIN_PATH}gcc
-            CXX=${TOOLCHAIN_PATH}g++
-            AR=${TOOLCHAIN_PATH}ar
-            LD=${TOOLCHAIN_PATH}gcc
-            AS=${TOOLCHAIN_PATH}as
-            STRIP=${TOOLCHAIN_PATH}strip
-            NM=${TOOLCHAIN_PATH}nm
+          if [ -z "${sdk_path}" ]; then
+            die "Must specify --sdk-path for Android builds."
+          fi

-            if [ -z "${alt_libc}" ]; then
-              alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
-                awk '{n = split($0,a,"/"); \
+          SDK_PATH=${sdk_path}
+          COMPILER_LOCATION=`find "${SDK_PATH}" \
+                             -name "arm-linux-androideabi-gcc*" -print -quit`
+          TOOLCHAIN_PATH=${COMPILER_LOCATION%/*}/arm-linux-androideabi-
+          CC=${TOOLCHAIN_PATH}gcc
+          CXX=${TOOLCHAIN_PATH}g++
+          AR=${TOOLCHAIN_PATH}ar
+          LD=${TOOLCHAIN_PATH}gcc
+          AS=${TOOLCHAIN_PATH}as
+          STRIP=${TOOLCHAIN_PATH}strip
+          NM=${TOOLCHAIN_PATH}nm
+
+          if [ -z "${alt_libc}" ]; then
+            alt_libc=`find "${SDK_PATH}" -name arch-arm -print | \
+              awk '{n = split($0,a,"/"); \
                split(a[n-1],b,"-"); \
                print $0 " " b[2]}' | \
                sort -g -k 2 | \
                awk '{ print $1 }' | tail -1`
-            fi
+          fi

-            if [ -d "${alt_libc}" ]; then
-              add_cflags "--sysroot=${alt_libc}"
-              add_ldflags "--sysroot=${alt_libc}"
-            fi
+          if [ -d "${alt_libc}" ]; then
+            add_cflags "--sysroot=${alt_libc}"
+            add_ldflags "--sysroot=${alt_libc}"
+          fi

-            # linker flag that routes around a CPU bug in some
-            # Cortex-A8 implementations (NDK Dev Guide)
-            add_ldflags "-Wl,--fix-cortex-a8"
+          # linker flag that routes around a CPU bug in some
+          # Cortex-A8 implementations (NDK Dev Guide)
+          add_ldflags "-Wl,--fix-cortex-a8"

-            enable_feature pic
-            soft_enable realtime_only
-            if [ ${tgt_isa} = "armv7" ]; then
-              soft_enable runtime_cpu_detect
-            fi
-            if enabled runtime_cpu_detect; then
-              add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
-            fi
-          else
-            echo "Assuming standalone build with NDK toolchain."
-            echo "See build/make/Android.mk for details."
-            check_add_ldflags -static
-            soft_enable unit_tests
+          enable_feature pic
+          soft_enable realtime_only
+          if [ ${tgt_isa} = "armv7" ]; then
+            soft_enable runtime_cpu_detect
+          fi
+          if enabled runtime_cpu_detect; then
+            add_cflags "-I${SDK_PATH}/sources/android/cpufeatures"
          fi
          ;;

@@ -1077,7 +1031,7 @@ EOF
          STRIP="$(${XCRUN_FIND} strip)"
          NM="$(${XCRUN_FIND} nm)"
          RANLIB="$(${XCRUN_FIND} ranlib)"
-          AS_SFX=.S
+          AS_SFX=.s
          LD="${CXX:-$(${XCRUN_FIND} ld)}"

          # ASFLAGS is written here instead of using check_add_asflags
@@ -1186,20 +1140,10 @@ EOF
        fi
      fi

-      if enabled mmi; then
-        tgt_isa=loongson3a
-        check_add_ldflags -march=loongson3a
-      fi
-
      check_add_cflags -march=${tgt_isa}
      check_add_asflags -march=${tgt_isa}
      check_add_asflags -KPIC
      ;;
-    ppc*)
-      link_with_cc=gcc
-      setup_gnu_toolchain
-      check_gcc_machine_option "vsx"
-      ;;
    x86*)
      case  ${tgt_os} in
        win*)
@@ -1254,13 +1198,6 @@ EOF
          AS=msvs
          msvs_arch_dir=x86-msvs
          vc_version=${tgt_cc##vs}
-          case $vc_version in
-            7|8|9|10|11|12|13|14)
-              echo "${tgt_cc} does not support avx512, disabling....."
-              RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 "
-              soft_disable avx512
-              ;;
-          esac
          case $vc_version in
            7|8|9|10)
              echo "${tgt_cc} does not support avx/avx2, disabling....."
@@ -1305,18 +1242,9 @@ EOF
        elif disabled $ext; then
          disable_exts="yes"
        else
-          if [ "$ext" = "avx512" ]; then
-            check_gcc_machine_options $ext avx512f avx512cd avx512bw avx512dq avx512vl
-          else
-            # use the shortened version for the flag: sse4_1 -> sse4
-            check_gcc_machine_option ${ext%_*} $ext
-          fi
+          # use the shortened version for the flag: sse4_1 -> sse4
+          check_gcc_machine_option ${ext%_*} $ext
        fi
-
-        # https://bugs.chromium.org/p/webm/issues/detail?id=1464
-        # The assembly optimizations for vpx_sub_pixel_variance do not link with
-        # gcc 6.
-        enabled sse2 && soft_enable pic
      done

      if enabled external_build; then
@@ -1341,6 +1269,7 @@ EOF
        esac
        log_echo "  using $AS"
      fi
+      [ "${AS##*/}" = nasm ] && add_asflags -Ox
      AS_SFX=.asm
      case  ${tgt_os} in
        win32)
@@ -1349,7 +1278,7 @@ EOF
          EXE_SFX=.exe
          ;;
        win64)
-          add_asflags -f win64
+          add_asflags -f x64
          enabled debug && add_asflags -g cv8
          EXE_SFX=.exe
          ;;
@@ -1463,7 +1392,6 @@ EOF
      *-win*-vs*)
        ;;
      *-android-gcc)
-        # bionic includes basic pthread functionality, obviating -lpthread.
        ;;
      *)
        check_header pthread.h && add_extralibs -lpthread
@@ -1483,10 +1411,6 @@ EOF
          echo "msa optimizations are available only for little endian platforms"
          disable_feature msa
        fi
-        if enabled mmi; then
-          echo "mmi optimizations are available only for little endian platforms"
-          disable_feature mmi
-        fi
      fi
      ;;
  esac
--- a/build/make/gen_msvs_sln.sh
+++ b/build/make/gen_msvs_sln.sh
@@ -25,7 +25,7 @@ files.
 Options:
    --help                      Print this message
    --out=outfile               Redirect output to a file
-    --ver=version               Version (7,8,9,10,11,12,14,15) of visual studio to generate for
+    --ver=version               Version (7,8,9,10,11,12,14) of visual studio to generate for
    --target=isa-os-cc          Target specifier
 EOF
    exit 1
@@ -215,7 +215,7 @@ for opt in "$@"; do
    ;;
    --ver=*) vs_ver="$optval"
             case $optval in
-             10|11|12|14|15)
+             10|11|12|14)
             ;;
             *) die Unrecognized Visual Studio Version in $opt
             ;;
@@ -240,12 +240,9 @@ case "${vs_ver:-10}" in
    12) sln_vers="12.00"
       sln_vers_str="Visual Studio 2013"
    ;;
-    14) sln_vers="12.00"
+    14) sln_vers="14.00"
       sln_vers_str="Visual Studio 2015"
    ;;
-    15) sln_vers="12.00"
-       sln_vers_str="Visual Studio 2017"
-    ;;
 esac
 sfx=vcxproj

--- a/build/make/gen_msvs_vcxproj.sh
+++ b/build/make/gen_msvs_vcxproj.sh
@@ -34,7 +34,7 @@ Options:
    --name=project_name         Name of the project (required)
    --proj-guid=GUID            GUID to use for the project
    --module-def=filename       File containing export definitions (for DLLs)
-    --ver=version               Version (10,11,12,14,15) of visual studio to generate for
+    --ver=version               Version (10,11,12,14) of visual studio to generate for
    --src-path-bare=dir         Path to root of source tree
    -Ipath/to/include           Additional include directories
    -DFLAG[=value]              Preprocessor macros to define
@@ -82,7 +82,7 @@ generate_filter() {
                       | sed -e "s,$src_path_bare,," \
                             -e 's/^[\./]\+//g' -e 's,[:/ ],_,g')

-                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $asm_use_custom_step; then
+                if ([ "$pat" == "asm" ] || [ "$pat" == "s" ]) && $asm_use_custom_step; then
                    # Avoid object file name collisions, i.e. vpx_config.c and
                    # vpx_config.asm produce the same object file without
                    # this additional suffix.
@@ -168,7 +168,7 @@ for opt in "$@"; do
        --ver=*)
            vs_ver="$optval"
            case "$optval" in
-                10|11|12|14|15)
+                10|11|12|14)
                ;;
                *) die Unrecognized Visual Studio Version in $opt
                ;;
@@ -203,7 +203,7 @@ for opt in "$@"; do
            # The paths in file_list are fixed outside of the loop.
            file_list[${#file_list[@]}]="$opt"
            case "$opt" in
-                 *.asm|*.[Ss]) uses_asm=true
+                 *.asm|*.s) uses_asm=true
                 ;;
            esac
        ;;
@@ -218,7 +218,7 @@ guid=${guid:-`generate_uuid`}
 asm_use_custom_step=false
 uses_asm=${uses_asm:-false}
 case "${vs_ver:-11}" in
-    10|11|12|14|15)
+    10|11|12|14)
       asm_use_custom_step=$uses_asm
    ;;
 esac
@@ -347,9 +347,6 @@ generate_vcxproj() {
            if [ "$vs_ver" = "14" ]; then
                tag_content PlatformToolset v140
            fi
-            if [ "$vs_ver" = "15" ]; then
-                tag_content PlatformToolset v141
-            fi
            tag_content CharacterSet Unicode
            if [ "$config" = "Release" ]; then
                tag_content WholeProgramOptimization true
@@ -455,7 +452,7 @@ generate_vcxproj() {
    done

    open_tag ItemGroup
-    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s;S"
+    generate_filter "Source Files"   "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s"
    close_tag ItemGroup
    open_tag ItemGroup
    generate_filter "Header Files"   "h;hm;inl;inc;xsd"
--- a/build/make/iosbuild.sh
+++ b/build/make/iosbuild.sh
@@ -35,8 +35,8 @@ ARM_TARGETS="arm64-darwin-gcc
             armv7s-darwin-gcc"
 SIM_TARGETS="x86-iphonesimulator-gcc
             x86_64-iphonesimulator-gcc"
-OSX_TARGETS="x86-darwin16-gcc
-             x86_64-darwin16-gcc"
+OSX_TARGETS="x86-darwin15-gcc
+             x86_64-darwin15-gcc"
 TARGETS="${ARM_TARGETS} ${SIM_TARGETS}"

 # Configures for the target specified by $1, and invokes make with the dist
@@ -271,7 +271,7 @@ cat << EOF
    --help: Display this message and exit.
    --enable-shared: Build a dynamic framework for use on iOS 8 or later.
    --extra-configure-args <args>: Extra args to pass when configuring libvpx.
-    --macosx: Uses darwin16 targets instead of iphonesimulator targets for x86
+    --macosx: Uses darwin15 targets instead of iphonesimulator targets for x86
              and x86_64. Allows linking to framework when builds target MacOSX
              instead of iOS.
    --preserve-build-output: Do not delete the build directory.
--- a/build/make/rtcd.pl
+++ b/build/make/rtcd.pl
@@ -1,13 +1,4 @@
 #!/usr/bin/env perl
-##
-##  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##

 no strict 'refs';
 use warnings;
@@ -209,7 +200,6 @@ sub filter {
 sub common_top() {
  my $include_guard = uc($opts{sym})."_H_";
  print <<EOF;
-// This file is generated. Do not edit.
 #ifndef ${include_guard}
 #define ${include_guard}

@@ -345,36 +335,6 @@ EOF
  common_bottom;
 }

-sub ppc() {
-  determine_indirection("c", @ALL_ARCHS);
-
-  # Assign the helper variable for each enabled extension
-  foreach my $opt (@ALL_ARCHS) {
-    my $opt_uc = uc $opt;
-    eval "\$have_${opt}=\"flags & HAS_${opt_uc}\"";
-  }
-
-  common_top;
-  print <<EOF;
-#include "vpx_config.h"
-
-#ifdef RTCD_C
-#include "vpx_ports/ppc.h"
-static void setup_rtcd_internal(void)
-{
-    int flags = ppc_simd_caps();
-    (void)flags;
-EOF
-
-  set_function_pointers("c", @ALL_ARCHS);
-
-  print <<EOF;
-}
-#endif
-EOF
-  common_bottom;
-}
-
 sub unoptimized() {
  determine_indirection "c";
  common_top;
@@ -401,10 +361,10 @@ EOF

 &require("c");
 if ($opts{arch} eq 'x86') {
-  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/);
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
  x86;
 } elsif ($opts{arch} eq 'x86_64') {
-  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512/);
+  @ALL_ARCHS = filter(qw/mmx sse sse2 sse3 ssse3 sse4_1 avx avx2/);
  @REQUIRES = filter(keys %required ? keys %required : qw/mmx sse sse2/);
  &require(@REQUIRES);
  x86;
@@ -421,10 +381,6 @@ if ($opts{arch} eq 'x86') {
      @ALL_ARCHS = filter("$opts{arch}", qw/msa/);
      last;
    }
-    if (/HAVE_MMI=yes/) {
-      @ALL_ARCHS = filter("$opts{arch}", qw/mmi/);
-      last;
-    }
  }
  close CONFIG_FILE;
  mips;
@@ -434,9 +390,6 @@ if ($opts{arch} eq 'x86') {
 } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) {
  @ALL_ARCHS = filter(qw/neon/);
  arm;
-} elsif ($opts{arch} =~ /^ppc/ ) {
-  @ALL_ARCHS = filter(qw/vsx/);
-  ppc;
 } else {
  unoptimized;
 }
--- a/build/make/version.sh
+++ b/build/make/version.sh
@@ -60,7 +60,6 @@ if [ ${bare} ]; then
    echo "${changelog_version}${git_version_id}" > $$.tmp
 else
    cat<<EOF>$$.tmp
-// This file is generated. Do not edit.
 #define VERSION_MAJOR  $major_version
 #define VERSION_MINOR  $minor_version
 #define VERSION_PATCH  $patch_version
--- a/55
+++ b/55
@@ -22,7 +22,6 @@ show_help(){
 Advanced options:
  ${toggle_libs}                  libraries
  ${toggle_examples}              examples
-  ${toggle_tools}                 tools
  ${toggle_docs}                  documentation
  ${toggle_unit_tests}            unit tests
  ${toggle_decode_perf_tests}     build decoder perf tests with unit tests
@@ -98,7 +97,6 @@ EOF

 # all_platforms is a list of all supported target platforms. Maintain
 # alphabetically by architecture, generic-gnu last.
-all_platforms="${all_platforms} arm64-android-gcc"
 all_platforms="${all_platforms} arm64-darwin-gcc"
 all_platforms="${all_platforms} arm64-linux-gcc"
 all_platforms="${all_platforms} armv7-android-gcc"   #neon Cortex-A8
@@ -109,13 +107,10 @@ all_platforms="${all_platforms} armv7-none-rvct"     #neon Cortex-A8
 all_platforms="${all_platforms} armv7-win32-vs11"
 all_platforms="${all_platforms} armv7-win32-vs12"
 all_platforms="${all_platforms} armv7-win32-vs14"
-all_platforms="${all_platforms} armv7-win32-vs15"
 all_platforms="${all_platforms} armv7s-darwin-gcc"
 all_platforms="${all_platforms} armv8-linux-gcc"
 all_platforms="${all_platforms} mips32-linux-gcc"
 all_platforms="${all_platforms} mips64-linux-gcc"
-all_platforms="${all_platforms} ppc64-linux-gcc"
-all_platforms="${all_platforms} ppc64le-linux-gcc"
 all_platforms="${all_platforms} sparc-solaris-gcc"
 all_platforms="${all_platforms} x86-android-gcc"
 all_platforms="${all_platforms} x86-darwin8-gcc"
@@ -128,7 +123,6 @@ all_platforms="${all_platforms} x86-darwin12-gcc"
 all_platforms="${all_platforms} x86-darwin13-gcc"
 all_platforms="${all_platforms} x86-darwin14-gcc"
 all_platforms="${all_platforms} x86-darwin15-gcc"
-all_platforms="${all_platforms} x86-darwin16-gcc"
 all_platforms="${all_platforms} x86-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86-linux-gcc"
 all_platforms="${all_platforms} x86-linux-icc"
@@ -139,7 +133,6 @@ all_platforms="${all_platforms} x86-win32-vs10"
 all_platforms="${all_platforms} x86-win32-vs11"
 all_platforms="${all_platforms} x86-win32-vs12"
 all_platforms="${all_platforms} x86-win32-vs14"
-all_platforms="${all_platforms} x86-win32-vs15"
 all_platforms="${all_platforms} x86_64-android-gcc"
 all_platforms="${all_platforms} x86_64-darwin9-gcc"
 all_platforms="${all_platforms} x86_64-darwin10-gcc"
@@ -148,7 +141,6 @@ all_platforms="${all_platforms} x86_64-darwin12-gcc"
 all_platforms="${all_platforms} x86_64-darwin13-gcc"
 all_platforms="${all_platforms} x86_64-darwin14-gcc"
 all_platforms="${all_platforms} x86_64-darwin15-gcc"
-all_platforms="${all_platforms} x86_64-darwin16-gcc"
 all_platforms="${all_platforms} x86_64-iphonesimulator-gcc"
 all_platforms="${all_platforms} x86_64-linux-gcc"
 all_platforms="${all_platforms} x86_64-linux-icc"
@@ -158,26 +150,22 @@ all_platforms="${all_platforms} x86_64-win64-vs10"
 all_platforms="${all_platforms} x86_64-win64-vs11"
 all_platforms="${all_platforms} x86_64-win64-vs12"
 all_platforms="${all_platforms} x86_64-win64-vs14"
-all_platforms="${all_platforms} x86_64-win64-vs15"
 all_platforms="${all_platforms} generic-gnu"

 # all_targets is a list of all targets that can be configured
 # note that these should be in dependency order for now.
-all_targets="libs examples tools docs"
+all_targets="libs examples docs"

 # all targets available are enabled, by default.
 for t in ${all_targets}; do
    [ -f "${source_path}/${t}.mk" ] && enable_feature ${t}
 done

-if ! diff --version >/dev/null; then
-  die "diff missing: Try installing diffutils via your package manager."
-fi
-
 if ! perl --version >/dev/null; then
    die "Perl is required to build"
 fi

+
 if [ "`cd \"${source_path}\" && pwd`" != "`pwd`" ]; then
  # test to see if source_path already configured
  if [ -f "${source_path}/vpx_config.h" ]; then
@@ -233,7 +221,6 @@ ARCH_LIST="
    mips
    x86
    x86_64
-    ppc
 "
 ARCH_EXT_LIST_X86="
    mmx
@@ -244,13 +231,7 @@ ARCH_EXT_LIST_X86="
    sse4_1
    avx
    avx2
-    avx512
 "
-
-ARCH_EXT_LIST_LOONGSON="
-    mmi
-"
-
 ARCH_EXT_LIST="
    neon
    neon_asm
@@ -261,10 +242,6 @@ ARCH_EXT_LIST="
    mips64

    ${ARCH_EXT_LIST_X86}
-
-    vsx
-
-    ${ARCH_EXT_LIST_LOONGSON}
 "
 HAVE_LIST="
    ${ARCH_EXT_LIST}
@@ -276,6 +253,7 @@ EXPERIMENT_LIST="
    spatial_svc
    fp_mb_stats
    emulate_hardware
+    misc_fixes
 "
 CONFIG_LIST="
    dependency_tracking
@@ -330,7 +308,6 @@ CONFIG_LIST="
    better_hw_compatibility
    experimental
    size_limit
-    always_adjust_bpm
    ${EXPERIMENT_LIST}
 "
 CMDLINE_SELECT="
@@ -353,7 +330,6 @@ CMDLINE_SELECT="

    libs
    examples
-    tools
    docs
    libc
    as
@@ -390,7 +366,6 @@ CMDLINE_SELECT="
    better_hw_compatibility
    vp9_highbitdepth
    experimental
-    always_adjust_bpm
 "

 process_cmdline() {
@@ -500,7 +475,7 @@ EOF
    #
    # Write makefiles for all enabled targets
    #
-    for tgt in libs examples tools docs solution; do
+    for tgt in libs examples docs solution; do
        tgt_fn="$tgt-$toolchain.mk"

        if enabled $tgt; then
@@ -592,7 +567,6 @@ process_toolchain() {
        check_add_cflags -Wdeclaration-after-statement
        check_add_cflags -Wdisabled-optimization
        check_add_cflags -Wfloat-conversion
-        check_add_cflags -Wparentheses-equality
        check_add_cflags -Wpointer-arith
        check_add_cflags -Wtype-limits
        check_add_cflags -Wcast-qual
@@ -600,20 +574,17 @@ process_toolchain() {
        check_add_cflags -Wimplicit-function-declaration
        check_add_cflags -Wuninitialized
        check_add_cflags -Wunused
-        # -Wextra has some tricky cases. Rather than fix them all now, get the
-        # flag for as many files as possible and fix the remaining issues
-        # piecemeal.
-        # https://bugs.chromium.org/p/webm/issues/detail?id=1069
-        check_add_cflags -Wextra
-        # check_add_cflags also adds to cxxflags. gtest does not do well with
-        # -Wundef so add it explicitly to CFLAGS only.
-        check_cflags -Wundef && add_cflags_only -Wundef
+        case ${CC} in
+          *clang*)
+              # libvpx and/or clang have issues with aliasing:
+              # https://code.google.com/p/webm/issues/detail?id=603
+              # work around them until they are fixed
+              check_add_cflags -fno-strict-aliasing
+          ;;
+        esac
        if enabled mips || [ -z "${INLINE}" ]; then
          enabled extra_warnings || check_add_cflags -Wno-unused-function
        fi
-        # Avoid this warning for third_party C++ sources. Some reorganization
-        # would be needed to apply this only to test/*.cc.
-        check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32
    fi

    if enabled icc; then
@@ -665,7 +636,7 @@ process_toolchain() {
             gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh
             enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror"
             all_targets="${all_targets} solution"
-             INLINE="__inline"
+             INLINE="__forceinline"
        ;;
    esac

--- a/examples.mk
+++ b/examples.mk
@@ -76,7 +76,6 @@ vpxdec.SRCS                 += tools_common.c tools_common.h
 vpxdec.SRCS                 += y4menc.c y4menc.h
 ifeq ($(CONFIG_LIBYUV),yes)
  vpxdec.SRCS                 += $(LIBYUV_SRCS)
-  $(BUILD_PFX)third_party/libyuv/%.cc.o: CXXFLAGS += -Wno-unused-parameter
 endif
 ifeq ($(CONFIG_WEBM_IO),yes)
  vpxdec.SRCS                 += $(LIBWEBM_COMMON_SRCS)
--- a/examples/decode_with_drops.c
+++ b/examples/decode_with_drops.c
@@ -92,8 +92,8 @@ int main(int argc, char **argv) {
  if (!(outfile = fopen(argv[2], "wb")))
    die("Failed to open %s for writing.", argv[2]);

-  n = (int)strtol(argv[3], &nptr, 0);
-  m = (int)strtol(nptr + 1, NULL, 0);
+  n = strtol(argv[3], &nptr, 0);
+  m = strtol(nptr + 1, NULL, 0);
  is_range = (*nptr == '-');
  if (!n || !m || (*nptr != '-' && *nptr != '/'))
    die("Couldn't parse pattern %s.\n", argv[3]);
--- a/examples/set_maps.c
+++ b/examples/set_maps.c
@@ -174,8 +174,8 @@ int main(int argc, char **argv) {
  }
  assert(encoder != NULL);
  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(argv[2], NULL, 0);
-  info.frame_height = (int)strtol(argv[3], NULL, 0);
+  info.frame_width = strtol(argv[2], NULL, 0);
+  info.frame_height = strtol(argv[3], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/simple_encoder.c
+++ b/examples/simple_encoder.c
@@ -150,7 +150,7 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info = { 0, 0, 0, { 0, 0 } };
+  VpxVideoInfo info = { 0 };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;
@@ -175,14 +175,14 @@ int main(int argc, char **argv) {
  infile_arg = argv[4];
  outfile_arg = argv[5];
  keyframe_interval_arg = argv[6];
-  max_frames = (int)strtol(argv[8], NULL, 0);
+  max_frames = strtol(argv[8], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(width_arg, NULL, 0);
-  info.frame_height = (int)strtol(height_arg, NULL, 0);
+  info.frame_width = strtol(width_arg, NULL, 0);
+  info.frame_height = strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

@@ -196,7 +196,7 @@ int main(int argc, char **argv) {
    die("Failed to allocate image.");
  }

-  keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0);
+  keyframe_interval = strtol(keyframe_interval_arg, NULL, 0);
  if (keyframe_interval < 0) die("Invalid keyframe interval value.");

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));
@@ -209,7 +209,7 @@ int main(int argc, char **argv) {
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;
  cfg.rc_target_bitrate = bitrate;
-  cfg.g_error_resilient = (vpx_codec_er_flags_t)strtoul(argv[7], NULL, 0);
+  cfg.g_error_resilient = strtol(argv[7], NULL, 0);

  writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info);
  if (!writer) die("Failed to open %s for writing.", outfile_arg);
--- a/examples/twopass_encoder.c
+++ b/examples/twopass_encoder.c
@@ -209,13 +209,13 @@ int main(int argc, char **argv) {

  if (argc != 7) die("Invalid number of arguments.");

-  max_frames = (int)strtol(argv[6], NULL, 0);
+  max_frames = strtol(argv[6], NULL, 0);

  encoder = get_vpx_encoder_by_name(codec_arg);
  if (!encoder) die("Unsupported codec.");

-  w = (int)strtol(width_arg, NULL, 0);
-  h = (int)strtol(height_arg, NULL, 0);
+  w = strtol(width_arg, NULL, 0);
+  h = strtol(height_arg, NULL, 0);

  if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0)
    die("Invalid frame size: %dx%d", w, h);
--- a/examples/vp8_multi_resolution_encoder.c
+++ b/examples/vp8_multi_resolution_encoder.c
@@ -151,7 +151,7 @@ static void write_ivf_frame_header(FILE *outfile,
  if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;

  pts = pkt->data.frame.pts;
-  mem_put_le32(header, (int)pkt->data.frame.sz);
+  mem_put_le32(header, pkt->data.frame.sz);
  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
  mem_put_le32(header + 8, pts >> 32);

@@ -190,7 +190,7 @@ static void set_temporal_layer_pattern(int num_temporal_layers,
      cfg->ts_layer_id[0] = 0;
      cfg->ts_layer_id[1] = 1;
      // Use 60/40 bit allocation as example.
-      cfg->ts_target_bitrate[0] = (int)(0.6f * bitrate);
+      cfg->ts_target_bitrate[0] = 0.6f * bitrate;
      cfg->ts_target_bitrate[1] = bitrate;

      /* 0=L, 1=GF */
@@ -240,9 +240,9 @@ static void set_temporal_layer_pattern(int num_temporal_layers,
      cfg->ts_layer_id[1] = 2;
      cfg->ts_layer_id[2] = 1;
      cfg->ts_layer_id[3] = 2;
-      // Use 45/20/35 bit allocation as example.
-      cfg->ts_target_bitrate[0] = (int)(0.45f * bitrate);
-      cfg->ts_target_bitrate[1] = (int)(0.65f * bitrate);
+      // Use 40/20/40 bit allocation as example.
+      cfg->ts_target_bitrate[0] = 0.4f * bitrate;
+      cfg->ts_target_bitrate[1] = 0.6f * bitrate;
      cfg->ts_target_bitrate[2] = bitrate;

      /* 0=L, 1=GF, 2=ARF */
@@ -294,8 +294,8 @@ int main(int argc, char **argv) {
  vpx_codec_err_t res[NUM_ENCODERS];

  int i;
-  int width;
-  int height;
+  long width;
+  long height;
  int length_frame;
  int frame_avail;
  int got_data;
@@ -340,16 +340,17 @@ int main(int argc, char **argv) {
  unsigned int num_temporal_layers[NUM_ENCODERS] = { 3, 3, 3 };

  if (argc != (7 + 3 * NUM_ENCODERS))
-    die("Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
+    die(
+        "Usage: %s <width> <height> <frame_rate>  <infile> <outfile(s)> "
        "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> <output "
        "psnr?> \n",
        argv[0]);

  printf("Using %s\n", vpx_codec_iface_name(interface));

-  width = (int)strtol(argv[1], NULL, 0);
-  height = (int)strtol(argv[2], NULL, 0);
-  framerate = (int)strtol(argv[3], NULL, 0);
+  width = strtol(argv[1], NULL, 0);
+  height = strtol(argv[2], NULL, 0);
+  framerate = strtol(argv[3], NULL, 0);

  if (width < 16 || width % 2 || height < 16 || height % 2)
    die("Invalid resolution: %ldx%ld", width, height);
@@ -371,13 +372,12 @@ int main(int argc, char **argv) {

  // Bitrates per spatial layer: overwrite default rates above.
  for (i = 0; i < NUM_ENCODERS; i++) {
-    target_bitrate[i] = (int)strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
+    target_bitrate[i] = strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
  }

  // Temporal layers per spatial layers: overwrite default settings above.
  for (i = 0; i < NUM_ENCODERS; i++) {
-    num_temporal_layers[i] =
-        (int)strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
+    num_temporal_layers[i] = strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
    if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
      die("Invalid temporal layers: %d, Must be 1, 2, or 3. \n",
          num_temporal_layers);
@@ -392,9 +392,9 @@ int main(int argc, char **argv) {
    downsampled_input[i] = fopen(filename, "wb");
  }

-  key_frame_insert = (int)strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);
+  key_frame_insert = strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);

-  show_psnr = (int)strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);
+  show_psnr = strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);

  /* Populate default encoder configuration */
  for (i = 0; i < NUM_ENCODERS; i++) {
@@ -461,7 +461,7 @@ int main(int argc, char **argv) {

  // Set the number of threads per encode/spatial layer.
  // (1, 1, 1) means no encoder threading.
-  cfg[0].g_threads = 1;
+  cfg[0].g_threads = 2;
  cfg[1].g_threads = 1;
  cfg[2].g_threads = 1;

@@ -470,7 +470,7 @@ int main(int argc, char **argv) {
    if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32))
      die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h);

-  if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w)
+  if (raw[0].stride[VPX_PLANE_Y] == raw[0].d_w)
    read_frame_p = read_frame;
  else
    read_frame_p = read_frame_by_row;
@@ -508,11 +508,9 @@ int main(int argc, char **argv) {

  /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */
  /* Enable denoising for the highest-resolution encoder. */
-  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1))
+  if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 4))
    die_codec(&codec[0], "Failed to set noise_sensitivity");
-  if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1))
-    die_codec(&codec[1], "Failed to set noise_sensitivity");
-  for (i = 2; i < NUM_ENCODERS; i++) {
+  for (i = 1; i < NUM_ENCODERS; i++) {
    if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0))
      die_codec(&codec[i], "Failed to set noise_sensitivity");
  }
@@ -559,8 +557,7 @@ int main(int argc, char **argv) {
        /* Write out down-sampled input. */
        length_frame = cfg[i].g_w * cfg[i].g_h * 3 / 2;
        if (fwrite(raw[i].planes[0], 1, length_frame,
-                   downsampled_input[NUM_ENCODERS - i - 1]) !=
-            (unsigned int)length_frame) {
+                   downsampled_input[NUM_ENCODERS - i - 1]) != length_frame) {
          return EXIT_FAILURE;
        }
      }
@@ -621,6 +618,10 @@ int main(int argc, char **argv) {
            break;
          default: break;
        }
+        printf(pkt[i]->kind == VPX_CODEC_CX_FRAME_PKT &&
+                       (pkt[i]->data.frame.flags & VPX_FRAME_IS_KEY)
+                   ? "K"
+                   : "");
        fflush(stdout);
      }
    }
@@ -661,6 +662,7 @@ int main(int argc, char **argv) {
      write_ivf_file_header(outfile[i], &cfg[i], frame_cnt - 1);
    fclose(outfile[i]);
  }
+  printf("\n");

  return EXIT_SUCCESS;
 }
--- a/examples/vp8cx_set_ref.c
+++ b/examples/vp8cx_set_ref.c
@@ -51,7 +51,6 @@

 #include "vpx/vp8cx.h"
 #include "vpx/vpx_encoder.h"
-#include "vp8/common/common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -94,22 +93,18 @@ static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,

 int main(int argc, char **argv) {
  FILE *infile = NULL;
-  vpx_codec_ctx_t codec;
-  vpx_codec_enc_cfg_t cfg;
+  vpx_codec_ctx_t codec = { 0 };
+  vpx_codec_enc_cfg_t cfg = { 0 };
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info;
+  VpxVideoInfo info = { 0 };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  int update_frame_num = 0;
  const int fps = 30;       // TODO(dkovalev) add command line argument
  const int bitrate = 200;  // kbit/s TODO(dkovalev) add command line argument

-  vp8_zero(codec);
-  vp8_zero(cfg);
-  vp8_zero(info);
-
  exec_name = argv[0];

  if (argc != 6) die("Invalid number of arguments");
@@ -122,8 +117,8 @@ int main(int argc, char **argv) {
  if (!update_frame_num) die("Couldn't parse frame number '%s'\n", argv[5]);

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(argv[1], NULL, 0);
-  info.frame_height = (int)strtol(argv[2], NULL, 0);
+  info.frame_width = strtol(argv[1], NULL, 0);
+  info.frame_height = strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/vp9_lossless_encoder.c
+++ b/examples/vp9_lossless_encoder.c
@@ -14,7 +14,6 @@

 #include "vpx/vpx_encoder.h"
 #include "vpx/vp8cx.h"
-#include "vp9/common/vp9_common.h"

 #include "../tools_common.h"
 #include "../video_writer.h"
@@ -63,13 +62,11 @@ int main(int argc, char **argv) {
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info;
+  VpxVideoInfo info = { 0 };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;

-  vp9_zero(info);
-
  exec_name = argv[0];

  if (argc < 5) die("Invalid number of arguments");
@@ -78,8 +75,8 @@ int main(int argc, char **argv) {
  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(argv[1], NULL, 0);
-  info.frame_height = (int)strtol(argv[2], NULL, 0);
+  info.frame_width = strtol(argv[1], NULL, 0);
+  info.frame_height = strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

--- a/examples/vp9_spatial_svc_encoder.c
+++ b/examples/vp9_spatial_svc_encoder.c
@@ -84,8 +84,6 @@ static const arg_def_t speed_arg =
    ARG_DEF("sp", "speed", 1, "speed configuration");
 static const arg_def_t aqmode_arg =
    ARG_DEF("aq", "aqmode", 1, "aq-mode off/on");
-static const arg_def_t bitrates_arg =
-    ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]");

 #if CONFIG_VP9_HIGHBITDEPTH
 static const struct arg_enum_list bitdepth_enum[] = {
@@ -126,7 +124,6 @@ static const arg_def_t *svc_args[] = { &frames_arg,
 #endif
                                       &speed_arg,
                                       &rc_end_usage_arg,
-                                       &bitrates_arg,
                                       NULL };

 static const uint32_t default_frames_to_skip = 0;
@@ -168,7 +165,7 @@ void usage_exit(void) {
 static void parse_command_line(int argc, const char **argv_,
                               AppInput *app_input, SvcContext *svc_ctx,
                               vpx_codec_enc_cfg_t *enc_cfg) {
-  struct arg arg;
+  struct arg arg = { 0 };
  char **argv = NULL;
  char **argi = NULL;
  char **argj = NULL;
@@ -253,9 +250,6 @@ static void parse_command_line(int argc, const char **argv_,
    } else if (arg_match(&arg, &scale_factors_arg, argi)) {
      snprintf(string_options, sizeof(string_options), "%s scale-factors=%s",
               string_options, arg.val);
-    } else if (arg_match(&arg, &bitrates_arg, argi)) {
-      snprintf(string_options, sizeof(string_options), "%s bitrates=%s",
-               string_options, arg.val);
    } else if (arg_match(&arg, &passes_arg, argi)) {
      passes = arg_parse_uint(&arg);
      if (passes < 1 || passes > 2) {
@@ -423,6 +417,7 @@ static void set_rate_control_stats(struct RateControlStats *rc,
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
+      const int tlayer0 = sl * cfg->ts_number_layers;
      if (cfg->ts_number_layers == 1)
        rc->layer_framerate[layer] = framerate;
      else
@@ -433,8 +428,8 @@ static void set_rate_control_stats(struct RateControlStats *rc,
                      cfg->layer_target_bitrate[layer - 1]) /
            (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]);
      } else {
-        rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] /
-                               rc->layer_framerate[layer];
+        rc->layer_pfb[tlayer0] = 1000.0 * cfg->layer_target_bitrate[tlayer0] /
+                                 rc->layer_framerate[tlayer0];
      }
      rc->layer_input_frames[layer] = 0;
      rc->layer_enc_frames[layer] = 0;
@@ -454,13 +449,12 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
                                          vpx_codec_enc_cfg_t *cfg,
                                          int frame_cnt) {
  unsigned int sl, tl;
-  double perc_fluctuation = 0.0;
  int tot_num_frames = 0;
+  double perc_fluctuation = 0.0;
  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
  printf("Rate control layer stats for sl%d tl%d layer(s):\n\n",
         cfg->ss_number_layers, cfg->ts_number_layers);
  for (sl = 0; sl < cfg->ss_number_layers; ++sl) {
-    tot_num_frames = 0;
    for (tl = 0; tl < cfg->ts_number_layers; ++tl) {
      const int layer = sl * cfg->ts_number_layers + tl;
      const int num_dropped =
@@ -468,7 +462,7 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
              ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer])
              : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] -
                 1);
-      tot_num_frames += rc->layer_input_frames[layer];
+      if (!sl) tot_num_frames += rc->layer_input_frames[layer];
      rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] *
                                          rc->layer_encoding_bitrate[layer] /
                                          tot_num_frames;
@@ -503,13 +497,14 @@ static void printout_rate_control_summary(struct RateControlStats *rc,
         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
  if (frame_cnt != tot_num_frames)
-    die("Error: Number of input frames not equal to output encoded frames != "
+    die(
+        "Error: Number of input frames not equal to output encoded frames != "
        "%d tot_num_frames = %d\n",
        frame_cnt, tot_num_frames);
 }

 vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
-                                       uint64_t sizes[8], int *count) {
+                                       uint32_t sizes[8], int *count) {
  // A chunk ending with a byte matching 0xc0 is an invalid chunk unless
  // it is a super frame index. If the last byte of real video compression
  // data is 0xc0 the encoder must add a 0 byte. If we have the marker but
@@ -606,9 +601,9 @@ void set_frame_flags_bypass_mode(int sl, int tl, int num_spatial_layers,
 }

 int main(int argc, const char **argv) {
-  AppInput app_input;
+  AppInput app_input = { 0 };
  VpxVideoWriter *writer = NULL;
-  VpxVideoInfo info;
+  VpxVideoInfo info = { 0 };
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t enc_cfg;
  SvcContext svc_ctx;
@@ -626,7 +621,7 @@ int main(int argc, const char **argv) {
  struct RateControlStats rc;
  vpx_svc_layer_id_t layer_id;
  vpx_svc_ref_frame_config_t ref_frame_config;
-  unsigned int sl, tl;
+  int sl, tl;
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
  double framerate = 30.0;
@@ -640,9 +635,8 @@ int main(int argc, const char **argv) {

 // Allocate image buffer
 #if CONFIG_VP9_HIGHBITDEPTH
-  if (!vpx_img_alloc(&raw,
-                     enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
-                                                    : VPX_IMG_FMT_I42016,
+  if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ? VPX_IMG_FMT_I420
+                                                          : VPX_IMG_FMT_I42016,
                     enc_cfg.g_w, enc_cfg.g_h, 32)) {
    die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h);
  }
@@ -680,7 +674,7 @@ int main(int argc, const char **argv) {
  }
 #if OUTPUT_RC_STATS
  // For now, just write temporal layer streams.
-  // TODO(marpan): do spatial by re-writing superframe.
+  // TODO(wonkap): do spatial by re-writing superframe.
  if (svc_ctx.output_rc_stat) {
    for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) {
      char file_name[PATH_MAX];
@@ -698,18 +692,10 @@ int main(int argc, const char **argv) {

  if (svc_ctx.speed != -1)
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed);
-  if (svc_ctx.threads) {
+  if (svc_ctx.threads)
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (svc_ctx.threads >> 1));
-    if (svc_ctx.threads > 1)
-      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1);
-    else
-      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0);
-  }
  if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1)
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-  if (svc_ctx.speed >= 5)
-    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-  vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900);

  // Encode frames
  while (!end_of_stream) {
@@ -745,7 +731,7 @@ int main(int argc, const char **argv) {
                        &ref_frame_config);
      // Keep track of input frames, to account for frame drops in rate control
      // stats/metrics.
-      for (sl = 0; sl < (unsigned int)enc_cfg.ss_number_layers; ++sl) {
+      for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
        ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers +
                                layer_id.temporal_layer_id];
      }
@@ -770,20 +756,18 @@ int main(int argc, const char **argv) {
          SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal;
          if (cx_pkt->data.frame.sz > 0) {
 #if OUTPUT_RC_STATS
-            uint64_t sizes[8];
+            uint32_t sizes[8];
            int count = 0;
 #endif
            vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf,
                                         cx_pkt->data.frame.sz,
                                         cx_pkt->data.frame.pts);
 #if OUTPUT_RC_STATS
-            // TODO(marpan): Put this (to line728) in separate function.
+            // TODO(marpan/wonkap): Put this (to line728) in separate function.
            if (svc_ctx.output_rc_stat) {
              vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id);
              parse_superframe_index(cx_pkt->data.frame.buf,
                                     cx_pkt->data.frame.sz, sizes, &count);
-              if (enc_cfg.ss_number_layers == 1)
-                sizes[0] = cx_pkt->data.frame.sz;
              // Note computing input_layer_frames here won't account for frame
              // drops in rate control stats.
              // TODO(marpan): Fix this for non-bypass mode so we can get stats
@@ -810,7 +794,7 @@ int main(int argc, const char **argv) {
                  rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl];
                  // Keep count of rate control stats per layer, for non-key
                  // frames.
-                  if (tl == (unsigned int)layer_id.temporal_layer_id &&
+                  if (tl == layer_id.temporal_layer_id &&
                      !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) {
                    rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl];
                    rc.layer_avg_rate_mismatch[layer] +=
@@ -824,7 +808,7 @@ int main(int argc, const char **argv) {
              // Update for short-time encoding bitrate states, for moving
              // window of size rc->window, shifted by rc->window / 2.
              // Ignore first window segment, due to key frame.
-              if (frame_cnt > (unsigned int)rc.window_size) {
+              if (frame_cnt > rc.window_size) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate;
@@ -840,14 +824,13 @@ int main(int argc, const char **argv) {
              }

              // Second shifted window.
-              if (frame_cnt >
-                  (unsigned int)(rc.window_size + rc.window_size / 2)) {
+              if (frame_cnt > rc.window_size + rc.window_size / 2) {
                tl = layer_id.temporal_layer_id;
                for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) {
                  sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate;
                }

-                if (frame_cnt > (unsigned int)(2 * rc.window_size) &&
+                if (frame_cnt > 2 * rc.window_size &&
                    frame_cnt % rc.window_size == 0) {
                  rc.window_count += 1;
                  rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size;
@@ -860,11 +843,10 @@ int main(int argc, const char **argv) {
            }
 #endif
          }
-          /*
+
          printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received,
                 !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY),
                 (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts);
-          */
          if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1)
            si->bytes_sum[0] += (int)cx_pkt->data.frame.sz;
          ++frames_received;
--- a/examples/vp9cx_set_ref.c
+++ b/examples/vp9cx_set_ref.c
@@ -53,7 +53,6 @@
 #include "vpx/vp8cx.h"
 #include "vpx/vpx_decoder.h"
 #include "vpx/vpx_encoder.h"
-#include "vp9/common/vp9_common.h"

 #include "./tools_common.h"
 #include "./video_writer.h"
@@ -191,7 +190,8 @@ static void find_mismatch(const vpx_image_t *const img1,
 }

 static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
-                           unsigned int frame_out, int *mismatch_seen) {
+                           vpx_codec_enc_cfg_t *cfg, unsigned int frame_out,
+                           int *mismatch_seen) {
  vpx_image_t enc_img, dec_img;
  struct vp9_ref_frame ref_enc, ref_dec;

@@ -225,10 +225,11 @@ static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
  vpx_img_free(&dec_img);
 }

-static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,
-                        unsigned int frame_in, VpxVideoWriter *writer,
-                        int test_decode, vpx_codec_ctx_t *dcodec,
-                        unsigned int *frame_out, int *mismatch_seen) {
+static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_codec_enc_cfg_t *cfg,
+                        vpx_image_t *img, unsigned int frame_in,
+                        VpxVideoWriter *writer, int test_decode,
+                        vpx_codec_ctx_t *dcodec, unsigned int *frame_out,
+                        int *mismatch_seen) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
@@ -269,7 +270,7 @@ static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,

  // Mismatch checking
  if (got_data && test_decode) {
-    testing_decode(ecodec, dcodec, *frame_out, mismatch_seen);
+    testing_decode(ecodec, dcodec, cfg, *frame_out, mismatch_seen);
  }

  return got_pkts;
@@ -278,12 +279,12 @@ static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,
 int main(int argc, char **argv) {
  FILE *infile = NULL;
  // Encoder
-  vpx_codec_ctx_t ecodec;
-  vpx_codec_enc_cfg_t cfg;
+  vpx_codec_ctx_t ecodec = { 0 };
+  vpx_codec_enc_cfg_t cfg = { 0 };
  unsigned int frame_in = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
-  VpxVideoInfo info;
+  VpxVideoInfo info = { 0 };
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;

@@ -304,13 +305,7 @@ int main(int argc, char **argv) {
  const char *height_arg = NULL;
  const char *infile_arg = NULL;
  const char *outfile_arg = NULL;
-  const char *update_frame_num_arg = NULL;
  unsigned int limit = 0;
-
-  vp9_zero(ecodec);
-  vp9_zero(cfg);
-  vp9_zero(info);
-
  exec_name = argv[0];

  if (argc < 6) die("Invalid number of arguments");
@@ -319,28 +314,25 @@ int main(int argc, char **argv) {
  height_arg = argv[2];
  infile_arg = argv[3];
  outfile_arg = argv[4];
-  update_frame_num_arg = argv[5];

  encoder = get_vpx_encoder_by_name("vp9");
  if (!encoder) die("Unsupported codec.");

-  update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0);
+  update_frame_num = atoi(argv[5]);
  // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are
  // allocated while calling vpx_codec_encode(), thus, setting reference for
  // 1st frame isn't supported.
-  if (update_frame_num <= 1) {
-    die("Couldn't parse frame number '%s'\n", update_frame_num_arg);
-  }
+  if (update_frame_num <= 1) die("Couldn't parse frame number '%s'\n", argv[5]);

  if (argc > 6) {
-    limit = (unsigned int)strtoul(argv[6], NULL, 0);
+    limit = atoi(argv[6]);
    if (update_frame_num > limit)
      die("Update frame number couldn't larger than limit\n");
  }

  info.codec_fourcc = encoder->fourcc;
-  info.frame_width = (int)strtol(width_arg, NULL, 0);
-  info.frame_height = (int)strtol(height_arg, NULL, 0);
+  info.frame_width = strtol(width_arg, NULL, 0);
+  info.frame_height = strtol(height_arg, NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

@@ -405,7 +397,7 @@ int main(int argc, char **argv) {
      }
    }

-    encode_frame(&ecodec, &raw, frame_in, writer, test_decode, &dcodec,
+    encode_frame(&ecodec, &cfg, &raw, frame_in, writer, test_decode, &dcodec,
                 &frame_out, &mismatch_seen);
    frame_in++;
    if (mismatch_seen) break;
@@ -413,8 +405,8 @@ int main(int argc, char **argv) {

  // Flush encoder.
  if (!mismatch_seen)
-    while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec,
-                        &frame_out, &mismatch_seen)) {
+    while (encode_frame(&ecodec, &cfg, NULL, frame_in, writer, test_decode,
+                        &dcodec, &frame_out, &mismatch_seen)) {
    }

  printf("\n");
--- a/examples/vpx_temporal_svc_encoder.c
+++ b/examples/vpx_temporal_svc_encoder.c
@@ -26,27 +26,17 @@
 #include "../tools_common.h"
 #include "../video_writer.h"

-#define VP8_ROI_MAP 0
-
 static const char *exec_name;

 void usage_exit(void) { exit(EXIT_FAILURE); }

-// Denoiser states for vp8, for temporal denoising.
-enum denoiserStateVp8 {
-  kVp8DenoiserOff,
-  kVp8DenoiserOnYOnly,
-  kVp8DenoiserOnYUV,
-  kVp8DenoiserOnYUVAggressive,
-  kVp8DenoiserOnAdaptive
-};
-
-// Denoiser states for vp9, for temporal denoising.
-enum denoiserStateVp9 {
-  kVp9DenoiserOff,
-  kVp9DenoiserOnYOnly,
-  // For SVC: denoise the top two spatial layers.
-  kVp9DenoiserOnYTwoSpatialLayers
+// Denoiser states, for temporal denoising.
+enum denoiserState {
+  kDenoiserOff,
+  kDenoiserOnYOnly,
+  kDenoiserOnYUV,
+  kDenoiserOnYUVAggressive,
+  kDenoiserOnAdaptive
 };

 static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };
@@ -164,53 +154,6 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc,
    die("Error: Number of input frames not equal to output! \n");
 }

-#if VP8_ROI_MAP
-static void vp8_set_roi_map(vpx_codec_enc_cfg_t *cfg, vpx_roi_map_t *roi) {
-  unsigned int i, j;
-  memset(roi, 0, sizeof(*roi));
-
-  // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
-  // segment is 16x16 for vp8, 8x8 for vp9.
-  roi->rows = (cfg->g_h + 15) / 16;
-  roi->cols = (cfg->g_w + 15) / 16;
-
-  // Applies delta QP on the segment blocks, varies from -63 to 63.
-  // Setting to negative means lower QP (better quality).
-  // Below we set delta_q to the extreme (-63) to show strong effect.
-  roi->delta_q[0] = 0;
-  roi->delta_q[1] = -63;
-  roi->delta_q[2] = 0;
-  roi->delta_q[3] = 0;
-
-  // Applies delta loopfilter strength on the segment blocks, varies from -63 to
-  // 63. Setting to positive means stronger loopfilter.
-  roi->delta_lf[0] = 0;
-  roi->delta_lf[1] = 0;
-  roi->delta_lf[2] = 0;
-  roi->delta_lf[3] = 0;
-
-  // Applies skip encoding threshold on the segment blocks, varies from 0 to
-  // UINT_MAX. Larger value means more skipping of encoding is possible.
-  // This skip threshold only applies on delta frames.
-  roi->static_threshold[0] = 0;
-  roi->static_threshold[1] = 0;
-  roi->static_threshold[2] = 0;
-  roi->static_threshold[3] = 0;
-
-  // Use 2 states: 1 is center square, 0 is the rest.
-  roi->roi_map =
-      (uint8_t *)calloc(roi->rows * roi->cols, sizeof(*roi->roi_map));
-  for (i = 0; i < roi->rows; ++i) {
-    for (j = 0; j < roi->cols; ++j) {
-      if (i > (roi->rows >> 2) && i < ((roi->rows * 3) >> 2) &&
-          j > (roi->cols >> 2) && j < ((roi->cols * 3) >> 2)) {
-        roi->roi_map[i * roi->cols + j] = 1;
-      }
-    }
-  }
-}
-#endif
-
 // Temporal scaling parameters:
 // NOTE: The 3 prediction frames cannot be used interchangeably due to
 // differences in the way they are handled throughout the code. The
@@ -552,7 +495,6 @@ int main(int argc, char **argv) {
  vpx_codec_err_t res;
  unsigned int width;
  unsigned int height;
-  uint32_t error_resilient = 0;
  int speed;
  int frame_avail;
  int got_data;
@@ -563,15 +505,16 @@ int main(int argc, char **argv) {
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
  int flag_periodicity = 1;
-#if VP8_ROI_MAP
-  vpx_roi_map_t roi;
-#endif
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
  vpx_svc_layer_id_t layer_id = { 0, 0 };
+#else
+  vpx_svc_layer_id_t layer_id = { 0 };
+#endif
  const VpxInterface *encoder = NULL;
  FILE *infile = NULL;
  struct RateControlMetrics rc;
  int64_t cx_time = 0;
-  const int min_args_base = 13;
+  const int min_args_base = 11;
 #if CONFIG_VP9_HIGHBITDEPTH
  vpx_bit_depth_t bit_depth = VPX_BITS_8;
  int input_bit_depth = 8;
@@ -587,15 +530,15 @@ int main(int argc, char **argv) {
  // Check usage and arguments.
  if (argc < min_args) {
 #if CONFIG_VP9_HIGHBITDEPTH
-    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
-        "<error_resilient> <threads> <mode> "
+    die(
+        "Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
        argv[0]);
 #else
-    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
-        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
-        "<error_resilient> <threads> <mode> "
+    die(
+        "Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
+        "<rate_num> <rate_den> <speed> <frame_drop_threshold> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> \n",
        argv[0]);
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -606,15 +549,15 @@ int main(int argc, char **argv) {

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

-  width = (unsigned int)strtoul(argv[4], NULL, 0);
-  height = (unsigned int)strtoul(argv[5], NULL, 0);
+  width = strtol(argv[4], NULL, 0);
+  height = strtol(argv[5], NULL, 0);
  if (width < 16 || width % 2 || height < 16 || height % 2) {
    die("Invalid resolution: %d x %d", width, height);
  }

-  layering_mode = (int)strtol(argv[12], NULL, 0);
+  layering_mode = strtol(argv[10], NULL, 0);
  if (layering_mode < 0 || layering_mode > 13) {
-    die("Invalid layering mode (0..12) %s", argv[12]);
+    die("Invalid layering mode (0..12) %s", argv[10]);
  }

  if (argc != min_args + mode_to_num_layers[layering_mode]) {
@@ -668,32 +611,32 @@ int main(int argc, char **argv) {
 #endif  // CONFIG_VP9_HIGHBITDEPTH

  // Timebase format e.g. 30fps: numerator=1, demoninator = 30.
-  cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0);
-  cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0);
+  cfg.g_timebase.num = strtol(argv[6], NULL, 0);
+  cfg.g_timebase.den = strtol(argv[7], NULL, 0);

-  speed = (int)strtol(argv[8], NULL, 0);
+  speed = strtol(argv[8], NULL, 0);
  if (speed < 0) {
    die("Invalid speed setting: must be positive");
  }

  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
-    rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
+    rc.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0);
    if (strncmp(encoder->name, "vp8", 3) == 0)
-      cfg.ts_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+      cfg.ts_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
    else if (strncmp(encoder->name, "vp9", 3) == 0)
-      cfg.layer_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
+      cfg.layer_target_bitrate[i - 11] = rc.layer_target_bitrate[i - 11];
  }

  // Real time parameters.
-  cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0);
+  cfg.rc_dropframe_thresh = strtol(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
  if (strncmp(encoder->name, "vp9", 3) == 0) cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
-  cfg.rc_buf_initial_sz = 600;
+  cfg.rc_buf_initial_sz = 500;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;

@@ -701,14 +644,10 @@ int main(int argc, char **argv) {
  cfg.rc_resize_allowed = 0;

  // Use 1 thread as default.
-  cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0);
+  cfg.g_threads = 1;

-  error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
-  if (error_resilient != 0 && error_resilient != 1) {
-    die("Invalid value for error resilient (0, 1): %d.", error_resilient);
-  }
  // Enable error resilient mode.
-  cfg.g_error_resilient = error_resilient;
+  cfg.g_error_resilient = 1;
  cfg.g_lag_in_frames = 0;
  cfg.kf_mode = VPX_KF_AUTO;

@@ -763,33 +702,18 @@ int main(int argc, char **argv) {

  if (strncmp(encoder->name, "vp8", 3) == 0) {
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed);
-    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff);
+    vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
-    vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0);
-#if VP8_ROI_MAP
-    vp8_set_roi_map(&cfg, &roi);
-    if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi))
-      die_codec(&codec, "Failed to set ROI map");
-#endif
-
  } else if (strncmp(encoder->name, "vp9", 3) == 0) {
    vpx_svc_extra_cfg_t svc_params;
    memset(&svc_params, 0, sizeof(svc_params));
    vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed);
    vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3);
-    vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0);
-    vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0);
    vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0);
-    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff);
+    vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kDenoiserOff);
    vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1);
    vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0);
    vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1));
-    // TODO(marpan/jianj): There is an issue with row-mt for low resolutons at
-    // high speed settings, disable its use for those cases for now.
-    if (cfg.g_threads > 1 && ((cfg.g_w > 320 && cfg.g_h > 240) || speed < 7))
-      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1);
-    else
-      vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0);
    if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1 : 0))
      die_codec(&codec, "Failed to set SVC");
    for (i = 0; i < cfg.ts_number_layers; ++i) {
@@ -808,7 +732,7 @@ int main(int argc, char **argv) {
  // For generating smaller key frames, use a smaller max_intra_size_pct
  // value, like 100 or 200.
  {
-    const int max_intra_size_pct = 1000;
+    const int max_intra_size_pct = 900;
    vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT,
                      max_intra_size_pct);
  }
@@ -818,8 +742,10 @@ int main(int argc, char **argv) {
    struct vpx_usec_timer timer;
    vpx_codec_iter_t iter = NULL;
    const vpx_codec_cx_pkt_t *pkt;
+#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION)
    // Update the temporal layer_id. No spatial layers in this test.
    layer_id.spatial_layer_id = 0;
+#endif
    layer_id.temporal_layer_id =
        cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity];
    if (strncmp(encoder->name, "vp9", 3) == 0) {
--- a/libs.mk
+++ b/libs.mk
@@ -12,7 +12,7 @@
 # ARM assembly files are written in RVCT-style. We use some make magic to
 # filter those files to allow GCC compilation
 ifeq ($(ARCH_ARM),yes)
-  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm)
+  ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.s,.asm)
 else
  ASM:=.asm
 endif
@@ -106,6 +106,9 @@ ifeq ($(CONFIG_VP9_DECODER),yes)
  CODEC_DOC_SECTIONS += vp9 vp9_decoder
 endif

+VP9_PREFIX=vp9/
+$(BUILD_PFX)$(VP9_PREFIX)%.c.o: CFLAGS += -Wextra
+
 ifeq ($(CONFIG_ENCODERS),yes)
  CODEC_DOC_SECTIONS += encoder
 endif
@@ -113,12 +116,6 @@ ifeq ($(CONFIG_DECODERS),yes)
  CODEC_DOC_SECTIONS += decoder
 endif

-# Suppress -Wextra warnings in third party code.
-$(BUILD_PFX)third_party/googletest/%.cc.o: CXXFLAGS += -Wno-missing-field-initializers
-# Suppress -Wextra warnings in first party code pending investigation.
-# https://bugs.chromium.org/p/webm/issues/detail?id=1069
-$(BUILD_PFX)vp8/encoder/onyx_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered
-$(BUILD_PFX)vp8/decoder/onyxd_if.c.o: CFLAGS += -Wno-unknown-warning-option -Wno-clobbered

 ifeq ($(CONFIG_MSVS),yes)
 CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)
@@ -149,7 +146,6 @@ CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c
 INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c
 ifeq ($(ARCH_X86)$(ARCH_X86_64),yes)
 INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm
-INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm
 endif
 CODEC_EXPORTS-yes += vpx/exports_com
 CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc
@@ -188,13 +184,6 @@ libvpx_srcs.txt:
 	@echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@
 CLEAN-OBJS += libvpx_srcs.txt

-# Assembly files that are included, but don't define symbols themselves.
-# Filtered out to avoid Windows build warnings.
-ASM_INCLUDES := \
-    third_party/x86inc/x86inc.asm \
-    vpx_config.asm \
-    vpx_ports/x86_abi_support.asm \
-    vpx_dsp/x86/bitdepth_conversion_sse2.asm \

 ifeq ($(CONFIG_EXTERNAL_BUILD),yes)
 ifeq ($(CONFIG_MSVS),yes)
@@ -206,6 +195,13 @@ vpx.def: $(call enabled,CODEC_EXPORTS)
            --out=$@ $^
 CLEAN-OBJS += vpx.def

+# Assembly files that are included, but don't define symbols themselves.
+# Filtered out to avoid Visual Studio build warnings.
+ASM_INCLUDES := \
+    third_party/x86inc/x86inc.asm \
+    vpx_config.asm \
+    vpx_ports/x86_abi_support.asm \
+
 vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def
 	@echo "    [CREATE] $@"
 	$(qexec)$(GEN_VCPROJ) \
@@ -228,12 +224,12 @@ vpx.$(VCPROJ_SFX): $(RTCD)

 endif
 else
-LIBVPX_OBJS=$(call objs, $(filter-out $(ASM_INCLUDES), $(CODEC_SRCS)))
+LIBVPX_OBJS=$(call objs,$(CODEC_SRCS))
 OBJS-yes += $(LIBVPX_OBJS)
 LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a
 $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS)

-SO_VERSION_MAJOR := 5
+SO_VERSION_MAJOR := 4
 SO_VERSION_MINOR := 0
 SO_VERSION_PATCH := 0
 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS))
@@ -367,7 +363,7 @@ endif
 #
 # Add assembler dependencies for configuration.
 #
-$(filter %.S.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
+$(filter %.s.o,$(OBJS-yes)):     $(BUILD_PFX)vpx_config.asm
 $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm


@@ -392,7 +388,7 @@ LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS))
 LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX)
 LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\
                     $(call enabled,LIBVPX_TEST_DATA))
-libvpx_test_data_url=https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx/$(1)
+libvpx_test_data_url=http://downloads.webmproject.org/test_data/libvpx/$(1)

 TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX)
 TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS))
@@ -405,16 +401,8 @@ CLEAN-OBJS += libvpx_test_srcs.txt

 $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1
 	@echo "    [DOWNLOAD] $@"
-	# Attempt to download the file using curl, retrying once if it fails for a
-	# partial file (18).
-	$(qexec)( \
-	  trap 'rm -f $@' INT TERM; \
-	  curl="curl --retry 1 -L -o $@ $(call libvpx_test_data_url,$(@F))"; \
-	  $$curl; \
-	  case "$$?" in \
-	    18) $$curl -C -;; \
-	  esac \
-	)
+	$(qexec)trap 'rm -f $@' INT TERM &&\
+            curl -L -o $@ $(call libvpx_test_data_url,$(@F))

 testdata:: $(LIBVPX_TEST_DATA)
 	$(qexec)[ -x "$$(which sha1sum)" ] && sha1sum=sha1sum;\
--- a/rate_hist.c
+++ b/rate_hist.c
@@ -37,13 +37,7 @@ struct rate_hist {
 struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg,
                                      const vpx_rational_t *fps) {
  int i;
-  struct rate_hist *hist = calloc(1, sizeof(*hist));
-
-  if (hist == NULL || cfg == NULL || fps == NULL || fps->num == 0 ||
-      fps->den == 0) {
-    destroy_rate_histogram(hist);
-    return NULL;
-  }
+  struct rate_hist *hist = malloc(sizeof(*hist));

  // Determine the number of samples in the buffer. Use the file's framerate
  // to determine the number of frames in rc_buf_sz milliseconds, with an
@@ -86,11 +80,7 @@ void update_rate_histogram(struct rate_hist *hist,
                      (uint64_t)cfg->g_timebase.num /
                      (uint64_t)cfg->g_timebase.den;

-  int idx;
-
-  if (hist == NULL || cfg == NULL || pkt == NULL) return;
-
-  idx = hist->frames++ % hist->samples;
+  int idx = hist->frames++ % hist->samples;
  hist->pts[idx] = now;
  hist->sz[idx] = (int)pkt->data.frame.sz;

@@ -126,14 +116,9 @@ void update_rate_histogram(struct rate_hist *hist,
 static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets,
                              int *num_buckets) {
  int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0;
-  int buckets;
+  int buckets = *num_buckets;
  int i;

-  assert(bucket != NULL);
-  assert(num_buckets != NULL);
-
-  buckets = *num_buckets;
-
  /* Find the extrema for this list of buckets */
  big_bucket = small_bucket = 0;
  for (i = 0; i < buckets; i++) {
@@ -196,8 +181,6 @@ static void show_histogram(const struct hist_bucket *bucket, int buckets,
  const char *pat1, *pat2;
  int i;

-  assert(bucket != NULL);
-
  switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) {
    case 1:
    case 2:
@@ -276,8 +259,6 @@ void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg,
  int i, scale;
  int buckets = 0;

-  if (hist == NULL || cfg == NULL) return;
-
  for (i = 0; i < RATE_BINS; i++) {
    if (hist->bucket[i].low == INT_MAX) continue;
    hist->bucket[buckets++] = hist->bucket[i];
--- a/test/acm_random.h
+++ b/test/acm_random.h
@@ -11,10 +11,6 @@
 #ifndef TEST_ACM_RANDOM_H_
 #define TEST_ACM_RANDOM_H_

-#include <assert.h>
-
-#include <limits>
-
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "vpx/vpx_integer.h"
@@ -54,13 +50,6 @@ class ACMRandom {
    return r < 128 ? r << 4 : r >> 4;
  }

-  uint32_t RandRange(const uint32_t range) {
-    // testing::internal::Random::Generate provides values in the range
-    // testing::internal::Random::kMaxRange.
-    assert(range <= testing::internal::Random::kMaxRange);
-    return random_.Generate(range);
-  }
-
  int PseudoUniform(int range) { return random_.Generate(range); }

  int operator()(int n) { return PseudoUniform(n); }
--- a/test/alt_ref_aq_segment_test.cc
+++ b/test/alt_ref_aq_segment_test.cc
@@ -1,157 +0,0 @@
-/*
- *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-#include "third_party/googletest/src/include/gtest/gtest.h"
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/i420_video_source.h"
-#include "test/util.h"
-
-namespace {
-
-class AltRefAqSegmentTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  AltRefAqSegmentTest() : EncoderTest(GET_PARAM(0)) {}
-  virtual ~AltRefAqSegmentTest() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(GET_PARAM(1));
-    set_cpu_used_ = GET_PARAM(2);
-    aq_mode_ = 0;
-    alt_ref_aq_mode_ = 0;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP9E_SET_ALT_REF_AQ, alt_ref_aq_mode_);
-      encoder->Control(VP9E_SET_AQ_MODE, aq_mode_);
-      encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100);
-    }
-  }
-
-  int set_cpu_used_;
-  int aq_mode_;
-  int alt_ref_aq_mode_;
-};
-
-// Validate that this ALT_REF_AQ/AQ segmentation mode
-// (ALT_REF_AQ=0, AQ=0/no_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ0) {
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_target_bitrate = 300;
-
-  aq_mode_ = 0;
-  alt_ref_aq_mode_ = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-// Validate that this ALT_REF_AQ/AQ segmentation mode
-// (ALT_REF_AQ=0, AQ=1/variance_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ1) {
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_target_bitrate = 300;
-
-  aq_mode_ = 1;
-  alt_ref_aq_mode_ = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-// Validate that this ALT_REF_AQ/AQ segmentation mode
-// (ALT_REF_AQ=0, AQ=2/complexity_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ2) {
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_target_bitrate = 300;
-
-  aq_mode_ = 2;
-  alt_ref_aq_mode_ = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-// Validate that this ALT_REF_AQ/AQ segmentation mode
-// (ALT_REF_AQ=0, AQ=3/cyclicrefresh_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ3) {
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_target_bitrate = 300;
-
-  aq_mode_ = 3;
-  alt_ref_aq_mode_ = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-// Validate that this ALT_REF_AQ/AQ segmentation mode
-// (ALT_REF_AQ=0, AQ=4/equator360_aq)
-// encodes and decodes without a mismatch.
-TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ4) {
-  cfg_.rc_min_quantizer = 8;
-  cfg_.rc_max_quantizer = 56;
-  cfg_.rc_end_usage = VPX_VBR;
-  cfg_.rc_buf_initial_sz = 500;
-  cfg_.rc_buf_optimal_sz = 500;
-  cfg_.rc_buf_sz = 1000;
-  cfg_.rc_target_bitrate = 300;
-
-  aq_mode_ = 4;
-  alt_ref_aq_mode_ = 1;
-
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 100);
-
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-}
-
-VP9_INSTANTIATE_TEST_CASE(AltRefAqSegmentTest,
-                          ::testing::Values(::libvpx_test::kOnePassGood,
-                                            ::libvpx_test::kTwoPassGood),
-                          ::testing::Range(2, 5));
-}  // namespace
--- a/test/altref_test.cc
+++ b/test/altref_test.cc
@@ -31,7 +31,7 @@ class AltRefTest : public ::libvpx_test::EncoderTest,
    SetMode(libvpx_test::kTwoPassGood);
  }

-  virtual void BeginPassHook(unsigned int /*pass*/) { altref_count_ = 0; }
+  virtual void BeginPassHook(unsigned int pass) { altref_count_ = 0; }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
--- a/test/android/Android.mk
+++ b/test/android/Android.mk
@@ -32,7 +32,6 @@ LOCAL_CPP_EXTENSION := .cc
 LOCAL_MODULE := gtest
 LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/
 LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include/
-LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/include/
 LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc
 include $(BUILD_STATIC_LIBRARY)

--- a/test/avg_test.cc
+++ b/test/avg_test.cc
@@ -14,7 +14,6 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"

@@ -23,7 +22,6 @@
 #include "test/register_state_check.h"
 #include "test/util.h"
 #include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/vpx_timer.h"

 using libvpx_test::ACMRandom;

@@ -55,7 +53,7 @@ class AverageTestBase : public ::testing::Test {
  }

  // Sum Pixels
-  static unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
+  unsigned int ReferenceAverage8x8(const uint8_t *source, int pitch) {
    unsigned int average = 0;
    for (int h = 0; h < 8; ++h) {
      for (int w = 0; w < 8; ++w) average += source[h * pitch + w];
@@ -63,7 +61,7 @@ class AverageTestBase : public ::testing::Test {
    return ((average + 32) >> 6);
  }

-  static unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
+  unsigned int ReferenceAverage4x4(const uint8_t *source, int pitch) {
    unsigned int average = 0;
    for (int h = 0; h < 4; ++h) {
      for (int w = 0; w < 4; ++w) average += source[h * pitch + w];
@@ -100,12 +98,11 @@ class AverageTest : public AverageTestBase,

 protected:
  void CheckAverages() {
-    const int block_size = GET_PARAM(3);
    unsigned int expected = 0;
-    if (block_size == 8) {
+    if (GET_PARAM(3) == 8) {
      expected =
          ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_);
-    } else if (block_size == 4) {
+    } else if (GET_PARAM(3) == 4) {
      expected =
          ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_);
    }
@@ -188,7 +185,7 @@ class IntProColTest : public AverageTestBase,
  int16_t sum_c_;
 };

-typedef int (*SatdFunc)(const tran_low_t *coeffs, int length);
+typedef int (*SatdFunc)(const int16_t *coeffs, int length);
 typedef std::tr1::tuple<int, SatdFunc> SatdTestParam;

 class SatdTest : public ::testing::Test,
@@ -198,7 +195,7 @@ class SatdTest : public ::testing::Test,
    satd_size_ = GET_PARAM(0);
    satd_func_ = GET_PARAM(1);
    rnd_.Reset(ACMRandom::DeterministicSeed());
-    src_ = reinterpret_cast<tran_low_t *>(
+    src_ = reinterpret_cast<int16_t *>(
        vpx_memalign(16, sizeof(*src_) * satd_size_));
    ASSERT_TRUE(src_ != NULL);
  }
@@ -208,15 +205,12 @@ class SatdTest : public ::testing::Test,
    vpx_free(src_);
  }

-  void FillConstant(const tran_low_t val) {
+  void FillConstant(const int16_t val) {
    for (int i = 0; i < satd_size_; ++i) src_[i] = val;
  }

  void FillRandom() {
-    for (int i = 0; i < satd_size_; ++i) {
-      const int16_t tmp = rnd_.Rand16();
-      src_[i] = (tran_low_t)tmp;
-    }
+    for (int i = 0; i < satd_size_; ++i) src_[i] = rnd_.Rand16();
  }

  void Check(const int expected) {
@@ -228,66 +222,11 @@ class SatdTest : public ::testing::Test,
  int satd_size_;

 private:
-  tran_low_t *src_;
+  int16_t *src_;
  SatdFunc satd_func_;
  ACMRandom rnd_;
 };

-typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
-                                  const tran_low_t *dqcoeff, int block_size);
-typedef std::tr1::tuple<int, BlockErrorFunc> BlockErrorTestFPParam;
-
-class BlockErrorTestFP
-    : public ::testing::Test,
-      public ::testing::WithParamInterface<BlockErrorTestFPParam> {
- protected:
-  virtual void SetUp() {
-    txfm_size_ = GET_PARAM(0);
-    block_error_func_ = GET_PARAM(1);
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-    coeff_ = reinterpret_cast<tran_low_t *>(
-        vpx_memalign(16, sizeof(*coeff_) * txfm_size_));
-    dqcoeff_ = reinterpret_cast<tran_low_t *>(
-        vpx_memalign(16, sizeof(*dqcoeff_) * txfm_size_));
-    ASSERT_TRUE(coeff_ != NULL);
-    ASSERT_TRUE(dqcoeff_ != NULL);
-  }
-
-  virtual void TearDown() {
-    libvpx_test::ClearSystemState();
-    vpx_free(coeff_);
-    vpx_free(dqcoeff_);
-  }
-
-  void FillConstant(const tran_low_t coeff_val, const tran_low_t dqcoeff_val) {
-    for (int i = 0; i < txfm_size_; ++i) coeff_[i] = coeff_val;
-    for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = dqcoeff_val;
-  }
-
-  void FillRandom() {
-    // Just two fixed seeds
-    rnd_.Reset(0xb0b9);
-    for (int i = 0; i < txfm_size_; ++i) coeff_[i] = rnd_.Rand16() >> 1;
-    rnd_.Reset(0xb0c8);
-    for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = rnd_.Rand16() >> 1;
-  }
-
-  void Check(const int64_t expected) {
-    int64_t total;
-    ASM_REGISTER_STATE_CHECK(
-        total = block_error_func_(coeff_, dqcoeff_, txfm_size_));
-    EXPECT_EQ(expected, total);
-  }
-
-  int txfm_size_;
-
- private:
-  tran_low_t *coeff_;
-  tran_low_t *dqcoeff_;
-  BlockErrorFunc block_error_func_;
-  ACMRandom rnd_;
-};
-
 uint8_t *AverageTestBase::source_data_ = NULL;

 TEST_P(AverageTest, MinValue) {
@@ -368,66 +307,6 @@ TEST_P(SatdTest, Random) {
  Check(expected);
 }

-TEST_P(SatdTest, DISABLED_Speed) {
-  const int kCountSpeedTestBlock = 20000;
-  vpx_usec_timer timer;
-  DECLARE_ALIGNED(16, tran_low_t, coeff[1024]);
-  const int blocksize = GET_PARAM(0);
-
-  vpx_usec_timer_start(&timer);
-  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
-    GET_PARAM(1)(coeff, blocksize);
-  }
-  vpx_usec_timer_mark(&timer);
-  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-  printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
-}
-
-TEST_P(BlockErrorTestFP, MinValue) {
-  const int64_t kMin = -32640;
-  const int64_t expected = kMin * kMin * txfm_size_;
-  FillConstant(kMin, 0);
-  Check(expected);
-}
-
-TEST_P(BlockErrorTestFP, MaxValue) {
-  const int64_t kMax = 32640;
-  const int64_t expected = kMax * kMax * txfm_size_;
-  FillConstant(kMax, 0);
-  Check(expected);
-}
-
-TEST_P(BlockErrorTestFP, Random) {
-  int64_t expected;
-  switch (txfm_size_) {
-    case 16: expected = 2051681432; break;
-    case 64: expected = 11075114379; break;
-    case 256: expected = 44386271116; break;
-    case 1024: expected = 184774996089; break;
-    default:
-      FAIL() << "Invalid satd size (" << txfm_size_
-             << ") valid: 16/64/256/1024";
-  }
-  FillRandom();
-  Check(expected);
-}
-
-TEST_P(BlockErrorTestFP, DISABLED_Speed) {
-  const int kCountSpeedTestBlock = 20000;
-  vpx_usec_timer timer;
-  DECLARE_ALIGNED(16, tran_low_t, coeff[1024]);
-  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[1024]);
-  const int blocksize = GET_PARAM(0);
-
-  vpx_usec_timer_start(&timer);
-  for (int i = 0; i < kCountSpeedTestBlock; ++i) {
-    GET_PARAM(1)(coeff, dqcoeff, blocksize);
-  }
-  vpx_usec_timer_mark(&timer);
-  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-  printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time);
-}
-
 using std::tr1::make_tuple;

 INSTANTIATE_TEST_CASE_P(
@@ -441,13 +320,6 @@ INSTANTIATE_TEST_CASE_P(C, SatdTest,
                                          make_tuple(256, &vpx_satd_c),
                                          make_tuple(1024, &vpx_satd_c)));

-INSTANTIATE_TEST_CASE_P(
-    C, BlockErrorTestFP,
-    ::testing::Values(make_tuple(16, &vp9_block_error_fp_c),
-                      make_tuple(64, &vp9_block_error_fp_c),
-                      make_tuple(256, &vp9_block_error_fp_c),
-                      make_tuple(1024, &vp9_block_error_fp_c)));
-
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
    SSE2, AverageTest,
@@ -477,28 +349,6 @@ INSTANTIATE_TEST_CASE_P(SSE2, SatdTest,
                                          make_tuple(64, &vpx_satd_sse2),
                                          make_tuple(256, &vpx_satd_sse2),
                                          make_tuple(1024, &vpx_satd_sse2)));
-
-INSTANTIATE_TEST_CASE_P(
-    SSE2, BlockErrorTestFP,
-    ::testing::Values(make_tuple(16, &vp9_block_error_fp_sse2),
-                      make_tuple(64, &vp9_block_error_fp_sse2),
-                      make_tuple(256, &vp9_block_error_fp_sse2),
-                      make_tuple(1024, &vp9_block_error_fp_sse2)));
-#endif  // HAVE_SSE2
-
-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, SatdTest,
-                        ::testing::Values(make_tuple(16, &vpx_satd_avx2),
-                                          make_tuple(64, &vpx_satd_avx2),
-                                          make_tuple(256, &vpx_satd_avx2),
-                                          make_tuple(1024, &vpx_satd_avx2)));
-
-INSTANTIATE_TEST_CASE_P(
-    AVX2, BlockErrorTestFP,
-    ::testing::Values(make_tuple(16, &vp9_block_error_fp_avx2),
-                      make_tuple(64, &vp9_block_error_fp_avx2),
-                      make_tuple(256, &vp9_block_error_fp_avx2),
-                      make_tuple(1024, &vp9_block_error_fp_avx2)));
 #endif

 #if HAVE_NEON
@@ -530,18 +380,7 @@ INSTANTIATE_TEST_CASE_P(NEON, SatdTest,
                                          make_tuple(64, &vpx_satd_neon),
                                          make_tuple(256, &vpx_satd_neon),
                                          make_tuple(1024, &vpx_satd_neon)));
-
-// TODO(jianj): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, BlockErrorTestFP,
-    ::testing::Values(make_tuple(16, &vp9_block_error_fp_neon),
-                      make_tuple(64, &vp9_block_error_fp_neon),
-                      make_tuple(256, &vp9_block_error_fp_neon),
-                      make_tuple(1024, &vp9_block_error_fp_neon)));
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON
+#endif

 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
@@ -552,30 +391,6 @@ INSTANTIATE_TEST_CASE_P(
                      make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa),
                      make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa),
                      make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa)));
-
-INSTANTIATE_TEST_CASE_P(
-    MSA, IntProRowTest,
-    ::testing::Values(make_tuple(16, &vpx_int_pro_row_msa, &vpx_int_pro_row_c),
-                      make_tuple(32, &vpx_int_pro_row_msa, &vpx_int_pro_row_c),
-                      make_tuple(64, &vpx_int_pro_row_msa,
-                                 &vpx_int_pro_row_c)));
-
-INSTANTIATE_TEST_CASE_P(
-    MSA, IntProColTest,
-    ::testing::Values(make_tuple(16, &vpx_int_pro_col_msa, &vpx_int_pro_col_c),
-                      make_tuple(32, &vpx_int_pro_col_msa, &vpx_int_pro_col_c),
-                      make_tuple(64, &vpx_int_pro_col_msa,
-                                 &vpx_int_pro_col_c)));
-
-// TODO(jingning): Remove the highbitdepth flag once the SIMD functions are
-// in place.
-#if !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, SatdTest,
-                        ::testing::Values(make_tuple(16, &vpx_satd_msa),
-                                          make_tuple(64, &vpx_satd_msa),
-                                          make_tuple(256, &vpx_satd_msa),
-                                          make_tuple(1024, &vpx_satd_msa)));
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_MSA
+#endif

 }  // namespace
--- a/test/buffer.h
+++ b/test/buffer.h
@@ -1,382 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef TEST_BUFFER_H_
-#define TEST_BUFFER_H_
-
-#include <stdio.h>
-
-#include <limits>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "test/acm_random.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace libvpx_test {
-
-template <typename T>
-class Buffer {
- public:
-  Buffer(int width, int height, int top_padding, int left_padding,
-         int right_padding, int bottom_padding)
-      : width_(width), height_(height), top_padding_(top_padding),
-        left_padding_(left_padding), right_padding_(right_padding),
-        bottom_padding_(bottom_padding), alignment_(0), padding_value_(0),
-        stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
-
-  Buffer(int width, int height, int top_padding, int left_padding,
-         int right_padding, int bottom_padding, unsigned int alignment)
-      : width_(width), height_(height), top_padding_(top_padding),
-        left_padding_(left_padding), right_padding_(right_padding),
-        bottom_padding_(bottom_padding), alignment_(alignment),
-        padding_value_(0), stride_(0), raw_size_(0), num_elements_(0),
-        raw_buffer_(NULL) {}
-
-  Buffer(int width, int height, int padding)
-      : width_(width), height_(height), top_padding_(padding),
-        left_padding_(padding), right_padding_(padding),
-        bottom_padding_(padding), alignment_(0), padding_value_(0), stride_(0),
-        raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
-
-  Buffer(int width, int height, int padding, unsigned int alignment)
-      : width_(width), height_(height), top_padding_(padding),
-        left_padding_(padding), right_padding_(padding),
-        bottom_padding_(padding), alignment_(alignment), padding_value_(0),
-        stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {}
-
-  ~Buffer() {
-    if (alignment_) {
-      vpx_free(raw_buffer_);
-    } else {
-      delete[] raw_buffer_;
-    }
-  }
-
-  T *TopLeftPixel() const;
-
-  int stride() const { return stride_; }
-
-  // Set the buffer (excluding padding) to 'value'.
-  void Set(const T value);
-
-  // Set the buffer (excluding padding) to the output of ACMRandom function
-  // 'rand_func'.
-  void Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)());
-
-  // Set the buffer (excluding padding) to the output of ACMRandom function
-  // 'RandRange' with range 'low' to 'high' which typically must be within
-  // testing::internal::Random::kMaxRange (1u << 31). However, because we want
-  // to allow negative low (and high) values, it is restricted to INT32_MAX
-  // here.
-  void Set(ACMRandom *rand_class, const T low, const T high);
-
-  // Copy the contents of Buffer 'a' (excluding padding).
-  void CopyFrom(const Buffer<T> &a);
-
-  void DumpBuffer() const;
-
-  // Highlight the differences between two buffers if they are the same size.
-  void PrintDifference(const Buffer<T> &a) const;
-
-  bool HasPadding() const;
-
-  // Sets all the values in the buffer to 'padding_value'.
-  void SetPadding(const T padding_value);
-
-  // Checks if all the values (excluding padding) are equal to 'value' if the
-  // Buffers are the same size.
-  bool CheckValues(const T value) const;
-
-  // Check that padding matches the expected value or there is no padding.
-  bool CheckPadding() const;
-
-  // Compare the non-padding portion of two buffers if they are the same size.
-  bool CheckValues(const Buffer<T> &a) const;
-
-  bool Init() {
-    if (raw_buffer_ != NULL) return false;
-    EXPECT_GT(width_, 0);
-    EXPECT_GT(height_, 0);
-    EXPECT_GE(top_padding_, 0);
-    EXPECT_GE(left_padding_, 0);
-    EXPECT_GE(right_padding_, 0);
-    EXPECT_GE(bottom_padding_, 0);
-    stride_ = left_padding_ + width_ + right_padding_;
-    num_elements_ = stride_ * (top_padding_ + height_ + bottom_padding_);
-    raw_size_ = num_elements_ * sizeof(T);
-    if (alignment_) {
-      EXPECT_GE(alignment_, sizeof(T));
-      // Ensure alignment of the first value will be preserved.
-      EXPECT_EQ((left_padding_ * sizeof(T)) % alignment_, 0u);
-      // Ensure alignment of the subsequent rows will be preserved when there is
-      // a stride.
-      if (stride_ != width_) {
-        EXPECT_EQ((stride_ * sizeof(T)) % alignment_, 0u);
-      }
-      raw_buffer_ = reinterpret_cast<T *>(vpx_memalign(alignment_, raw_size_));
-    } else {
-      raw_buffer_ = new (std::nothrow) T[num_elements_];
-    }
-    EXPECT_TRUE(raw_buffer_ != NULL);
-    SetPadding(std::numeric_limits<T>::max());
-    return !::testing::Test::HasFailure();
-  }
-
- private:
-  bool BufferSizesMatch(const Buffer<T> &a) const;
-
-  const int width_;
-  const int height_;
-  const int top_padding_;
-  const int left_padding_;
-  const int right_padding_;
-  const int bottom_padding_;
-  const unsigned int alignment_;
-  T padding_value_;
-  int stride_;
-  int raw_size_;
-  int num_elements_;
-  T *raw_buffer_;
-};
-
-template <typename T>
-T *Buffer<T>::TopLeftPixel() const {
-  if (!raw_buffer_) return NULL;
-  return raw_buffer_ + (top_padding_ * stride_) + left_padding_;
-}
-
-template <typename T>
-void Buffer<T>::Set(const T value) {
-  if (!raw_buffer_) return;
-  T *src = TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      src[width] = value;
-    }
-    src += stride_;
-  }
-}
-
-template <typename T>
-void Buffer<T>::Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)()) {
-  if (!raw_buffer_) return;
-  T *src = TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      src[width] = (*rand_class.*rand_func)();
-    }
-    src += stride_;
-  }
-}
-
-template <typename T>
-void Buffer<T>::Set(ACMRandom *rand_class, const T low, const T high) {
-  if (!raw_buffer_) return;
-
-  EXPECT_LE(low, high);
-  EXPECT_LE(static_cast<int64_t>(high) - low,
-            std::numeric_limits<int32_t>::max());
-
-  T *src = TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      // 'low' will be promoted to unsigned given the return type of RandRange.
-      // Store the value as an int to avoid unsigned overflow warnings when
-      // 'low' is negative.
-      const int32_t value =
-          static_cast<int32_t>((*rand_class).RandRange(high - low));
-      src[width] = static_cast<T>(value + low);
-    }
-    src += stride_;
-  }
-}
-
-template <typename T>
-void Buffer<T>::CopyFrom(const Buffer<T> &a) {
-  if (!raw_buffer_) return;
-  if (!BufferSizesMatch(a)) return;
-
-  T *a_src = a.TopLeftPixel();
-  T *b_src = this->TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      b_src[width] = a_src[width];
-    }
-    a_src += a.stride();
-    b_src += this->stride();
-  }
-}
-
-template <typename T>
-void Buffer<T>::DumpBuffer() const {
-  if (!raw_buffer_) return;
-  for (int height = 0; height < height_ + top_padding_ + bottom_padding_;
-       ++height) {
-    for (int width = 0; width < stride_; ++width) {
-      printf("%4d", raw_buffer_[height + width * stride_]);
-    }
-    printf("\n");
-  }
-}
-
-template <typename T>
-bool Buffer<T>::HasPadding() const {
-  if (!raw_buffer_) return false;
-  return top_padding_ || left_padding_ || right_padding_ || bottom_padding_;
-}
-
-template <typename T>
-void Buffer<T>::PrintDifference(const Buffer<T> &a) const {
-  if (!raw_buffer_) return;
-  if (!BufferSizesMatch(a)) return;
-
-  T *a_src = a.TopLeftPixel();
-  T *b_src = TopLeftPixel();
-
-  printf("This buffer:\n");
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      if (a_src[width] != b_src[width]) {
-        printf("*%3d", b_src[width]);
-      } else {
-        printf("%4d", b_src[width]);
-      }
-    }
-    printf("\n");
-    a_src += a.stride();
-    b_src += this->stride();
-  }
-
-  a_src = a.TopLeftPixel();
-  b_src = TopLeftPixel();
-
-  printf("Reference buffer:\n");
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      if (a_src[width] != b_src[width]) {
-        printf("*%3d", a_src[width]);
-      } else {
-        printf("%4d", a_src[width]);
-      }
-    }
-    printf("\n");
-    a_src += a.stride();
-    b_src += this->stride();
-  }
-}
-
-template <typename T>
-void Buffer<T>::SetPadding(const T padding_value) {
-  if (!raw_buffer_) return;
-  padding_value_ = padding_value;
-
-  T *src = raw_buffer_;
-  for (int i = 0; i < num_elements_; ++i) {
-    src[i] = padding_value;
-  }
-}
-
-template <typename T>
-bool Buffer<T>::CheckValues(const T value) const {
-  if (!raw_buffer_) return false;
-  T *src = TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      if (value != src[width]) {
-        return false;
-      }
-    }
-    src += stride_;
-  }
-  return true;
-}
-
-template <typename T>
-bool Buffer<T>::CheckPadding() const {
-  if (!raw_buffer_) return false;
-  if (!HasPadding()) return true;
-
-  // Top padding.
-  T const *top = raw_buffer_;
-  for (int i = 0; i < stride_ * top_padding_; ++i) {
-    if (padding_value_ != top[i]) {
-      return false;
-    }
-  }
-
-  // Left padding.
-  T const *left = TopLeftPixel() - left_padding_;
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < left_padding_; ++width) {
-      if (padding_value_ != left[width]) {
-        return false;
-      }
-    }
-    left += stride_;
-  }
-
-  // Right padding.
-  T const *right = TopLeftPixel() + width_;
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < right_padding_; ++width) {
-      if (padding_value_ != right[width]) {
-        return false;
-      }
-    }
-    right += stride_;
-  }
-
-  // Bottom padding
-  T const *bottom = raw_buffer_ + (top_padding_ + height_) * stride_;
-  for (int i = 0; i < stride_ * bottom_padding_; ++i) {
-    if (padding_value_ != bottom[i]) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-template <typename T>
-bool Buffer<T>::CheckValues(const Buffer<T> &a) const {
-  if (!raw_buffer_) return false;
-  if (!BufferSizesMatch(a)) return false;
-
-  T *a_src = a.TopLeftPixel();
-  T *b_src = this->TopLeftPixel();
-  for (int height = 0; height < height_; ++height) {
-    for (int width = 0; width < width_; ++width) {
-      if (a_src[width] != b_src[width]) {
-        return false;
-      }
-    }
-    a_src += a.stride();
-    b_src += this->stride();
-  }
-  return true;
-}
-
-template <typename T>
-bool Buffer<T>::BufferSizesMatch(const Buffer<T> &a) const {
-  if (!raw_buffer_) return false;
-  if (a.width_ != this->width_ || a.height_ != this->height_) {
-    printf(
-        "Reference buffer of size %dx%d does not match this buffer which is "
-        "size %dx%d\n",
-        a.width_, a.height_, this->width_, this->height_);
-    return false;
-  }
-
-  return true;
-}
-}  // namespace libvpx_test
-#endif  // TEST_BUFFER_H_
--- a/test/byte_alignment_test.cc
+++ b/test/byte_alignment_test.cc
@@ -128,8 +128,8 @@ class ByteAlignmentTest
  // TODO(fgalligan): Move the MD5 testing code into another class.
  void OpenMd5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
-        << "MD5 file open failed. Filename: " << md5_file_name_;
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+                                   << md5_file_name_;
  }

  void CheckMd5(const vpx_image_t &img) {
--- a/test/codec_factory.h
+++ b/test/codec_factory.h
@@ -65,12 +65,6 @@ class CodecTestWith3Params
    : public ::testing::TestWithParam<
          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {};

-template <class T1, class T2, class T3, class T4>
-class CodecTestWith4Params
-    : public ::testing::TestWithParam<
-          std::tr1::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {
-};
-
 /*
 * VP8 Codec Definitions
 */
@@ -121,8 +115,6 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_DECODER
    return new VP8Decoder(cfg, flags);
 #else
-    (void)cfg;
-    (void)flags;
    return NULL;
 #endif
  }
@@ -134,10 +126,6 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
    return new VP8Encoder(cfg, deadline, init_flags, stats);
 #else
-    (void)cfg;
-    (void)deadline;
-    (void)init_flags;
-    (void)stats;
    return NULL;
 #endif
  }
@@ -147,8 +135,6 @@ class VP8CodecFactory : public CodecFactory {
 #if CONFIG_VP8_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage);
 #else
-    (void)cfg;
-    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
@@ -217,8 +203,6 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_DECODER
    return new VP9Decoder(cfg, flags);
 #else
-    (void)cfg;
-    (void)flags;
    return NULL;
 #endif
  }
@@ -230,10 +214,6 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
    return new VP9Encoder(cfg, deadline, init_flags, stats);
 #else
-    (void)cfg;
-    (void)deadline;
-    (void)init_flags;
-    (void)stats;
    return NULL;
 #endif
  }
@@ -243,8 +223,6 @@ class VP9CodecFactory : public CodecFactory {
 #if CONFIG_VP9_ENCODER
    return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage);
 #else
-    (void)cfg;
-    (void)usage;
    return VPX_CODEC_INCAPABLE;
 #endif
  }
--- a/test/comp_avg_pred_test.cc
+++ b/test/comp_avg_pred_test.cc
@@ -1,182 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_dsp_rtcd.h"
-
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/register_state_check.h"
-#include "vpx_ports/vpx_timer.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-using ::libvpx_test::Buffer;
-
-typedef void (*AvgPredFunc)(uint8_t *a, const uint8_t *b, int w, int h,
-                            const uint8_t *c, int c_stride);
-
-uint8_t avg_with_rounding(uint8_t a, uint8_t b) { return (a + b + 1) >> 1; }
-
-void reference_pred(const Buffer<uint8_t> &pred, const Buffer<uint8_t> &ref,
-                    int width, int height, Buffer<uint8_t> *avg) {
-  for (int y = 0; y < height; ++y) {
-    for (int x = 0; x < width; ++x) {
-      avg->TopLeftPixel()[y * avg->stride() + x] =
-          avg_with_rounding(pred.TopLeftPixel()[y * pred.stride() + x],
-                            ref.TopLeftPixel()[y * ref.stride() + x]);
-    }
-  }
-}
-
-class AvgPredTest : public ::testing::TestWithParam<AvgPredFunc> {
- public:
-  virtual void SetUp() {
-    avg_pred_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  AvgPredFunc avg_pred_func_;
-  ACMRandom rnd_;
-};
-
-TEST_P(AvgPredTest, SizeCombinations) {
-  // This is called as part of the sub pixel variance. As such it must be one of
-  // the variance block sizes.
-
-  for (int width_pow = 2; width_pow <= 6; ++width_pow) {
-    for (int height_pow = width_pow - 1; height_pow <= width_pow + 1;
-         ++height_pow) {
-      // Don't test 4x2 or 64x128
-      if (height_pow == 1 || height_pow == 7) continue;
-
-      // The sse2 special-cases when ref width == stride, so make sure to test
-      // it.
-      for (int ref_padding = 0; ref_padding < 2; ref_padding++) {
-        const int width = 1 << width_pow;
-        const int height = 1 << height_pow;
-        // Only the reference buffer may have a stride not equal to width.
-        Buffer<uint8_t> ref =
-            Buffer<uint8_t>(width, height, ref_padding ? 8 : 0);
-        ASSERT_TRUE(ref.Init());
-        Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16);
-        ASSERT_TRUE(pred.Init());
-        Buffer<uint8_t> avg_ref = Buffer<uint8_t>(width, height, 0, 16);
-        ASSERT_TRUE(avg_ref.Init());
-        Buffer<uint8_t> avg_chk = Buffer<uint8_t>(width, height, 0, 16);
-        ASSERT_TRUE(avg_chk.Init());
-
-        ref.Set(&rnd_, &ACMRandom::Rand8);
-        pred.Set(&rnd_, &ACMRandom::Rand8);
-
-        reference_pred(pred, ref, width, height, &avg_ref);
-        ASM_REGISTER_STATE_CHECK(
-            avg_pred_func_(avg_chk.TopLeftPixel(), pred.TopLeftPixel(), width,
-                           height, ref.TopLeftPixel(), ref.stride()));
-
-        EXPECT_TRUE(avg_chk.CheckValues(avg_ref));
-        if (HasFailure()) {
-          printf("Width: %d Height: %d\n", width, height);
-          avg_chk.PrintDifference(avg_ref);
-          return;
-        }
-      }
-    }
-  }
-}
-
-TEST_P(AvgPredTest, CompareReferenceRandom) {
-  const int width = 64;
-  const int height = 32;
-  Buffer<uint8_t> ref = Buffer<uint8_t>(width, height, 8);
-  ASSERT_TRUE(ref.Init());
-  Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16);
-  ASSERT_TRUE(pred.Init());
-  Buffer<uint8_t> avg_ref = Buffer<uint8_t>(width, height, 0, 16);
-  ASSERT_TRUE(avg_ref.Init());
-  Buffer<uint8_t> avg_chk = Buffer<uint8_t>(width, height, 0, 16);
-  ASSERT_TRUE(avg_chk.Init());
-
-  for (int i = 0; i < 500; ++i) {
-    ref.Set(&rnd_, &ACMRandom::Rand8);
-    pred.Set(&rnd_, &ACMRandom::Rand8);
-
-    reference_pred(pred, ref, width, height, &avg_ref);
-    ASM_REGISTER_STATE_CHECK(avg_pred_func_(avg_chk.TopLeftPixel(),
-                                            pred.TopLeftPixel(), width, height,
-                                            ref.TopLeftPixel(), ref.stride()));
-    EXPECT_TRUE(avg_chk.CheckValues(avg_ref));
-    if (HasFailure()) {
-      printf("Width: %d Height: %d\n", width, height);
-      avg_chk.PrintDifference(avg_ref);
-      return;
-    }
-  }
-}
-
-TEST_P(AvgPredTest, DISABLED_Speed) {
-  for (int width_pow = 2; width_pow <= 6; ++width_pow) {
-    for (int height_pow = width_pow - 1; height_pow <= width_pow + 1;
-         ++height_pow) {
-      // Don't test 4x2 or 64x128
-      if (height_pow == 1 || height_pow == 7) continue;
-
-      for (int ref_padding = 0; ref_padding < 2; ref_padding++) {
-        const int width = 1 << width_pow;
-        const int height = 1 << height_pow;
-        Buffer<uint8_t> ref =
-            Buffer<uint8_t>(width, height, ref_padding ? 8 : 0);
-        ASSERT_TRUE(ref.Init());
-        Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16);
-        ASSERT_TRUE(pred.Init());
-        Buffer<uint8_t> avg = Buffer<uint8_t>(width, height, 0, 16);
-        ASSERT_TRUE(avg.Init());
-
-        ref.Set(&rnd_, &ACMRandom::Rand8);
-        pred.Set(&rnd_, &ACMRandom::Rand8);
-
-        vpx_usec_timer timer;
-        vpx_usec_timer_start(&timer);
-        for (int i = 0; i < 10000000 / (width * height); ++i) {
-          avg_pred_func_(avg.TopLeftPixel(), pred.TopLeftPixel(), width, height,
-                         ref.TopLeftPixel(), ref.stride());
-        }
-        vpx_usec_timer_mark(&timer);
-
-        const int elapsed_time =
-            static_cast<int>(vpx_usec_timer_elapsed(&timer));
-        printf("Average Test (ref_padding: %d) %dx%d time: %5d us\n",
-               ref_padding, width, height, elapsed_time);
-      }
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, AvgPredTest,
-                        ::testing::Values(&vpx_comp_avg_pred_c));
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, AvgPredTest,
-                        ::testing::Values(&vpx_comp_avg_pred_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, AvgPredTest,
-                        ::testing::Values(&vpx_comp_avg_pred_neon));
-#endif  // HAVE_NEON
-
-#if HAVE_VSX
-INSTANTIATE_TEST_CASE_P(VSX, AvgPredTest,
-                        ::testing::Values(&vpx_comp_avg_pred_vsx));
-#endif  // HAVE_VSX
-}  // namespace
--- a/test/convolve_test.cc
+++ b/test/convolve_test.cc
--- a/test/datarate_test.cc
+++ b/test/datarate_test.cc
--- a/test/dct16x16_test.cc
+++ b/test/dct16x16_test.cc
@@ -255,11 +255,11 @@ void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 10);
 }

 void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_256_add_c(in, out, stride, 12);
 }

 void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
@@ -273,36 +273,36 @@ void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
 }

 void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
+  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 10);
 }

 void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
+  vp9_highbd_iht16x16_256_add_c(in, out, stride, tx_type, 12);
 }

 #if HAVE_SSE2
 void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 10);
 }

 void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_10_add_c(in, out, stride, 12);
 }

 void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 10);
 }

 void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_256_add_sse2(in, out, stride, 12);
 }

 void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 10);
 }

 void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct16x16_10_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -353,7 +353,7 @@ class Trans16x16TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

@@ -475,10 +475,10 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_,
+        inv_txfm_ref(output_ref_block, CONVERT_TO_BYTEPTR(ref16), pitch_,
                     tx_type_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(output_ref_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }
      if (bit_depth_ == VPX_BITS_8) {
@@ -530,7 +530,8 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), 16));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }

@@ -542,8 +543,8 @@ class Trans16x16TestBase {
        const uint32_t diff = dst[j] - src[j];
 #endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
-        EXPECT_GE(1u, error)
-            << "Error: 16x16 IDCT has error " << error << " at index " << j;
+        EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
+                             << " at index " << j;
      }
    }
  }
@@ -584,9 +585,9 @@ class Trans16x16TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
 #if CONFIG_VP9_HIGHBITDEPTH
-        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif  // CONFIG_VP9_HIGHBITDEPTH
      }

@@ -744,6 +745,66 @@ TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
 }

+class PartialTrans16x16Test : public ::testing::TestWithParam<
+                                  std::tr1::tuple<FdctFunc, vpx_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans16x16Test() {}
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  FdctFunc fwd_txfm_;
+};
+
+TEST_P(PartialTrans16x16Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 1, output[0]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ((minval * kNumCoeffs) >> 1, output[0]);
+}
+
+TEST_P(PartialTrans16x16Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  int sum = 0;
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 16));
+  EXPECT_EQ(sum >> 1, output[0]);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -776,6 +837,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans16x16Test,
+    ::testing::Values(make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct16x16_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
@@ -784,14 +850,17 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_c,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
-    ::testing::Values(make_tuple(&vpx_fdct16x16_neon,
-                                 &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+    ::testing::Values(make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_neon,
+                                 0, VPX_BITS_8)));
+#endif

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -808,6 +877,9 @@ INSTANTIATE_TEST_CASE_P(
                                 2, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2,
                                 3, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -842,6 +914,9 @@ INSTANTIATE_TEST_CASE_P(
                                 &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12),
                      make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2,
                                 3167, VPX_BITS_12)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -857,12 +932,8 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3,
                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans16x16Test,
+                        ::testing::Values(make_tuple(&vpx_fdct16x16_1_msa,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(VSX, Trans16x16DCT,
-                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
-                                                     &vpx_idct16x16_256_add_vsx,
-                                                     0, VPX_BITS_8)));
-#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct32x32_test.cc
+++ b/test/dct32x32_test.cc
@@ -71,11 +71,11 @@ typedef std::tr1::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 10);
 }

 void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct32x32_1024_add_c(in, out, stride, 12);
 }
 #endif  // CONFIG_VP9_HIGHBITDEPTH

@@ -137,7 +137,7 @@ TEST_P(Trans32x32Test, AccuracyCheck) {
 #if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ASM_REGISTER_STATE_CHECK(
-          inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32));
+          inv_txfm_(test_temp_block, CONVERT_TO_BYTEPTR(dst16), 32));
 #endif
    }

@@ -275,7 +275,7 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
 #if CONFIG_VP9_HIGHBITDEPTH
    } else {
-      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
+      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CONVERT_TO_BYTEPTR(dst16), 32));
 #endif
    }
    for (int j = 0; j < kNumCoeffs; ++j) {
@@ -292,6 +292,67 @@ TEST_P(Trans32x32Test, InverseAccuracy) {
  }
 }

+class PartialTrans32x32Test
+    : public ::testing::TestWithParam<
+          std::tr1::tuple<FwdTxfmFunc, vpx_bit_depth_t> > {
+ public:
+  virtual ~PartialTrans32x32Test() {}
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    bit_depth_ = GET_PARAM(1);
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  vpx_bit_depth_t bit_depth_;
+  FwdTxfmFunc fwd_txfm_;
+};
+
+TEST_P(PartialTrans32x32Test, Extremes) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  const int minval = -maxval;
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = maxval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((maxval * kNumCoeffs) >> 3, output[0]);
+
+  for (int i = 0; i < kNumCoeffs; ++i) input[i] = minval;
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ((minval * kNumCoeffs) >> 3, output[0]);
+}
+
+TEST_P(PartialTrans32x32Test, Random) {
+#if CONFIG_VP9_HIGHBITDEPTH
+  const int16_t maxval =
+      static_cast<int16_t>(clip_pixel_highbd(1 << 30, bit_depth_));
+#else
+  const int16_t maxval = 255;
+#endif
+  DECLARE_ALIGNED(16, int16_t, input[kNumCoeffs]);
+  DECLARE_ALIGNED(16, tran_low_t, output[kNumCoeffs]);
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+
+  int sum = 0;
+  for (int i = 0; i < kNumCoeffs; ++i) {
+    const int val = (i & 1) ? -rnd(maxval + 1) : rnd(maxval + 1);
+    input[i] = val;
+    sum += val;
+  }
+  output[0] = 0;
+  ASM_REGISTER_STATE_CHECK(fwd_txfm_(input, output, 32));
+  EXPECT_EQ(sum >> 3, output[0]);
+}
+
 using std::tr1::make_tuple;

 #if CONFIG_VP9_HIGHBITDEPTH
@@ -305,6 +366,11 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    C, PartialTrans32x32Test,
+    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_fdct32x32_1_c, VPX_BITS_12)));
 #else
 INSTANTIATE_TEST_CASE_P(
    C, Trans32x32Test,
@@ -312,16 +378,19 @@ INSTANTIATE_TEST_CASE_P(
                                 VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c,
                                 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(C, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_c,
+                                                     VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, Trans32x32Test,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
-                                 &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct32x32_rd_neon,
+    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_neon,
+                                 0, VPX_BITS_8),
+                      make_tuple(&vpx_fdct32x32_rd_c,
                                 &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8)));
-#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
@@ -330,6 +399,9 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_sse2,
                                 &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -346,6 +418,9 @@ INSTANTIATE_TEST_CASE_P(
                   VPX_BITS_8),
        make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1,
                   VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(SSE2, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -364,14 +439,8 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8),
                      make_tuple(&vpx_fdct32x32_rd_msa,
                                 &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(MSA, PartialTrans32x32Test,
+                        ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa,
+                                                     VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    VSX, Trans32x32Test,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx,
-                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct32x32_rd_c,
-                                 &vpx_idct32x32_1024_add_vsx, 1, VPX_BITS_8)));
-#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/dct_partial_test.cc
+++ b/test/dct_partial_test.cc
@@ -1,169 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <limits>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vpx_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vpx/vpx_codec.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_dsp/vpx_dsp_common.h"
-
-using libvpx_test::ACMRandom;
-using libvpx_test::Buffer;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
-
-namespace {
-typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-
-typedef tuple<PartialFdctFunc, int /* size */, vpx_bit_depth_t>
-    PartialFdctParam;
-
-tran_low_t partial_fdct_ref(const Buffer<int16_t> &in, int size) {
-  int64_t sum = 0;
-  for (int y = 0; y < size; ++y) {
-    for (int x = 0; x < size; ++x) {
-      sum += in.TopLeftPixel()[y * in.stride() + x];
-    }
-  }
-
-  switch (size) {
-    case 4: sum *= 2; break;
-    case 8: /*sum = sum;*/ break;
-    case 16: sum >>= 1; break;
-    case 32: sum >>= 3; break;
-  }
-
-  return static_cast<tran_low_t>(sum);
-}
-
-class PartialFdctTest : public ::testing::TestWithParam<PartialFdctParam> {
- public:
-  PartialFdctTest() {
-    fwd_txfm_ = GET_PARAM(0);
-    size_ = GET_PARAM(1);
-    bit_depth_ = GET_PARAM(2);
-  }
-
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void RunTest() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int16_t maxvalue =
-        clip_pixel_highbd(std::numeric_limits<int16_t>::max(), bit_depth_);
-    const int16_t minvalue = -maxvalue;
-    Buffer<int16_t> input_block =
-        Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
-    ASSERT_TRUE(input_block.Init());
-    Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(output_block.Init());
-
-    for (int i = 0; i < 100; ++i) {
-      if (i == 0) {
-        input_block.Set(maxvalue);
-      } else if (i == 1) {
-        input_block.Set(minvalue);
-      } else {
-        input_block.Set(&rnd, minvalue, maxvalue);
-      }
-
-      ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block.TopLeftPixel(),
-                                         output_block.TopLeftPixel(),
-                                         input_block.stride()));
-
-      EXPECT_EQ(partial_fdct_ref(input_block, size_),
-                output_block.TopLeftPixel()[0]);
-    }
-  }
-
-  PartialFdctFunc fwd_txfm_;
-  vpx_bit_depth_t bit_depth_;
-  int size_;
-};
-
-TEST_P(PartialFdctTest, PartialFdctTest) { RunTest(); }
-
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_12),
-                      make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_10),
-                      make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8),
-                      make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_12),
-                      make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_10),
-                      make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8),
-                      make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_12),
-                      make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_10),
-                      make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2, 32, VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_1_sse2, 16, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_1_sse2, 8, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_1_sse2, 4, VPX_BITS_8)));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_12),
-                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_10),
-                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    NEON, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(MSA, PartialFdctTest,
-                        ::testing::Values(make_tuple(&vpx_fdct8x8_1_msa, 8,
-                                                     VPX_BITS_8)));
-#else   // !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    MSA, PartialFdctTest,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa, 32, VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_1_msa, 16, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_1_msa, 8, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_MSA
-}  // namespace
--- a/test/dct_test.cc
+++ b/test/dct_test.cc
@@ -1,737 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vp9/common/vp9_entropy.h"
-#include "vpx/vpx_codec.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"
-
-using libvpx_test::ACMRandom;
-using libvpx_test::Buffer;
-using std::tr1::tuple;
-using std::tr1::make_tuple;
-
-namespace {
-typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
-typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
-typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
-                        int tx_type);
-typedef void (*FhtFuncRef)(const Buffer<int16_t> &in, Buffer<tran_low_t> *out,
-                           int size, int tx_type);
-typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
-                        int tx_type);
-
-/* forward transform, inverse transform, size, transform type, bit depth */
-typedef tuple<FdctFunc, IdctFunc, int, int, vpx_bit_depth_t> DctParam;
-typedef tuple<FhtFunc, IhtFunc, int, int, vpx_bit_depth_t> HtParam;
-
-void fdct_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
-              int /*tx_type*/) {
-  const int16_t *i = in.TopLeftPixel();
-  const int i_stride = in.stride();
-  tran_low_t *o = out->TopLeftPixel();
-  if (size == 4) {
-    vpx_fdct4x4_c(i, o, i_stride);
-  } else if (size == 8) {
-    vpx_fdct8x8_c(i, o, i_stride);
-  } else if (size == 16) {
-    vpx_fdct16x16_c(i, o, i_stride);
-  } else if (size == 32) {
-    vpx_fdct32x32_c(i, o, i_stride);
-  }
-}
-
-void fht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
-             int tx_type) {
-  const int16_t *i = in.TopLeftPixel();
-  const int i_stride = in.stride();
-  tran_low_t *o = out->TopLeftPixel();
-  if (size == 4) {
-    vp9_fht4x4_c(i, o, i_stride, tx_type);
-  } else if (size == 8) {
-    vp9_fht8x8_c(i, o, i_stride, tx_type);
-  } else if (size == 16) {
-    vp9_fht16x16_c(i, o, i_stride, tx_type);
-  }
-}
-
-void fwht_ref(const Buffer<int16_t> &in, Buffer<tran_low_t> *out, int size,
-              int /*tx_type*/) {
-  ASSERT_EQ(size, 4);
-  vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
-}
-
-#if CONFIG_VP9_HIGHBITDEPTH
-#define idctNxN(n, coeffs, bitdepth)                                       \
-  void idct##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out,      \
-                                  int stride) {                            \
-    vpx_highbd_idct##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out), \
-                                                stride, bitdepth);         \
-  }
-
-idctNxN(4, 16, 10);
-idctNxN(4, 16, 12);
-idctNxN(8, 64, 10);
-idctNxN(8, 64, 12);
-idctNxN(16, 256, 10);
-idctNxN(16, 256, 12);
-idctNxN(32, 1024, 10);
-idctNxN(32, 1024, 12);
-
-#define ihtNxN(n, coeffs, bitdepth)                                        \
-  void iht##n##x##n##_##bitdepth(const tran_low_t *in, uint8_t *out,       \
-                                 int stride, int tx_type) {                \
-    vp9_highbd_iht##n##x##n##_##coeffs##_add_c(in, CAST_TO_SHORTPTR(out),  \
-                                               stride, tx_type, bitdepth); \
-  }
-
-ihtNxN(4, 16, 10);
-ihtNxN(4, 16, 12);
-ihtNxN(8, 64, 10);
-ihtNxN(8, 64, 12);
-ihtNxN(16, 256, 10);
-// ihtNxN(16, 256, 12);
-
-void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
-}
-
-void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_iwht4x4_16_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
-}
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-class TransTestBase {
- public:
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  virtual void RunFwdTxfm(const Buffer<int16_t> &in,
-                          Buffer<tran_low_t> *out) = 0;
-
-  virtual void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) = 0;
-
-  void RunAccuracyCheck(int limit) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    Buffer<int16_t> test_input_block =
-        Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
-    ASSERT_TRUE(test_input_block.Init());
-    Buffer<tran_low_t> test_temp_block =
-        Buffer<tran_low_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(test_temp_block.Init());
-    Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst.Init());
-    Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(src.Init());
-#if CONFIG_VP9_HIGHBITDEPTH
-    Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst16.Init());
-    Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(src16.Init());
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-    uint32_t max_error = 0;
-    int64_t total_error = 0;
-    const int count_test_block = 10000;
-    for (int i = 0; i < count_test_block; ++i) {
-      if (bit_depth_ == 8) {
-        src.Set(&rnd, &ACMRandom::Rand8);
-        dst.Set(&rnd, &ACMRandom::Rand8);
-        // Initialize a test block with input range [-255, 255].
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
-                src.TopLeftPixel()[h * src.stride() + w] -
-                dst.TopLeftPixel()[h * dst.stride() + w];
-          }
-        }
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        src16.Set(&rnd, 0, max_pixel_value_);
-        dst16.Set(&rnd, 0, max_pixel_value_);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] =
-                src16.TopLeftPixel()[h * src16.stride() + w] -
-                dst16.TopLeftPixel()[h * dst16.stride() + w];
-          }
-        }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
-
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block));
-      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, dst.TopLeftPixel()));
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
-
-      for (int h = 0; h < size_; ++h) {
-        for (int w = 0; w < size_; ++w) {
-          int diff;
-#if CONFIG_VP9_HIGHBITDEPTH
-          if (bit_depth_ != 8) {
-            diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
-                   src16.TopLeftPixel()[h * src16.stride() + w];
-          } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-            diff = dst.TopLeftPixel()[h * dst.stride() + w] -
-                   src.TopLeftPixel()[h * src.stride() + w];
-#if CONFIG_VP9_HIGHBITDEPTH
-          }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-          const uint32_t error = diff * diff;
-          if (max_error < error) max_error = error;
-          total_error += error;
-        }
-      }
-    }
-
-    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
-        << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
-
-    EXPECT_GE(count_test_block * limit, total_error)
-        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
-        << " per block";
-  }
-
-  void RunCoeffCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 5000;
-    Buffer<int16_t> input_block =
-        Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
-    ASSERT_TRUE(input_block.Init());
-    Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
-    ASSERT_TRUE(output_ref_block.Init());
-    Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(output_block.Init());
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-max_pixel_value_,
-      // max_pixel_value_].
-      input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_);
-
-      fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_);
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block));
-
-      // The minimum quant value is 4.
-      EXPECT_TRUE(output_block.CheckValues(output_ref_block));
-      if (::testing::Test::HasFailure()) {
-        printf("Size: %d Transform type: %d\n", size_, tx_type_);
-        output_block.PrintDifference(output_ref_block);
-        return;
-      }
-    }
-  }
-
-  void RunMemCheck() {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 5000;
-    Buffer<int16_t> input_extreme_block =
-        Buffer<int16_t>(size_, size_, 8, size_ == 4 ? 0 : 16);
-    ASSERT_TRUE(input_extreme_block.Init());
-    Buffer<tran_low_t> output_ref_block = Buffer<tran_low_t>(size_, size_, 0);
-    ASSERT_TRUE(output_ref_block.Init());
-    Buffer<tran_low_t> output_block = Buffer<tran_low_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(output_block.Init());
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with -max_pixel_value_ or max_pixel_value_.
-      if (i == 0) {
-        input_extreme_block.Set(max_pixel_value_);
-      } else if (i == 1) {
-        input_extreme_block.Set(-max_pixel_value_);
-      } else {
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            input_extreme_block
-                .TopLeftPixel()[h * input_extreme_block.stride() + w] =
-                rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_;
-          }
-        }
-      }
-
-      fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_);
-      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block));
-
-      // The minimum quant value is 4.
-      EXPECT_TRUE(output_block.CheckValues(output_ref_block));
-      for (int h = 0; h < size_; ++h) {
-        for (int w = 0; w < size_; ++w) {
-          EXPECT_GE(
-              4 * DCT_MAX_VALUE << (bit_depth_ - 8),
-              abs(output_block.TopLeftPixel()[h * output_block.stride() + w]))
-              << "Error: 4x4 FDCT has coefficient larger than "
-                 "4*DCT_MAX_VALUE"
-              << " at " << w << "," << h;
-          if (::testing::Test::HasFailure()) {
-            printf("Size: %d Transform type: %d\n", size_, tx_type_);
-            output_block.DumpBuffer();
-            return;
-          }
-        }
-      }
-    }
-  }
-
-  void RunInvAccuracyCheck(int limit) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-    const int count_test_block = 1000;
-    Buffer<int16_t> in = Buffer<int16_t>(size_, size_, 4);
-    ASSERT_TRUE(in.Init());
-    Buffer<tran_low_t> coeff = Buffer<tran_low_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(coeff.Init());
-    Buffer<uint8_t> dst = Buffer<uint8_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst.Init());
-    Buffer<uint8_t> src = Buffer<uint8_t>(size_, size_, 0);
-    ASSERT_TRUE(src.Init());
-    Buffer<uint16_t> dst16 = Buffer<uint16_t>(size_, size_, 0, 16);
-    ASSERT_TRUE(dst16.Init());
-    Buffer<uint16_t> src16 = Buffer<uint16_t>(size_, size_, 0);
-    ASSERT_TRUE(src16.Init());
-
-    for (int i = 0; i < count_test_block; ++i) {
-      // Initialize a test block with input range [-max_pixel_value_,
-      // max_pixel_value_].
-      if (bit_depth_ == VPX_BITS_8) {
-        src.Set(&rnd, &ACMRandom::Rand8);
-        dst.Set(&rnd, &ACMRandom::Rand8);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            in.TopLeftPixel()[h * in.stride() + w] =
-                src.TopLeftPixel()[h * src.stride() + w] -
-                dst.TopLeftPixel()[h * dst.stride() + w];
-          }
-        }
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        src16.Set(&rnd, 0, max_pixel_value_);
-        dst16.Set(&rnd, 0, max_pixel_value_);
-        for (int h = 0; h < size_; ++h) {
-          for (int w = 0; w < size_; ++w) {
-            in.TopLeftPixel()[h * in.stride() + w] =
-                src16.TopLeftPixel()[h * src16.stride() + w] -
-                dst16.TopLeftPixel()[h * dst16.stride() + w];
-          }
-        }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
-
-      fwd_txfm_ref(in, &coeff, size_, tx_type_);
-
-      if (bit_depth_ == VPX_BITS_8) {
-        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst.TopLeftPixel()));
-#if CONFIG_VP9_HIGHBITDEPTH
-      } else {
-        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16.TopLeftPixel())));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-      }
-
-      for (int h = 0; h < size_; ++h) {
-        for (int w = 0; w < size_; ++w) {
-          int diff;
-#if CONFIG_VP9_HIGHBITDEPTH
-          if (bit_depth_ != 8) {
-            diff = dst16.TopLeftPixel()[h * dst16.stride() + w] -
-                   src16.TopLeftPixel()[h * src16.stride() + w];
-          } else {
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-            diff = dst.TopLeftPixel()[h * dst.stride() + w] -
-                   src.TopLeftPixel()[h * src.stride() + w];
-#if CONFIG_VP9_HIGHBITDEPTH
-          }
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-          const uint32_t error = diff * diff;
-          EXPECT_GE(static_cast<uint32_t>(limit), error)
-              << "Error: " << size_ << "x" << size_ << " IDCT has error "
-              << error << " at " << w << "," << h;
-        }
-      }
-    }
-  }
-
-  FhtFuncRef fwd_txfm_ref;
-  vpx_bit_depth_t bit_depth_;
-  int tx_type_;
-  int max_pixel_value_;
-  int size_;
-};
-
-class TransDCT : public TransTestBase,
-                 public ::testing::TestWithParam<DctParam> {
- public:
-  TransDCT() {
-    fwd_txfm_ref = fdct_ref;
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    size_ = GET_PARAM(2);
-    tx_type_ = GET_PARAM(3);
-    bit_depth_ = GET_PARAM(4);
-    max_pixel_value_ = (1 << bit_depth_) - 1;
-  }
-
- protected:
-  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
-    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
-  }
-
-  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
-    inv_txfm_(in.TopLeftPixel(), out, in.stride());
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(TransDCT, AccuracyCheck) { RunAccuracyCheck(1); }
-
-TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(TransDCT, MemCheck) { RunMemCheck(); }
-
-TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 32, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 32, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 16, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 16, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 4, 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_SSE2
-#if !CONFIG_EMULATE_HARDWARE
-#if CONFIG_VP9_HIGHBITDEPTH
-/* TODO:(johannkoenig) Determine why these fail AccuracyCheck
-   make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 32, 0, VPX_BITS_12),
-   make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 16, 0, VPX_BITS_12),
-*/
-INSTANTIATE_TEST_CASE_P(
-    SSE2, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 32, 0,
-                   VPX_BITS_10),
-        make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 16, 0,
-                   VPX_BITS_10),
-        make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_sse2, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_10, 8, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_12, 8, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4, 0,
-                   VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    SSE2, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_sse2,
-                                 &vpx_idct32x32_1024_add_sse2, 32, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_sse2,
-                                 &vpx_idct16x16_256_add_sse2, 16, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 8,
-                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_sse2, 4,
-                                 0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // HAVE_SSE2
-
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
-#if !ARCH_X86_64
-// TODO(johannkoenig): high bit depth fdct8x8.
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
-                                 32, 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_sse2, 8, 0,
-                                 VPX_BITS_8)));
-#else
-// vpx_fdct8x8_ssse3 is only available in 64 bit builds.
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_sse2,
-                                 32, 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2,
-                                 8, 0, VPX_BITS_8)));
-#endif  // !ARCH_X86_64
-#endif  // HAVE_SSSE3 && !CONFIG_EMULATE_HARDWARE
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-
-#if !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
-// TODO(johannkoenig): high bit depth fdct32x32.
-INSTANTIATE_TEST_CASE_P(
-    AVX2, TransDCT, ::testing::Values(make_tuple(&vpx_fdct32x32_avx2,
-                                                 &vpx_idct32x32_1024_add_sse2,
-                                                 32, 0, VPX_BITS_8)));
-
-#endif  // !CONFIG_VP9_HIGHBITDEPTH && HAVE_AVX2 && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_NEON
-#if !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    NEON, TransDCT,
-    ::testing::Values(make_tuple(&vpx_fdct32x32_neon,
-                                 &vpx_idct32x32_1024_add_neon, 32, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct16x16_neon,
-                                 &vpx_idct16x16_256_add_neon, 16, 0,
-                                 VPX_BITS_8),
-                      make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 8,
-                                 0, VPX_BITS_8),
-                      make_tuple(&vpx_fdct4x4_neon, &vpx_idct4x4_16_add_neon, 4,
-                                 0, VPX_BITS_8)));
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // HAVE_NEON
-
-#if HAVE_MSA
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(
-    MSA, TransDCT,
-    ::testing::Values(
-        make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 32, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 8, 0, VPX_BITS_8),
-        make_tuple(&vpx_fdct4x4_msa, &vpx_idct4x4_16_add_msa, 4, 0,
-                   VPX_BITS_8)));
-#endif  // !CONFIG_EMULATE_HARDWARE
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_MSA
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(VSX, TransDCT,
-                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
-                                                     &vpx_idct4x4_16_add_vsx, 4,
-                                                     0, VPX_BITS_8)));
-#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-class TransHT : public TransTestBase, public ::testing::TestWithParam<HtParam> {
- public:
-  TransHT() {
-    fwd_txfm_ref = fht_ref;
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    size_ = GET_PARAM(2);
-    tx_type_ = GET_PARAM(3);
-    bit_depth_ = GET_PARAM(4);
-    max_pixel_value_ = (1 << bit_depth_) - 1;
-  }
-
- protected:
-  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
-    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_);
-  }
-
-  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
-    inv_txfm_(in.TopLeftPixel(), out, in.stride(), tx_type_);
-  }
-
-  FhtFunc fwd_txfm_;
-  IhtFunc inv_txfm_;
-};
-
-TEST_P(TransHT, AccuracyCheck) { RunAccuracyCheck(1); }
-
-TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(TransHT, MemCheck) { RunMemCheck(); }
-
-TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
-
-/* TODO:(johannkoenig) Determine why these fail AccuracyCheck
-   make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 0, VPX_BITS_12),
-   make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 1, VPX_BITS_12),
-   make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 2, VPX_BITS_12),
-   make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 16, 3, VPX_BITS_12),
-  */
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, TransHT,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 16, 3, VPX_BITS_10),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 8, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 8, 3, VPX_BITS_12),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 1, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 2, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 4, 3, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 1, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 2, VPX_BITS_12),
-        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 4, 3, VPX_BITS_12),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(
-    C, TransHT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 16, 3, VPX_BITS_8),
-
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 8, 3, VPX_BITS_8),
-
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 4, 3, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, TransHT,
-    ::testing::Values(
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 0,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 1,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 2,
-                   VPX_BITS_8),
-        make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_sse2, 16, 3,
-                   VPX_BITS_8),
-
-        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 8, 3, VPX_BITS_8),
-
-        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 0, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 1, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 2, VPX_BITS_8),
-        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 4, 3,
-                   VPX_BITS_8)));
-#endif  // HAVE_SSE2
-
-class TransWHT : public TransTestBase,
-                 public ::testing::TestWithParam<DctParam> {
- public:
-  TransWHT() {
-    fwd_txfm_ref = fwht_ref;
-    fwd_txfm_ = GET_PARAM(0);
-    inv_txfm_ = GET_PARAM(1);
-    size_ = GET_PARAM(2);
-    tx_type_ = GET_PARAM(3);
-    bit_depth_ = GET_PARAM(4);
-    max_pixel_value_ = (1 << bit_depth_) - 1;
-  }
-
- protected:
-  void RunFwdTxfm(const Buffer<int16_t> &in, Buffer<tran_low_t> *out) {
-    fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride());
-  }
-
-  void RunInvTxfm(const Buffer<tran_low_t> &in, uint8_t *out) {
-    inv_txfm_(in.TopLeftPixel(), out, in.stride());
-  }
-
-  FdctFunc fwd_txfm_;
-  IdctFunc inv_txfm_;
-};
-
-TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); }
-
-TEST_P(TransWHT, CoeffCheck) { RunCoeffCheck(); }
-
-TEST_P(TransWHT, MemCheck) { RunMemCheck(); }
-
-TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
-
-#if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    C, TransWHT,
-    ::testing::Values(
-        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 4, 0, VPX_BITS_10),
-        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 4, 0, VPX_BITS_12),
-        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 4, 0, VPX_BITS_8)));
-#else
-INSTANTIATE_TEST_CASE_P(C, TransWHT,
-                        ::testing::Values(make_tuple(&vp9_fwht4x4_c,
-                                                     &vpx_iwht4x4_16_add_c, 4,
-                                                     0, VPX_BITS_8)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, TransWHT,
-                        ::testing::Values(make_tuple(&vp9_fwht4x4_sse2,
-                                                     &vpx_iwht4x4_16_add_sse2,
-                                                     4, 0, VPX_BITS_8)));
-#endif  // HAVE_SSE2
-}  // namespace
--- a/test/decode_api_test.cc
+++ b/test/decode_api_test.cc
@@ -172,21 +172,4 @@ TEST(DecodeAPI, Vp9PeekSI) {
 }
 #endif  // CONFIG_VP9_DECODER

-TEST(DecodeAPI, HighBitDepthCapability) {
-// VP8 should not claim VP9 HBD as a capability.
-#if CONFIG_VP8_DECODER
-  const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_dx_algo);
-  EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
-#endif
-
-#if CONFIG_VP9_DECODER
-  const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_dx_algo);
-#if CONFIG_VP9_HIGHBITDEPTH
-  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH);
-#else
-  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
-#endif
-#endif
-}
-
 }  // namespace
--- a/test/decode_svc_test.cc
+++ b/test/decode_svc_test.cc
@@ -1,124 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <string>
-
-#include "test/codec_factory.h"
-#include "test/decode_test_driver.h"
-#include "test/ivf_video_source.h"
-#include "test/test_vectors.h"
-#include "test/util.h"
-
-namespace {
-
-const unsigned int kNumFrames = 19;
-
-class DecodeSvcTest : public ::libvpx_test::DecoderTest,
-                      public ::libvpx_test::CodecTestWithParam<const char *> {
- protected:
-  DecodeSvcTest() : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)) {}
-  virtual ~DecodeSvcTest() {}
-
-  virtual void PreDecodeFrameHook(
-      const libvpx_test::CompressedVideoSource &video,
-      libvpx_test::Decoder *decoder) {
-    if (video.frame_number() == 0)
-      decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER, spatial_layer_);
-  }
-
-  virtual void DecompressedFrameHook(const vpx_image_t &img,
-                                     const unsigned int frame_number) {
-    ASSERT_EQ(img.d_w, width_);
-    ASSERT_EQ(img.d_h, height_);
-    total_frames_ = frame_number;
-  }
-
-  int spatial_layer_;
-  unsigned int width_;
-  unsigned int height_;
-  unsigned int total_frames_;
-};
-
-// SVC test vector is 1280x720, with 3 spatial layers, and 20 frames.
-
-// Decode the SVC test vector, which has 3 spatial layers, and decode up to
-// spatial layer 0. Verify the resolution of each decoded frame and the total
-// number of frames decoded. This results in 1/4x1/4 resolution (320x180).
-TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) {
-  const std::string filename = GET_PARAM(1);
-  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
-  video.reset(new libvpx_test::IVFVideoSource(filename));
-  ASSERT_TRUE(video.get() != NULL);
-  video->Init();
-  total_frames_ = 0;
-  spatial_layer_ = 0;
-  width_ = 320;
-  height_ = 180;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  ASSERT_EQ(total_frames_, kNumFrames);
-}
-
-// Decode the SVC test vector, which has 3 spatial layers, and decode up to
-// spatial layer 1. Verify the resolution of each decoded frame and the total
-// number of frames decoded. This results in 1/2x1/2 resolution (640x360).
-TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) {
-  const std::string filename = GET_PARAM(1);
-  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
-  video.reset(new libvpx_test::IVFVideoSource(filename));
-  ASSERT_TRUE(video.get() != NULL);
-  video->Init();
-  total_frames_ = 0;
-  spatial_layer_ = 1;
-  width_ = 640;
-  height_ = 360;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  ASSERT_EQ(total_frames_, kNumFrames);
-}
-
-// Decode the SVC test vector, which has 3 spatial layers, and decode up to
-// spatial layer 2. Verify the resolution of each decoded frame and the total
-// number of frames decoded. This results in the full resolution (1280x720).
-TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) {
-  const std::string filename = GET_PARAM(1);
-  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
-  video.reset(new libvpx_test::IVFVideoSource(filename));
-  ASSERT_TRUE(video.get() != NULL);
-  video->Init();
-  total_frames_ = 0;
-  spatial_layer_ = 2;
-  width_ = 1280;
-  height_ = 720;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  ASSERT_EQ(total_frames_, kNumFrames);
-}
-
-// Decode the SVC test vector, which has 3 spatial layers, and decode up to
-// spatial layer 10. Verify the resolution of each decoded frame and the total
-// number of frames decoded. This is beyond the number of spatial layers, so
-// the decoding should result in the full resolution (1280x720).
-TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer10) {
-  const std::string filename = GET_PARAM(1);
-  testing::internal::scoped_ptr<libvpx_test::CompressedVideoSource> video;
-  video.reset(new libvpx_test::IVFVideoSource(filename));
-  ASSERT_TRUE(video.get() != NULL);
-  video->Init();
-  total_frames_ = 0;
-  spatial_layer_ = 10;
-  width_ = 1280;
-  height_ = 720;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-  ASSERT_EQ(total_frames_, kNumFrames);
-}
-
-VP9_INSTANTIATE_TEST_CASE(
-    DecodeSvcTest, ::testing::ValuesIn(libvpx_test::kVP9TestVectorsSvc,
-                                       libvpx_test::kVP9TestVectorsSvc +
-                                           libvpx_test::kNumVP9TestVectorsSvc));
-}  // namespace
--- a/test/decode_test_driver.cc
+++ b/test/decode_test_driver.cc
@@ -53,13 +53,13 @@ void DecoderTest::HandlePeekResult(Decoder *const decoder,
     * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first
     * frame, which must be a keyframe. */
    if (video->frame_number() == 0)
-      ASSERT_EQ(VPX_CODEC_OK, res_peek)
-          << "Peek return failed: " << vpx_codec_err_to_string(res_peek);
+      ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
+                                        << vpx_codec_err_to_string(res_peek);
  } else {
    /* The Vp9 implementation of PeekStream returns an error only if the
     * data passed to it isn't a valid Vp9 chunk. */
-    ASSERT_EQ(VPX_CODEC_OK, res_peek)
-        << "Peek return failed: " << vpx_codec_err_to_string(res_peek);
+    ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: "
+                                      << vpx_codec_err_to_string(res_peek);
  }
 }

--- a/test/encode_api_test.cc
+++ b/test/encode_api_test.cc
@@ -62,134 +62,4 @@ TEST(EncodeAPI, InvalidParams) {
  }
 }

-TEST(EncodeAPI, HighBitDepthCapability) {
-// VP8 should not claim VP9 HBD as a capability.
-#if CONFIG_VP8_ENCODER
-  const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_cx_algo);
-  EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
-#endif
-
-#if CONFIG_VP9_ENCODER
-  const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_cx_algo);
-#if CONFIG_VP9_HIGHBITDEPTH
-  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH);
-#else
-  EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0);
-#endif
-#endif
-}
-
-#if CONFIG_VP8_ENCODER
-TEST(EncodeAPI, ImageSizeSetting) {
-  const int width = 711;
-  const int height = 360;
-  const int bps = 12;
-  vpx_image_t img;
-  vpx_codec_ctx_t enc;
-  vpx_codec_enc_cfg_t cfg;
-  uint8_t *img_buf = reinterpret_cast<uint8_t *>(
-      calloc(width * height * bps / 8, sizeof(*img_buf)));
-  vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0);
-
-  cfg.g_w = width;
-  cfg.g_h = height;
-
-  vpx_img_wrap(&img, VPX_IMG_FMT_I420, width, height, 1, img_buf);
-
-  vpx_codec_enc_init(&enc, vpx_codec_vp8_cx(), &cfg, 0);
-
-  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, &img, 0, 1, 0, 0));
-
-  free(img_buf);
-
-  vpx_codec_destroy(&enc);
-}
-#endif
-
-// Set up 2 spatial streams with 2 temporal layers per stream, and generate
-// invalid configuration by setting the temporal layer rate allocation
-// (ts_target_bitrate[]) to 0 for both layers. This should fail independent of
-// CONFIG_MULTI_RES_ENCODING.
-TEST(EncodeAPI, MultiResEncode) {
-  static const vpx_codec_iface_t *kCodecs[] = {
-#if CONFIG_VP8_ENCODER
-    &vpx_codec_vp8_cx_algo,
-#endif
-#if CONFIG_VP9_ENCODER
-    &vpx_codec_vp9_cx_algo,
-#endif
-  };
-  const int width = 1280;
-  const int height = 720;
-  const int width_down = width / 2;
-  const int height_down = height / 2;
-  const int target_bitrate = 1000;
-  const int framerate = 30;
-
-  for (int c = 0; c < NELEMENTS(kCodecs); ++c) {
-    const vpx_codec_iface_t *const iface = kCodecs[c];
-    vpx_codec_ctx_t enc[2];
-    vpx_codec_enc_cfg_t cfg[2];
-    vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } };
-
-    memset(enc, 0, sizeof(enc));
-
-    for (int i = 0; i < 2; i++) {
-      vpx_codec_enc_config_default(iface, &cfg[i], 0);
-    }
-
-    /* Highest-resolution encoder settings */
-    cfg[0].g_w = width;
-    cfg[0].g_h = height;
-    cfg[0].rc_dropframe_thresh = 0;
-    cfg[0].rc_end_usage = VPX_CBR;
-    cfg[0].rc_resize_allowed = 0;
-    cfg[0].rc_min_quantizer = 2;
-    cfg[0].rc_max_quantizer = 56;
-    cfg[0].rc_undershoot_pct = 100;
-    cfg[0].rc_overshoot_pct = 15;
-    cfg[0].rc_buf_initial_sz = 500;
-    cfg[0].rc_buf_optimal_sz = 600;
-    cfg[0].rc_buf_sz = 1000;
-    cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
-    cfg[0].g_lag_in_frames = 0;
-
-    cfg[0].kf_mode = VPX_KF_AUTO;
-    cfg[0].kf_min_dist = 3000;
-    cfg[0].kf_max_dist = 3000;
-
-    cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */
-    cfg[0].g_timebase.num = 1;                 /* Set fps */
-    cfg[0].g_timebase.den = framerate;
-
-    memcpy(&cfg[1], &cfg[0], sizeof(cfg[0]));
-    cfg[1].rc_target_bitrate = 500;
-    cfg[1].g_w = width_down;
-    cfg[1].g_h = height_down;
-
-    for (int i = 0; i < 2; i++) {
-      cfg[i].ts_number_layers = 2;
-      cfg[i].ts_periodicity = 2;
-      cfg[i].ts_rate_decimator[0] = 2;
-      cfg[i].ts_rate_decimator[1] = 1;
-      cfg[i].ts_layer_id[0] = 0;
-      cfg[i].ts_layer_id[1] = 1;
-      // Invalid parameters.
-      cfg[i].ts_target_bitrate[0] = 0;
-      cfg[i].ts_target_bitrate[1] = 0;
-    }
-
-    // VP9 should report incapable, VP8 invalid for all configurations.
-    const char kVP9Name[] = "WebM Project VP9";
-    const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface),
-                                sizeof(kVP9Name) - 1) == 0;
-    EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM,
-              vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0]));
-
-    for (int i = 0; i < 2; i++) {
-      vpx_codec_destroy(&enc[i]);
-    }
-  }
-}
-
 }  // namespace
--- a/test/encode_test_driver.cc
+++ b/test/encode_test_driver.cc
@@ -201,8 +201,6 @@ void EncoderTest::RunLoop(VideoSource *video) {
      PreEncodeFrameHook(video, encoder.get());
      encoder->EncodeFrame(video, frame_flags_);

-      PostEncodeFrameHook(encoder.get());
-
      CxDataIterator iter = encoder->GetCxData();

      bool has_cxdata = false;
@@ -228,8 +226,6 @@ void EncoderTest::RunLoop(VideoSource *video) {

          case VPX_CODEC_PSNR_PKT: PSNRPktHook(pkt); break;

-          case VPX_CODEC_STATS_PKT: StatsPktHook(pkt); break;
-
          default: break;
        }
      }
--- a/test/encode_test_driver.h
+++ b/test/encode_test_driver.h
@@ -139,13 +139,6 @@ class Encoder {
  }
 #endif

-#if CONFIG_VP8_ENCODER
-  void Control(int ctrl_id, vpx_roi_map_t *arg) {
-    const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg);
-    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
-  }
-#endif
-
  void Config(const vpx_codec_enc_cfg_t *cfg) {
    const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg);
    ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError();
@@ -219,17 +212,12 @@ class EncoderTest {
  virtual void PreEncodeFrameHook(VideoSource * /*video*/,
                                  Encoder * /*encoder*/) {}

-  virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {}
-
  // Hook to be called on every compressed data packet.
  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

  // Hook to be called on every PSNR packet.
  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}

-  // Hook to be called on every first pass stats packet.
-  virtual void StatsPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {}
-
  // Hook to determine whether the encode loop should continue.
  virtual bool Continue() const {
    return !(::testing::Test::HasFatalFailure() || abort_);
--- a/test/error_resilience_test.cc
+++ b/test/error_resilience_test.cc
@@ -90,7 +90,8 @@ class ErrorResilienceTestLarge
    return frame_flags;
  }

-  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) {
+  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
+                                  ::libvpx_test::Encoder * /*encoder*/) {
    frame_flags_ &=
        ~(VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF);
    // For temporal layer case.
--- a/test/examples.sh
+++ b/test/examples.sh
@@ -15,7 +15,7 @@
 example_tests=$(ls $(dirname $0)/*.sh)

 # List of script names to exclude.
-exclude_list="examples stress tools_common"
+exclude_list="examples tools_common"

 # Filter out the scripts in $exclude_list.
 for word in ${exclude_list}; do
--- a/test/external_frame_buffer_test.cc
+++ b/test/external_frame_buffer_test.cc
@@ -34,8 +34,7 @@ struct ExternalFrameBuffer {
 // Class to manipulate a list of external frame buffers.
 class ExternalFrameBufferList {
 public:
-  ExternalFrameBufferList()
-      : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(NULL) {}
+  ExternalFrameBufferList() : num_buffers_(0), ext_fb_list_(NULL) {}

  virtual ~ExternalFrameBufferList() {
    for (int i = 0; i < num_buffers_; ++i) {
@@ -72,8 +71,6 @@ class ExternalFrameBufferList {
    }

    SetFrameBuffer(idx, fb);
-
-    num_used_buffers_++;
    return 0;
  }

@@ -109,7 +106,6 @@ class ExternalFrameBufferList {
    }
    EXPECT_EQ(1, ext_fb->in_use);
    ext_fb->in_use = 0;
-    num_used_buffers_--;
    return 0;
  }

@@ -125,8 +121,6 @@ class ExternalFrameBufferList {
    }
  }

-  int num_used_buffers() const { return num_used_buffers_; }
-
 private:
  // Returns the index of the first free frame buffer. Returns |num_buffers_|
  // if there are no free frame buffers.
@@ -151,7 +145,6 @@ class ExternalFrameBufferList {
  }

  int num_buffers_;
-  int num_used_buffers_;
  ExternalFrameBuffer *ext_fb_list_;
 };

@@ -227,8 +220,8 @@ class ExternalFrameBufferMD5Test

  void OpenMD5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
-        << "Md5 file open failed. Filename: " << md5_file_name_;
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
+                                   << md5_file_name_;
  }

  virtual void DecompressedFrameHook(const vpx_image_t &img,
@@ -280,7 +273,6 @@ class ExternalFrameBufferMD5Test

 #if CONFIG_WEBM_IO
 const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm";
-const char kVP9NonRefTestFile[] = "vp90-2-22-svc_1280x720_1.webm";

 // Class for testing passing in external frame buffers to libvpx.
 class ExternalFrameBufferTest : public ::testing::Test {
@@ -300,9 +292,7 @@ class ExternalFrameBufferTest : public ::testing::Test {

  virtual void TearDown() {
    delete decoder_;
-    decoder_ = NULL;
    delete video_;
-    video_ = NULL;
  }

  // Passes the external frame buffer information to libvpx.
@@ -335,7 +325,7 @@ class ExternalFrameBufferTest : public ::testing::Test {
    return VPX_CODEC_OK;
  }

- protected:
+ private:
  void CheckDecodedFrames() {
    libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData();
    const vpx_image_t *img = NULL;
@@ -351,25 +341,6 @@ class ExternalFrameBufferTest : public ::testing::Test {
  int num_buffers_;
  ExternalFrameBufferList fb_list_;
 };
-
-class ExternalFrameBufferNonRefTest : public ExternalFrameBufferTest {
- protected:
-  virtual void SetUp() {
-    video_ = new libvpx_test::WebMVideoSource(kVP9NonRefTestFile);
-    ASSERT_TRUE(video_ != NULL);
-    video_->Init();
-    video_->Begin();
-
-    vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
-    decoder_ = new libvpx_test::VP9Decoder(cfg, 0);
-    ASSERT_TRUE(decoder_ != NULL);
-  }
-
-  virtual void CheckFrameBufferRelease() {
-    TearDown();
-    ASSERT_EQ(0, fb_list_.num_used_buffers());
-  }
-};
 #endif  // CONFIG_WEBM_IO

 // This test runs through the set of test vectors, and decodes them.
@@ -448,8 +419,6 @@ TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) {
            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
                                    release_vp9_frame_buffer));
  ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame());
-  // Only run this on long clips. Decoding a very short clip will return
-  // VPX_CODEC_OK even with only 2 buffers.
  ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames());
 }

@@ -498,15 +467,6 @@ TEST_F(ExternalFrameBufferTest, SetAfterDecode) {
            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
                                    release_vp9_frame_buffer));
 }
-
-TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) {
-  const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
-  ASSERT_EQ(VPX_CODEC_OK,
-            SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer,
-                                    release_vp9_frame_buffer));
-  ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames());
-  CheckFrameBufferRelease();
-}
 #endif  // CONFIG_WEBM_IO

 VP9_INSTANTIATE_TEST_CASE(
--- a/test/fdct4x4_test.cc
+++ b/test/fdct4x4_test.cc
@@ -0,0 +1,512 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vp9_rtcd.h"
+#include "./vpx_dsp_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vpx/vpx_codec.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_ports/mem.h"
+
+using libvpx_test::ACMRandom;
+
+namespace {
+const int kNumCoeffs = 16;
+typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride);
+typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride);
+typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
+                        int tx_type);
+typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
+                        int tx_type);
+
+typedef std::tr1::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct4x4Param;
+typedef std::tr1::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht4x4Param;
+
+void fdct4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type*/) {
+  vpx_fdct4x4_c(in, out, stride);
+}
+
+void fht4x4_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {
+  vp9_fht4x4_c(in, out, stride, tx_type);
+}
+
+void fwht4x4_ref(const int16_t *in, tran_low_t *out, int stride,
+                 int /*tx_type*/) {
+  vp9_fwht4x4_c(in, out, stride);
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+void idct4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 10);
+}
+
+void idct4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct4x4_16_add_c(in, out, stride, 12);
+}
+
+void iht4x4_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 10);
+}
+
+void iht4x4_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
+  vp9_highbd_iht4x4_16_add_c(in, out, stride, tx_type, 12);
+}
+
+void iwht4x4_10(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 10);
+}
+
+void iwht4x4_12(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_iwht4x4_16_add_c(in, out, stride, 12);
+}
+
+#if HAVE_SSE2
+void idct4x4_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 10);
+}
+
+void idct4x4_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct4x4_16_add_sse2(in, out, stride, 12);
+}
+#endif  // HAVE_SSE2
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+class Trans4x4TestBase {
+ public:
+  virtual ~Trans4x4TestBase() {}
+
+ protected:
+  virtual void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) = 0;
+
+  virtual void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) = 0;
+
+  void RunAccuracyCheck(int limit) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    uint32_t max_error = 0;
+    int64_t total_error = 0;
+    const int count_test_block = 10000;
+    for (int i = 0; i < count_test_block; ++i) {
+      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif
+
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == VPX_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          test_input_block[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          test_input_block[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(test_input_block, test_temp_block, pitch_));
+      if (bit_depth_ == VPX_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        ASSERT_EQ(VPX_BITS_8, bit_depth_);
+        const int diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        if (max_error < error) max_error = error;
+        total_error += error;
+      }
+    }
+
+    EXPECT_GE(static_cast<uint32_t>(limit), max_error)
+        << "Error: 4x4 FHT/IHT has an individual round trip error > " << limit;
+
+    EXPECT_GE(count_test_block * limit, total_error)
+        << "Error: 4x4 FHT/IHT has average round trip error > " << limit
+        << " per block";
+  }
+
+  void RunCoeffCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_);
+      }
+
+      fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_);
+      ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_));
+
+      // Every coefficient must match the reference transform exactly.
+      for (int j = 0; j < kNumCoeffs; ++j)
+        EXPECT_EQ(output_block[j], output_ref_block[j]);
+    }
+  }
+
+  void RunMemCheck() {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 5000;
+    DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_;
+      }
+      if (i == 0) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_;
+      } else if (i == 1) {
+        for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_;
+      }
+
+      fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_);
+      ASM_REGISTER_STATE_CHECK(
+          RunFwdTxfm(input_extreme_block, output_block, pitch_));
+
+      // The minimum quant value is 4.
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        EXPECT_EQ(output_block[j], output_ref_block[j]);
+        EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
+            << "Error: 4x4 FDCT has coefficient larger than 4*DCT_MAX_VALUE";
+      }
+    }
+  }
+
+  void RunInvAccuracyCheck(int limit) {
+    ACMRandom rnd(ACMRandom::DeterministicSeed());
+    const int count_test_block = 1000;
+    DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
+    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
+#if CONFIG_VP9_HIGHBITDEPTH
+    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
+    DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
+#endif
+
+    for (int i = 0; i < count_test_block; ++i) {
+      // Initialize a test block with input range [-mask_, mask_].
+      for (int j = 0; j < kNumCoeffs; ++j) {
+        if (bit_depth_ == VPX_BITS_8) {
+          src[j] = rnd.Rand8();
+          dst[j] = rnd.Rand8();
+          in[j] = src[j] - dst[j];
+#if CONFIG_VP9_HIGHBITDEPTH
+        } else {
+          src16[j] = rnd.Rand16() & mask_;
+          dst16[j] = rnd.Rand16() & mask_;
+          in[j] = src16[j] - dst16[j];
+#endif
+        }
+      }
+
+      fwd_txfm_ref(in, coeff, pitch_, tx_type_);
+
+      if (bit_depth_ == VPX_BITS_8) {
+        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
+#if CONFIG_VP9_HIGHBITDEPTH
+      } else {
+        ASM_REGISTER_STATE_CHECK(
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
+#endif
+      }
+
+      for (int j = 0; j < kNumCoeffs; ++j) {
+#if CONFIG_VP9_HIGHBITDEPTH
+        const int diff =
+            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
+#else
+        const int diff = dst[j] - src[j];
+#endif
+        const uint32_t error = diff * diff;
+        EXPECT_GE(static_cast<uint32_t>(limit), error)
+            << "Error: 4x4 IDCT has error " << error << " at index " << j;
+      }
+    }
+  }
+
+  int pitch_;
+  int tx_type_;
+  FhtFunc fwd_txfm_ref;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+};
+
+class Trans4x4DCT : public Trans4x4TestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  virtual ~Trans4x4DCT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    fwd_txfm_ref = fdct4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4DCT, AccuracyCheck) { RunAccuracyCheck(1); }
+
+TEST_P(Trans4x4DCT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4DCT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4DCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+class Trans4x4HT : public Trans4x4TestBase,
+                   public ::testing::TestWithParam<Ht4x4Param> {
+ public:
+  virtual ~Trans4x4HT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    fwd_txfm_ref = fht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride, tx_type_);
+  }
+
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride, tx_type_);
+  }
+
+  FhtFunc fwd_txfm_;
+  IhtFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4HT, AccuracyCheck) { RunAccuracyCheck(1); }
+
+TEST_P(Trans4x4HT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4HT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4HT, InvAccuracyCheck) { RunInvAccuracyCheck(1); }
+
+class Trans4x4WHT : public Trans4x4TestBase,
+                    public ::testing::TestWithParam<Dct4x4Param> {
+ public:
+  virtual ~Trans4x4WHT() {}
+
+  virtual void SetUp() {
+    fwd_txfm_ = GET_PARAM(0);
+    inv_txfm_ = GET_PARAM(1);
+    tx_type_ = GET_PARAM(2);
+    pitch_ = 4;
+    fwd_txfm_ref = fwht4x4_ref;
+    bit_depth_ = GET_PARAM(3);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  void RunFwdTxfm(const int16_t *in, tran_low_t *out, int stride) {
+    fwd_txfm_(in, out, stride);
+  }
+  void RunInvTxfm(const tran_low_t *out, uint8_t *dst, int stride) {
+    inv_txfm_(out, dst, stride);
+  }
+
+  FdctFunc fwd_txfm_;
+  IdctFunc inv_txfm_;
+};
+
+TEST_P(Trans4x4WHT, AccuracyCheck) { RunAccuracyCheck(0); }
+
+TEST_P(Trans4x4WHT, CoeffCheck) { RunCoeffCheck(); }
+
+TEST_P(Trans4x4WHT, MemCheck) { RunMemCheck(); }
+
+TEST_P(Trans4x4WHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); }
+using std::tr1::make_tuple;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_c, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(C, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
+                                                     &vpx_idct4x4_16_add_c, 0,
+                                                     VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 1, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 2, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_10, 3, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 1, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 2, VPX_BITS_12),
+        make_tuple(&vp9_highbd_fht4x4_c, &iht4x4_12, 3, VPX_BITS_12),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if CONFIG_VP9_HIGHBITDEPTH
+INSTANTIATE_TEST_CASE_P(
+    C, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_10, 0, VPX_BITS_10),
+        make_tuple(&vp9_highbd_fwht4x4_c, &iwht4x4_12, 0, VPX_BITS_12),
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8)));
+#else
+INSTANTIATE_TEST_CASE_P(C, Trans4x4WHT,
+                        ::testing::Values(make_tuple(&vp9_fwht4x4_c,
+                                                     &vpx_iwht4x4_16_add_c, 0,
+                                                     VPX_BITS_8)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(NEON, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_c,
+                                                     &vpx_idct4x4_16_add_neon,
+                                                     0, VPX_BITS_8)));
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    NEON, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_c, &vp9_iht4x4_16_add_neon, 3, VPX_BITS_8)));
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4WHT,
+    ::testing::Values(
+        make_tuple(&vp9_fwht4x4_sse2, &vpx_iwht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fwht4x4_c, &vpx_iwht4x4_16_add_sse2, 0, VPX_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(SSE2, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_sse2,
+                                                     &vpx_idct4x4_16_add_sse2,
+                                                     0, VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_sse2, 3, VPX_BITS_8)));
+#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4DCT,
+    ::testing::Values(
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_10_sse2, 0, VPX_BITS_10),
+        make_tuple(&vpx_highbd_fdct4x4_c, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_highbd_fdct4x4_sse2, &idct4x4_12_sse2, 0, VPX_BITS_12),
+        make_tuple(&vpx_fdct4x4_sse2, &vpx_idct4x4_16_add_c, 0, VPX_BITS_8)));
+
+INSTANTIATE_TEST_CASE_P(
+    SSE2, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_sse2, &vp9_iht4x4_16_add_c, 3, VPX_BITS_8)));
+#endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+INSTANTIATE_TEST_CASE_P(MSA, Trans4x4DCT,
+                        ::testing::Values(make_tuple(&vpx_fdct4x4_msa,
+                                                     &vpx_idct4x4_16_add_msa, 0,
+                                                     VPX_BITS_8)));
+INSTANTIATE_TEST_CASE_P(
+    MSA, Trans4x4HT,
+    ::testing::Values(
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 0, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 1, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 2, VPX_BITS_8),
+        make_tuple(&vp9_fht4x4_msa, &vp9_iht4x4_16_add_msa, 3, VPX_BITS_8)));
+#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+}  // namespace
--- a/test/fdct8x8_test.cc
+++ b/test/fdct8x8_test.cc
@@ -88,45 +88,45 @@ void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) {

 #if CONFIG_VP9_HIGHBITDEPTH
 void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 10);
 }

 void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_64_add_c(in, out, stride, 12);
 }

 void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10);
+  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 10);
 }

 void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
-  vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12);
+  vp9_highbd_iht8x8_64_add_c(in, out, stride, tx_type, 12);
 }

 #if HAVE_SSE2

-void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
+void idct8x8_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 10);
 }

-void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
+void idct8x8_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_10_add_c(in, out, stride, 12);
 }

-void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+void idct8x8_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 10);
 }

-void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+void idct8x8_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
+  vpx_highbd_idct8x8_10_add_sse2(in, out, stride, 12);
 }

 void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 10);
 }

 void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
-  vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
+  vpx_highbd_idct8x8_64_add_sse2(in, out, stride, 12);
 }
 #endif  // HAVE_SSE2
 #endif  // CONFIG_VP9_HIGHBITDEPTH
@@ -257,7 +257,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

@@ -340,7 +340,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(test_temp_block, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

@@ -413,7 +413,7 @@ class FwdTrans8x8TestBase {
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

@@ -497,9 +497,9 @@ class FwdTrans8x8TestBase {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
 #if CONFIG_VP9_HIGHBITDEPTH
      } else {
-        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
+        ref_txfm(coeff, CONVERT_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
-            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
+            RunInvTxfm(coeff, CONVERT_TO_BYTEPTR(dst16), pitch_));
 #endif
      }

@@ -511,8 +511,8 @@ class FwdTrans8x8TestBase {
        const int diff = dst[j] - ref[j];
 #endif
        const uint32_t error = diff * diff;
-        EXPECT_EQ(0u, error)
-            << "Error: 8x8 IDCT has error " << error << " at index " << j;
+        EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error
+                             << " at index " << j;
      }
    }
  }
@@ -670,12 +670,14 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8)));
 #endif  // CONFIG_VP9_HIGHBITDEPTH

-#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#if HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_neon,
                                                     &vpx_idct8x8_64_add_neon,
                                                     0, VPX_BITS_8)));
-#if !CONFIG_VP9_HIGHBITDEPTH
+#endif  // HAVE_NEON_ASM && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
+
+#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(
    NEON, FwdTrans8x8HT,
    ::testing::Values(
@@ -683,8 +685,7 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8)));
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE
+#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

 #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT,
@@ -727,10 +728,10 @@ INSTANTIATE_TEST_CASE_P(
 INSTANTIATE_TEST_CASE_P(
    SSE2, InvTrans8x8DCT,
    ::testing::Values(
-        make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225,
+        make_tuple(&idct8x8_10_add_10_c, &idct8x8_10_add_10_sse2, 6225,
                   VPX_BITS_10),
        make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10),
-        make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225,
+        make_tuple(&idct8x8_10_add_12_c, &idct8x8_10_add_12_sse2, 6225,
                   VPX_BITS_12),
        make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12)));
 #endif  // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
@@ -739,7 +740,7 @@ INSTANTIATE_TEST_CASE_P(
    !CONFIG_EMULATE_HARDWARE
 INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT,
                        ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3,
-                                                     &vpx_idct8x8_64_add_sse2,
+                                                     &vpx_idct8x8_64_add_ssse3,
                                                     0, VPX_BITS_8)));
 #endif

@@ -756,11 +757,4 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8),
        make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8)));
 #endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-
-#if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
-INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT,
-                        ::testing::Values(make_tuple(&vpx_fdct8x8_c,
-                                                     &vpx_idct8x8_64_add_vsx, 0,
-                                                     VPX_BITS_8)));
-#endif  // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
 }  // namespace
--- a/test/hadamard_test.cc
+++ b/test/hadamard_test.cc
@@ -13,7 +13,6 @@
 #include "third_party/googletest/src/include/gtest/gtest.h"

 #include "./vpx_dsp_rtcd.h"
-#include "vpx_ports/vpx_timer.h"

 #include "test/acm_random.h"
 #include "test/register_state_check.h"
@@ -22,8 +21,7 @@ namespace {

 using ::libvpx_test::ACMRandom;

-typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride,
-                             tran_low_t *b);
+typedef void (*HadamardFunc)(const int16_t *a, int a_stride, int16_t *b);

 void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  int16_t b[8];
@@ -48,16 +46,18 @@ void hadamard_loop(const int16_t *a, int a_stride, int16_t *out) {
  out[5] = c[3] - c[7];
 }

-void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) {
+void reference_hadamard8x8(const int16_t *a, int a_stride, int16_t *b) {
  int16_t buf[64];
-  int16_t buf2[64];
-  for (int i = 0; i < 8; ++i) hadamard_loop(a + i, a_stride, buf + i * 8);
-  for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, 8, buf2 + i * 8);
+  for (int i = 0; i < 8; ++i) {
+    hadamard_loop(a + i, a_stride, buf + i * 8);
+  }

-  for (int i = 0; i < 64; ++i) b[i] = (tran_low_t)buf2[i];
+  for (int i = 0; i < 8; ++i) {
+    hadamard_loop(buf + i, 8, b + i * 8);
+  }
 }

-void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
+void reference_hadamard16x16(const int16_t *a, int a_stride, int16_t *b) {
  /* The source is a 16x16 block. The destination is rearranged to 8x32.
   * Input is 9 bit. */
  reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0);
@@ -68,16 +68,16 @@ void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) {
  /* Overlay the 8x8 blocks and combine. */
  for (int i = 0; i < 64; ++i) {
    /* 8x8 steps the range up to 15 bits. */
-    const tran_low_t a0 = b[0];
-    const tran_low_t a1 = b[64];
-    const tran_low_t a2 = b[128];
-    const tran_low_t a3 = b[192];
+    const int16_t a0 = b[0];
+    const int16_t a1 = b[64];
+    const int16_t a2 = b[128];
+    const int16_t a3 = b[192];

    /* Prevent the result from escaping int16_t. */
-    const tran_low_t b0 = (a0 + a1) >> 1;
-    const tran_low_t b1 = (a0 - a1) >> 1;
-    const tran_low_t b2 = (a2 + a3) >> 1;
-    const tran_low_t b3 = (a2 - a3) >> 1;
+    const int16_t b0 = (a0 + a1) >> 1;
+    const int16_t b1 = (a0 - a1) >> 1;
+    const int16_t b2 = (a2 + a3) >> 1;
+    const int16_t b3 = (a2 - a3) >> 1;

    /* Store a 16 bit value. */
    b[0] = b0 + b2;
@@ -101,35 +101,12 @@ class HadamardTestBase : public ::testing::TestWithParam<HadamardFunc> {
  ACMRandom rnd_;
 };

-void HadamardSpeedTest(const char *name, HadamardFunc const func,
-                       const int16_t *input, int stride, tran_low_t *output,
-                       int times) {
-  int i;
-  vpx_usec_timer timer;
-
-  vpx_usec_timer_start(&timer);
-  for (i = 0; i < times; ++i) {
-    func(input, stride, output);
-  }
-  vpx_usec_timer_mark(&timer);
-
-  const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-  printf("%s[%12d runs]: %d us\n", name, times, elapsed_time);
-}
-
 class Hadamard8x8Test : public HadamardTestBase {};

-void HadamardSpeedTest8x8(HadamardFunc const func, int times) {
-  DECLARE_ALIGNED(16, int16_t, input[64]);
-  DECLARE_ALIGNED(16, tran_low_t, output[64]);
-  memset(input, 1, sizeof(input));
-  HadamardSpeedTest("Hadamard8x8", func, input, 8, output, times);
-}
-
 TEST_P(Hadamard8x8Test, CompareReferenceRandom) {
  DECLARE_ALIGNED(16, int16_t, a[64]);
-  DECLARE_ALIGNED(16, tran_low_t, b[64]);
-  tran_low_t b_ref[64];
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
  for (int i = 0; i < 64; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@@ -147,8 +124,8 @@ TEST_P(Hadamard8x8Test, CompareReferenceRandom) {

 TEST_P(Hadamard8x8Test, VaryStride) {
  DECLARE_ALIGNED(16, int16_t, a[64 * 8]);
-  DECLARE_ALIGNED(16, tran_low_t, b[64]);
-  tran_low_t b_ref[64];
+  DECLARE_ALIGNED(16, int16_t, b[64]);
+  int16_t b_ref[64];
  for (int i = 0; i < 64 * 8; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@@ -167,12 +144,6 @@ TEST_P(Hadamard8x8Test, VaryStride) {
  }
 }

-TEST_P(Hadamard8x8Test, DISABLED_Speed) {
-  HadamardSpeedTest8x8(h_func_, 10);
-  HadamardSpeedTest8x8(h_func_, 10000);
-  HadamardSpeedTest8x8(h_func_, 10000000);
-}
-
 INSTANTIATE_TEST_CASE_P(C, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_c));

@@ -191,33 +162,12 @@ INSTANTIATE_TEST_CASE_P(NEON, Hadamard8x8Test,
                        ::testing::Values(&vpx_hadamard_8x8_neon));
 #endif  // HAVE_NEON

-// TODO(jingning): Remove highbitdepth flag when the SIMD functions are
-// in place and turn on the unit test.
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_msa));
-#endif  // HAVE_MSA
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
-
-#if HAVE_VSX
-INSTANTIATE_TEST_CASE_P(VSX, Hadamard8x8Test,
-                        ::testing::Values(&vpx_hadamard_8x8_vsx));
-#endif  // HAVE_VSX
-
 class Hadamard16x16Test : public HadamardTestBase {};

-void HadamardSpeedTest16x16(HadamardFunc const func, int times) {
-  DECLARE_ALIGNED(16, int16_t, input[256]);
-  DECLARE_ALIGNED(16, tran_low_t, output[256]);
-  memset(input, 1, sizeof(input));
-  HadamardSpeedTest("Hadamard16x16", func, input, 16, output, times);
-}
-
 TEST_P(Hadamard16x16Test, CompareReferenceRandom) {
  DECLARE_ALIGNED(16, int16_t, a[16 * 16]);
-  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
-  tran_low_t b_ref[16 * 16];
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
  for (int i = 0; i < 16 * 16; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@@ -235,8 +185,8 @@ TEST_P(Hadamard16x16Test, CompareReferenceRandom) {

 TEST_P(Hadamard16x16Test, VaryStride) {
  DECLARE_ALIGNED(16, int16_t, a[16 * 16 * 8]);
-  DECLARE_ALIGNED(16, tran_low_t, b[16 * 16]);
-  tran_low_t b_ref[16 * 16];
+  DECLARE_ALIGNED(16, int16_t, b[16 * 16]);
+  int16_t b_ref[16 * 16];
  for (int i = 0; i < 16 * 16 * 8; ++i) {
    a[i] = rnd_.Rand9Signed();
  }
@@ -255,12 +205,6 @@ TEST_P(Hadamard16x16Test, VaryStride) {
  }
 }

-TEST_P(Hadamard16x16Test, DISABLED_Speed) {
-  HadamardSpeedTest16x16(h_func_, 10);
-  HadamardSpeedTest16x16(h_func_, 10000);
-  HadamardSpeedTest16x16(h_func_, 10000000);
-}
-
 INSTANTIATE_TEST_CASE_P(C, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_c));

@@ -269,25 +213,8 @@ INSTANTIATE_TEST_CASE_P(SSE2, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_sse2));
 #endif  // HAVE_SSE2

-#if HAVE_AVX2
-INSTANTIATE_TEST_CASE_P(AVX2, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_avx2));
-#endif  // HAVE_AVX2
-
-#if HAVE_VSX
-INSTANTIATE_TEST_CASE_P(VSX, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_vsx));
-#endif  // HAVE_VSX
-
 #if HAVE_NEON
 INSTANTIATE_TEST_CASE_P(NEON, Hadamard16x16Test,
                        ::testing::Values(&vpx_hadamard_16x16_neon));
 #endif  // HAVE_NEON
-
-#if !CONFIG_VP9_HIGHBITDEPTH
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, Hadamard16x16Test,
-                        ::testing::Values(&vpx_hadamard_16x16_msa));
-#endif  // HAVE_MSA
-#endif  // !CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/idct_test.cc
+++ b/test/idct_test.cc
@@ -13,7 +13,6 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "vpx/vpx_integer.h"
@@ -22,156 +21,106 @@ typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr,
                         int pred_stride, unsigned char *dst_ptr,
                         int dst_stride);
 namespace {
-
-using libvpx_test::Buffer;
-
 class IDCTTest : public ::testing::TestWithParam<IdctFunc> {
 protected:
  virtual void SetUp() {
+    int i;
+
    UUT = GetParam();
-
-    input = new Buffer<int16_t>(4, 4, 0);
-    ASSERT_TRUE(input != NULL);
-    ASSERT_TRUE(input->Init());
-    predict = new Buffer<uint8_t>(4, 4, 3);
-    ASSERT_TRUE(predict != NULL);
-    ASSERT_TRUE(predict->Init());
-    output = new Buffer<uint8_t>(4, 4, 3);
-    ASSERT_TRUE(output != NULL);
-    ASSERT_TRUE(output->Init());
+    memset(input, 0, sizeof(input));
+    /* Set up guard blocks */
+    for (i = 0; i < 256; i++) output[i] = ((i & 0xF) < 4 && (i < 64)) ? 0 : -1;
  }

-  virtual void TearDown() {
-    delete input;
-    delete predict;
-    delete output;
-    libvpx_test::ClearSystemState();
-  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

  IdctFunc UUT;
-  Buffer<int16_t> *input;
-  Buffer<uint8_t> *predict;
-  Buffer<uint8_t> *output;
+  int16_t input[16];
+  unsigned char output[256];
+  unsigned char predict[256];
 };

+TEST_P(IDCTTest, TestGuardBlocks) {
+  int i;
+
+  for (i = 0; i < 256; i++) {
+    if ((i & 0xF) < 4 && i < 64)
+      EXPECT_EQ(0, output[i]) << i;
+    else
+      EXPECT_EQ(255, output[i]);
+  }
+}
+
 TEST_P(IDCTTest, TestAllZeros) {
-  // When the input is '0' the output will be '0'.
-  input->Set(0);
-  predict->Set(0);
-  output->Set(0);
+  int i;

-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
-                               predict->stride(), output->TopLeftPixel(),
-                               output->stride()));
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  ASSERT_TRUE(input->CheckValues(0));
-  ASSERT_TRUE(input->CheckPadding());
-  ASSERT_TRUE(output->CheckValues(0));
-  ASSERT_TRUE(output->CheckPadding());
+  for (i = 0; i < 256; i++) {
+    if ((i & 0xF) < 4 && i < 64)
+      EXPECT_EQ(0, output[i]) << "i==" << i;
+    else
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+  }
 }

 TEST_P(IDCTTest, TestAllOnes) {
-  input->Set(0);
-  // When the first element is '4' it will fill the output buffer with '1'.
-  input->TopLeftPixel()[0] = 4;
-  predict->Set(0);
-  output->Set(0);
+  int i;

-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
-                               predict->stride(), output->TopLeftPixel(),
-                               output->stride()));
+  input[0] = 4;
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));

-  ASSERT_TRUE(output->CheckValues(1));
-  ASSERT_TRUE(output->CheckPadding());
+  for (i = 0; i < 256; i++) {
+    if ((i & 0xF) < 4 && i < 64)
+      EXPECT_EQ(1, output[i]) << "i==" << i;
+    else
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+  }
 }

 TEST_P(IDCTTest, TestAddOne) {
-  // Set the transform output to '1' and make sure it gets added to the
-  // prediction buffer.
-  input->Set(0);
-  input->TopLeftPixel()[0] = 4;
-  output->Set(0);
+  int i;

-  uint8_t *pred = predict->TopLeftPixel();
-  for (int y = 0; y < 4; ++y) {
-    for (int x = 0; x < 4; ++x) {
-      pred[y * predict->stride() + x] = y * 4 + x;
-    }
+  for (i = 0; i < 256; i++) predict[i] = i;
+  input[0] = 4;
+  ASM_REGISTER_STATE_CHECK(UUT(input, predict, 16, output, 16));
+
+  for (i = 0; i < 256; i++) {
+    if ((i & 0xF) < 4 && i < 64)
+      EXPECT_EQ(i + 1, output[i]) << "i==" << i;
+    else
+      EXPECT_EQ(255, output[i]) << "i==" << i;
  }
-
-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
-                               predict->stride(), output->TopLeftPixel(),
-                               output->stride()));
-
-  uint8_t const *out = output->TopLeftPixel();
-  for (int y = 0; y < 4; ++y) {
-    for (int x = 0; x < 4; ++x) {
-      EXPECT_EQ(1 + y * 4 + x, out[y * output->stride() + x]);
-    }
-  }
-
-  if (HasFailure()) {
-    output->DumpBuffer();
-  }
-
-  ASSERT_TRUE(output->CheckPadding());
 }

 TEST_P(IDCTTest, TestWithData) {
-  // Test a single known input.
-  predict->Set(0);
+  int i;

-  int16_t *in = input->TopLeftPixel();
-  for (int y = 0; y < 4; ++y) {
-    for (int x = 0; x < 4; ++x) {
-      in[y * input->stride() + x] = y * 4 + x;
-    }
+  for (i = 0; i < 16; i++) input[i] = i;
+
+  ASM_REGISTER_STATE_CHECK(UUT(input, output, 16, output, 16));
+
+  for (i = 0; i < 256; i++) {
+    if ((i & 0xF) > 3 || i > 63)
+      EXPECT_EQ(255, output[i]) << "i==" << i;
+    else if (i == 0)
+      EXPECT_EQ(11, output[i]) << "i==" << i;
+    else if (i == 34)
+      EXPECT_EQ(1, output[i]) << "i==" << i;
+    else if (i == 2 || i == 17 || i == 32)
+      EXPECT_EQ(3, output[i]) << "i==" << i;
+    else
+      EXPECT_EQ(0, output[i]) << "i==" << i;
  }
-
-  ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(),
-                               predict->stride(), output->TopLeftPixel(),
-                               output->stride()));
-
-  uint8_t *out = output->TopLeftPixel();
-  for (int y = 0; y < 4; ++y) {
-    for (int x = 0; x < 4; ++x) {
-      switch (y * 4 + x) {
-        case 0: EXPECT_EQ(11, out[y * output->stride() + x]); break;
-        case 2:
-        case 5:
-        case 8: EXPECT_EQ(3, out[y * output->stride() + x]); break;
-        case 10: EXPECT_EQ(1, out[y * output->stride() + x]); break;
-        default: EXPECT_EQ(0, out[y * output->stride() + x]);
-      }
-    }
-  }
-
-  if (HasFailure()) {
-    output->DumpBuffer();
-  }
-
-  ASSERT_TRUE(output->CheckPadding());
 }

 INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c));
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, IDCTTest,
-                        ::testing::Values(vp8_short_idct4x4llm_neon));
-#endif  // HAVE_NEON
-
 #if HAVE_MMX
 INSTANTIATE_TEST_CASE_P(MMX, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_mmx));
-#endif  // HAVE_MMX
-
+#endif
 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(MSA, IDCTTest,
                        ::testing::Values(vp8_short_idct4x4llm_msa));
-#endif  // HAVE_MSA
-
-#if HAVE_MMI
-INSTANTIATE_TEST_CASE_P(MMI, IDCTTest,
-                        ::testing::Values(vp8_short_idct4x4llm_mmi));
-#endif  // HAVE_MMI
+#endif
 }
--- a/test/invalid_file_test.cc
+++ b/test/invalid_file_test.cc
@@ -45,8 +45,8 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest,

  void OpenResFile(const std::string &res_file_name_) {
    res_file_ = libvpx_test::OpenTestDataFile(res_file_name_);
-    ASSERT_TRUE(res_file_ != NULL)
-        << "Result file open failed. Filename: " << res_file_name_;
+    ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: "
+                                   << res_file_name_;
  }

  virtual bool HandleDecodeResult(
@@ -120,23 +120,10 @@ class InvalidFileTest : public ::libvpx_test::DecoderTest,

 TEST_P(InvalidFileTest, ReturnCode) { RunTest(); }

-#if CONFIG_VP8_DECODER
-const DecodeParam kVP8InvalidFileTests[] = {
-  { 1, "invalid-bug-1443.ivf" },
-};
-
-VP8_INSTANTIATE_TEST_CASE(InvalidFileTest,
-                          ::testing::ValuesIn(kVP8InvalidFileTests));
-#endif  // CONFIG_VP8_DECODER
-
-#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileTests[] = {
  { 1, "invalid-vp90-02-v2.webm" },
 #if CONFIG_VP9_HIGHBITDEPTH
  { 1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf" },
-  { 1,
-    "invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-."
-    "ivf" },
 #endif
  { 1, "invalid-vp90-03-v3.webm" },
  { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf" },
@@ -154,14 +141,10 @@ const DecodeParam kVP9InvalidFileTests[] = {
  { 1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf" },
  { 1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf" },
  { 1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf" },
-  { 1,
-    "invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf" },
-  { 1, "invalid-crbug-667044.webm" },
 };

 VP9_INSTANTIATE_TEST_CASE(InvalidFileTest,
                          ::testing::ValuesIn(kVP9InvalidFileTests));
-#endif  // CONFIG_VP9_DECODER

 // This class will include test vectors that are expected to fail
 // peek. However they are still expected to have no fatal failures.
@@ -175,16 +158,6 @@ class InvalidFileInvalidPeekTest : public InvalidFileTest {

 TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { RunTest(); }

-#if CONFIG_VP8_DECODER
-const DecodeParam kVP8InvalidPeekTests[] = {
-  { 1, "invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf" },
-};
-
-VP8_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest,
-                          ::testing::ValuesIn(kVP8InvalidPeekTests));
-#endif  // CONFIG_VP8_DECODER
-
-#if CONFIG_VP9_DECODER
 const DecodeParam kVP9InvalidFileInvalidPeekTests[] = {
  { 1, "invalid-vp90-01-v3.webm" },
 };
@@ -201,7 +174,6 @@ const DecodeParam kMultiThreadedVP9InvalidFileTests[] = {
    "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf" },
  { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" },
  { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" },
-  { 2, "invalid-crbug-629481.webm" },
 };

 INSTANTIATE_TEST_CASE_P(
@@ -210,5 +182,4 @@ INSTANTIATE_TEST_CASE_P(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests)));
-#endif  // CONFIG_VP9_DECODER
 }  // namespace
--- a/test/ivf_video_source.h
+++ b/test/ivf_video_source.h
@@ -47,8 +47,8 @@ class IVFVideoSource : public CompressedVideoSource {

  virtual void Begin() {
    input_file_ = OpenTestDataFile(file_name_);
-    ASSERT_TRUE(input_file_ != NULL)
-        << "Input file open failed. Filename: " << file_name_;
+    ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: "
+                                     << file_name_;

    // Read file header
    uint8_t file_hdr[kIvfFileHdrSize];
--- a/test/keyframe_test.cc
+++ b/test/keyframe_test.cc
@@ -135,8 +135,8 @@ TEST_P(KeyframeTest, TestAutoKeyframe) {
  for (std::vector<vpx_codec_pts_t>::const_iterator iter = kf_pts_list_.begin();
       iter != kf_pts_list_.end(); ++iter) {
    if (deadline_ == VPX_DL_REALTIME && *iter > 0)
-      EXPECT_EQ(0, (*iter - 1) % 30)
-          << "Unexpected keyframe at frame " << *iter;
+      EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame "
+                                     << *iter;
    else
      EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter;
  }
--- a/test/level_test.cc
+++ b/test/level_test.cc
@@ -66,36 +66,6 @@ class LevelTest
  int level_;
 };

-TEST_P(LevelTest, TestTargetLevel11Large) {
-  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
-  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
-                                       60);
-  target_level_ = 11;
-  cfg_.rc_target_bitrate = 150;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(target_level_, level_);
-}
-
-TEST_P(LevelTest, TestTargetLevel20Large) {
-  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
-  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
-                                       30, 1, 0, 60);
-  target_level_ = 20;
-  cfg_.rc_target_bitrate = 1200;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(target_level_, level_);
-}
-
-TEST_P(LevelTest, TestTargetLevel31Large) {
-  ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime);
-  ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30,
-                                       1, 0, 60);
-  target_level_ = 31;
-  cfg_.rc_target_bitrate = 8000;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(target_level_, level_);
-}
-
 // Test for keeping level stats only
 TEST_P(LevelTest, TestTargetLevel0) {
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
@@ -103,11 +73,11 @@ TEST_P(LevelTest, TestTargetLevel0) {
  target_level_ = 0;
  min_gf_internal_ = 4;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(11, level_);
+  ASSERT_EQ(11, level_);

  cfg_.rc_target_bitrate = 1600;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  ASSERT_GE(20, level_);
+  ASSERT_EQ(20, level_);
 }

 // Test for level control being turned off
@@ -124,13 +94,12 @@ TEST_P(LevelTest, TestTargetLevelApi) {
  vpx_codec_ctx_t enc;
  vpx_codec_enc_cfg_t cfg;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0));
-  cfg.rc_target_bitrate = 100;
  EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0));
  for (int level = 0; level <= 256; ++level) {
    if (level == 10 || level == 11 || level == 20 || level == 21 ||
        level == 30 || level == 31 || level == 40 || level == 41 ||
        level == 50 || level == 51 || level == 52 || level == 60 ||
-        level == 61 || level == 62 || level == 0 || level == 1 || level == 255)
+        level == 61 || level == 62 || level == 0 || level == 255)
      EXPECT_EQ(VPX_CODEC_OK,
                vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level));
    else
--- a/test/lpf_8_test.cc
+++ b/test/lpf_8_test.cc
@@ -114,18 +114,6 @@ void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit,
  }
 }

-uint8_t GetOuterThresh(ACMRandom *rnd) {
-  return static_cast<uint8_t>(rnd->RandRange(3 * MAX_LOOP_FILTER + 5));
-}
-
-uint8_t GetInnerThresh(ACMRandom *rnd) {
-  return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1));
-}
-
-uint8_t GetHevThresh(ACMRandom *rnd) {
-  return static_cast<uint8_t>(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4);
-}
-
 class Loop8Test6Param : public ::testing::TestWithParam<loop8_param_t> {
 public:
  virtual ~Loop8Test6Param() {}
@@ -174,15 +162,15 @@ TEST_P(Loop8Test6Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -233,15 +221,15 @@ TEST_P(Loop8Test6Param, ValueCheck) {

  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                   tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -283,27 +271,27 @@ TEST_P(Loop8Test9Param, OperationCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetOuterThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -346,27 +334,27 @@ TEST_P(Loop8Test9Param, ValueCheck) {
  int first_failure = -1;
  for (int i = 0; i < count_test_block; ++i) {
    int err_count = 0;
-    uint8_t tmp = GetOuterThresh(&rnd);
+    uint8_t tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetOuterThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(3 * MAX_LOOP_FILTER + 4));
    DECLARE_ALIGNED(16, const uint8_t,
                    blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetInnerThresh(&rnd);
+    tmp = static_cast<uint8_t>(rnd(MAX_LOOP_FILTER));
    DECLARE_ALIGNED(16, const uint8_t,
                    limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                    tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
-    tmp = GetHevThresh(&rnd);
+    tmp = rnd.Rand8();
    DECLARE_ALIGNED(16, const uint8_t,
                    thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp,
                                     tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp };
@@ -414,10 +402,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 8),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_8_c, 8),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_16_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 8),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -428,10 +416,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 10),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_8_c, 10),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_16_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 10),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -442,10 +430,10 @@ INSTANTIATE_TEST_CASE_P(
                                 &vpx_highbd_lpf_vertical_4_c, 12),
                      make_tuple(&vpx_highbd_lpf_horizontal_8_sse2,
                                 &vpx_highbd_lpf_horizontal_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_8_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_8_c, 12),
+                      make_tuple(&vpx_highbd_lpf_horizontal_edge_16_sse2,
+                                 &vpx_highbd_lpf_horizontal_edge_16_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_8_sse2,
                                 &vpx_highbd_lpf_vertical_8_c, 12),
                      make_tuple(&vpx_highbd_lpf_vertical_16_sse2,
@@ -462,9 +450,10 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_dual_sse2,
-                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_8_sse2,
+                   &vpx_lpf_horizontal_edge_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_16_sse2,
+                   &vpx_lpf_horizontal_edge_16_c, 8),
        make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8),
@@ -476,10 +465,10 @@ INSTANTIATE_TEST_CASE_P(
 #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH)
 INSTANTIATE_TEST_CASE_P(
    AVX2, Loop8Test6Param,
-    ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2,
-                                 &vpx_lpf_horizontal_16_c, 8),
-                      make_tuple(&vpx_lpf_horizontal_16_dual_avx2,
-                                 &vpx_lpf_horizontal_16_dual_c, 8)));
+    ::testing::Values(make_tuple(&vpx_lpf_horizontal_edge_8_avx2,
+                                 &vpx_lpf_horizontal_edge_8_c, 8),
+                      make_tuple(&vpx_lpf_horizontal_edge_16_avx2,
+                                 &vpx_lpf_horizontal_edge_16_c, 8)));
 #endif

 #if HAVE_SSE2
@@ -526,89 +515,15 @@ INSTANTIATE_TEST_CASE_P(

 #if HAVE_NEON
 #if CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test6Param,
-    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
-                                 &vpx_highbd_lpf_horizontal_4_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
-                                 &vpx_highbd_lpf_horizontal_4_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_4_neon,
-                                 &vpx_highbd_lpf_horizontal_4_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
-                                 &vpx_highbd_lpf_horizontal_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
-                                 &vpx_highbd_lpf_horizontal_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_neon,
-                                 &vpx_highbd_lpf_horizontal_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
-                                 &vpx_highbd_lpf_horizontal_16_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
-                                 &vpx_highbd_lpf_horizontal_16_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_neon,
-                                 &vpx_highbd_lpf_horizontal_16_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_16_dual_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
-                                 &vpx_highbd_lpf_vertical_4_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
-                                 &vpx_highbd_lpf_vertical_4_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_neon,
-                                 &vpx_highbd_lpf_vertical_4_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
-                                 &vpx_highbd_lpf_vertical_8_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
-                                 &vpx_highbd_lpf_vertical_8_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_neon,
-                                 &vpx_highbd_lpf_vertical_8_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
-                                 &vpx_highbd_lpf_vertical_16_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
-                                 &vpx_highbd_lpf_vertical_16_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_neon,
-                                 &vpx_highbd_lpf_vertical_16_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
-                                 &vpx_highbd_lpf_vertical_16_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
-                                 &vpx_highbd_lpf_vertical_16_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon,
-                                 &vpx_highbd_lpf_vertical_16_dual_c, 12)));
-INSTANTIATE_TEST_CASE_P(
-    NEON, Loop8Test9Param,
-    ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_4_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_4_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_4_dual_c, 12),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_8_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_8_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon,
-                                 &vpx_highbd_lpf_horizontal_8_dual_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
-                                 &vpx_highbd_lpf_vertical_4_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
-                                 &vpx_highbd_lpf_vertical_4_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon,
-                                 &vpx_highbd_lpf_vertical_4_dual_c, 12),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
-                                 &vpx_highbd_lpf_vertical_8_dual_c, 8),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
-                                 &vpx_highbd_lpf_vertical_8_dual_c, 10),
-                      make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon,
-                                 &vpx_highbd_lpf_vertical_8_dual_c, 12)));
+// No neon high bitdepth functions.
 #else
 INSTANTIATE_TEST_CASE_P(
    NEON, Loop8Test6Param,
    ::testing::Values(
-        make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_dual_neon,
-                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_8_neon,
+                   &vpx_lpf_horizontal_edge_8_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_16_neon,
+                   &vpx_lpf_horizontal_edge_16_c, 8),
        make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c,
                   8),
@@ -635,9 +550,8 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_dual_dspr2,
-                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_8, &vpx_lpf_horizontal_edge_8, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_16, &vpx_lpf_horizontal_edge_16, 8),
        make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8),
@@ -662,9 +576,10 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(
        make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8),
        make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8),
-        make_tuple(&vpx_lpf_horizontal_16_dual_msa,
-                   &vpx_lpf_horizontal_16_dual_c, 8),
+        make_tuple(&vpx_lpf_horizontal_edge_8_msa, &vpx_lpf_horizontal_edge_8_c,
+                   8),
+        make_tuple(&vpx_lpf_horizontal_edge_16_msa,
+                   &vpx_lpf_horizontal_edge_16_c, 8),
        make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8),
        make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8),
        make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8)));
--- a/test/minmax_test.cc
+++ b/test/minmax_test.cc
@@ -107,10 +107,10 @@ TEST_P(MinMaxTest, CompareReferenceAndVaryStride) {
      int min_ref, max_ref, min, max;
      reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref);
      ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max));
-      EXPECT_EQ(max_ref, max)
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
-      EXPECT_EQ(min_ref, min)
-          << "when a_stride = " << a_stride << " and b_stride = " << b_stride;
+      EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
+      EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride
+                              << " and b_stride = " << b_stride;
    }
  }
 }
@@ -127,9 +127,4 @@ INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest,
                        ::testing::Values(&vpx_minmax_8x8_neon));
 #endif

-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, MinMaxTest,
-                        ::testing::Values(&vpx_minmax_8x8_msa));
-#endif
-
 }  // namespace
--- a/test/partial_idct_test.cc
+++ b/test/partial_idct_test.cc
--- a/test/pp_filter_test.cc
+++ b/test/pp_filter_test.cc
@@ -7,42 +7,22 @@
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
-#include <limits.h>
-#include "./vpx_config.h"
-#include "./vpx_dsp_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "./vpx_dsp_rtcd.h"
 #include "vpx/vpx_integer.h"
 #include "vpx_mem/vpx_mem.h"

-using libvpx_test::ACMRandom;
-using libvpx_test::Buffer;
-
-typedef void (*VpxPostProcDownAndAcrossMbRowFunc)(
-    unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line,
-    int dst_pixels_per_line, int cols, unsigned char *flimit, int size);
-
-typedef void (*VpxMbPostProcAcrossIpFunc)(unsigned char *src, int pitch,
-                                          int rows, int cols, int flimit);
-
-typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
-                                      int cols, int flimit);
+typedef void (*PostProcFunc)(unsigned char *src_ptr, unsigned char *dst_ptr,
+                             int src_pixels_per_line, int dst_pixels_per_line,
+                             int cols, unsigned char *flimit, int size);

 namespace {

-// Compute the filter level used in post proc from the loop filter strength
-int q2mbl(int x) {
-  if (x < 20) x = 20;
-
-  x = 50 + (x - 50) * 10 / 8;
-  return x * x / 3;
-}
-
-class VpxPostProcDownAndAcrossMbRowTest
-    : public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
+class VPxPostProcessingFilterTest
+    : public ::testing::TestWithParam<PostProcFunc> {
 public:
  virtual void TearDown() { libvpx_test::ClearSystemState(); }
 };
@@ -50,22 +30,31 @@ class VpxPostProcDownAndAcrossMbRowTest
 // Test routine for the VPx post-processing function
 // vpx_post_proc_down_and_across_mb_row_c.

-TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
+TEST_P(VPxPostProcessingFilterTest, FilterOutputCheck) {
  // Size of the underlying data block that will be filtered.
  const int block_width = 16;
  const int block_height = 16;

  // 5-tap filter needs 2 padding rows above and below the block in the input.
-  Buffer<uint8_t> src_image = Buffer<uint8_t>(block_width, block_height, 2);
-  ASSERT_TRUE(src_image.Init());
+  const int input_width = block_width;
+  const int input_height = block_height + 4;
+  const int input_stride = input_width;
+  const int input_size = input_width * input_height;

  // Filter extends output block by 8 samples at left and right edges.
-  // Though the left padding is only 8 bytes, the assembly code tries to
-  // read 16 bytes before the pointer.
-  Buffer<uint8_t> dst_image =
-      Buffer<uint8_t>(block_width, block_height, 8, 16, 8, 8);
-  ASSERT_TRUE(dst_image.Init());
+  const int output_width = block_width + 16;
+  const int output_height = block_height;
+  const int output_stride = output_width;
+  const int output_size = output_width * output_height;

+  uint8_t *const src_image =
+      reinterpret_cast<uint8_t *>(vpx_calloc(input_size, 1));
+  uint8_t *const dst_image =
+      reinterpret_cast<uint8_t *>(vpx_calloc(output_size, 1));
+
+  // Pointers to top-left pixel of block in the input and output images.
+  uint8_t *const src_image_ptr = src_image + (input_stride << 1);
+  uint8_t *const dst_image_ptr = dst_image + 8;
  uint8_t *const flimits =
      reinterpret_cast<uint8_t *>(vpx_memalign(16, block_width));
  (void)memset(flimits, 255, block_width);
@@ -73,412 +62,53 @@ TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) {
  // Initialize pixels in the input:
  //   block pixels to value 1,
  //   border pixels to value 10.
-  src_image.SetPadding(10);
-  src_image.Set(1);
-
-  // Initialize pixels in the output to 99.
-  dst_image.Set(99);
-
-  ASM_REGISTER_STATE_CHECK(GetParam()(
-      src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(),
-      dst_image.stride(), block_width, flimits, 16));
-
-  static const uint8_t kExpectedOutput[block_height] = {
-    4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4
-  };
-
-  uint8_t *pixel_ptr = dst_image.TopLeftPixel();
+  (void)memset(src_image, 10, input_size);
+  uint8_t *pixel_ptr = src_image_ptr;
  for (int i = 0; i < block_height; ++i) {
    for (int j = 0; j < block_width; ++j) {
-      ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j])
-          << "at (" << i << ", " << j << ")";
+      pixel_ptr[j] = 1;
    }
-    pixel_ptr += dst_image.stride();
+    pixel_ptr += input_stride;
  }

+  // Initialize pixels in the output to 99.
+  (void)memset(dst_image, 99, output_size);
+
+  ASM_REGISTER_STATE_CHECK(GetParam()(src_image_ptr, dst_image_ptr,
+                                      input_stride, output_stride, block_width,
+                                      flimits, 16));
+
+  static const uint8_t expected_data[block_height] = { 4, 3, 1, 1, 1, 1, 1, 1,
+                                                       1, 1, 1, 1, 1, 1, 3, 4 };
+
+  pixel_ptr = dst_image_ptr;
+  for (int i = 0; i < block_height; ++i) {
+    for (int j = 0; j < block_width; ++j) {
+      EXPECT_EQ(expected_data[i], pixel_ptr[j])
+          << "VPxPostProcessingFilterTest failed with invalid filter output";
+    }
+    pixel_ptr += output_stride;
+  }
+
+  vpx_free(src_image);
+  vpx_free(dst_image);
  vpx_free(flimits);
 };

-TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) {
-  // Size of the underlying data block that will be filtered.
-  // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V
-  // blocks are always a multiple of 8 wide and exactly 8 high.
-  const int block_width = 136;
-  const int block_height = 16;
-
-  // 5-tap filter needs 2 padding rows above and below the block in the input.
-  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
-  Buffer<uint8_t> src_image =
-      Buffer<uint8_t>(block_width, block_height, 2, 2, 10, 2);
-  ASSERT_TRUE(src_image.Init());
-
-  // Filter extends output block by 8 samples at left and right edges.
-  // Though the left padding is only 8 bytes, there is 'above' padding as well
-  // so when the assembly code tries to read 16 bytes before the pointer it is
-  // not a problem.
-  // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16.
-  Buffer<uint8_t> dst_image =
-      Buffer<uint8_t>(block_width, block_height, 8, 8, 16, 8);
-  ASSERT_TRUE(dst_image.Init());
-  Buffer<uint8_t> dst_image_ref = Buffer<uint8_t>(block_width, block_height, 8);
-  ASSERT_TRUE(dst_image_ref.Init());
-
-  // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock
-  // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so
-  // it must be padded out.
-  const int flimits_width = block_width % 16 ? block_width + 8 : block_width;
-  uint8_t *const flimits =
-      reinterpret_cast<uint8_t *>(vpx_memalign(16, flimits_width));
-
-  ACMRandom rnd;
-  rnd.Reset(ACMRandom::DeterministicSeed());
-  // Initialize pixels in the input:
-  //   block pixels to random values.
-  //   border pixels to value 10.
-  src_image.SetPadding(10);
-  src_image.Set(&rnd, &ACMRandom::Rand8);
-
-  for (int blocks = 0; blocks < block_width; blocks += 8) {
-    (void)memset(flimits, 0, sizeof(*flimits) * flimits_width);
-
-    for (int f = 0; f < 255; f++) {
-      (void)memset(flimits + blocks, f, sizeof(*flimits) * 8);
-
-      dst_image.Set(0);
-      dst_image_ref.Set(0);
-
-      vpx_post_proc_down_and_across_mb_row_c(
-          src_image.TopLeftPixel(), dst_image_ref.TopLeftPixel(),
-          src_image.stride(), dst_image_ref.stride(), block_width, flimits,
-          block_height);
-      ASM_REGISTER_STATE_CHECK(
-          GetParam()(src_image.TopLeftPixel(), dst_image.TopLeftPixel(),
-                     src_image.stride(), dst_image.stride(), block_width,
-                     flimits, block_height));
-
-      ASSERT_TRUE(dst_image.CheckValues(dst_image_ref));
-    }
-  }
-
-  vpx_free(flimits);
-}
-
-class VpxMbPostProcAcrossIpTest
-    : public ::testing::TestWithParam<VpxMbPostProcAcrossIpFunc> {
- public:
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void SetCols(unsigned char *s, int rows, int cols, int src_width) {
-    for (int r = 0; r < rows; r++) {
-      for (int c = 0; c < cols; c++) {
-        s[c] = c;
-      }
-      s += src_width;
-    }
-  }
-
-  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
-                     int rows, int cols, int src_pitch) {
-    for (int r = 0; r < rows; r++) {
-      for (int c = 0; c < cols; c++) {
-        ASSERT_EQ(expected_output[c], src_c[c])
-            << "at (" << r << ", " << c << ")";
-      }
-      src_c += src_pitch;
-    }
-  }
-
-  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
-                      int filter_level, const unsigned char *expected_output) {
-    ASM_REGISTER_STATE_CHECK(
-        GetParam()(s, src_width, rows, cols, filter_level));
-    RunComparison(expected_output, s, rows, cols, src_width);
-  }
-};
-
-TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
-  ASSERT_TRUE(src.Init());
-  src.SetPadding(10);
-  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
-
-  Buffer<uint8_t> expected_output = Buffer<uint8_t>(cols, rows, 0);
-  ASSERT_TRUE(expected_output.Init());
-  SetCols(expected_output.TopLeftPixel(), rows, cols, expected_output.stride());
-
-  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(0),
-                 expected_output.TopLeftPixel());
-}
-
-TEST_P(VpxMbPostProcAcrossIpTest, CheckMediumFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
-  ASSERT_TRUE(src.Init());
-  src.SetPadding(10);
-  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
-
-  static const unsigned char kExpectedOutput[cols] = {
-    2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13
-  };
-
-  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(70),
-                 kExpectedOutput);
-}
-
-TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
-  ASSERT_TRUE(src.Init());
-  src.SetPadding(10);
-  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
-
-  static const unsigned char kExpectedOutput[cols] = {
-    2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13
-  };
-
-  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), INT_MAX,
-                 kExpectedOutput);
-
-  SetCols(src.TopLeftPixel(), rows, cols, src.stride());
-
-  RunFilterLevel(src.TopLeftPixel(), rows, cols, src.stride(), q2mbl(100),
-                 kExpectedOutput);
-}
-
-TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> c_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
-  ASSERT_TRUE(c_mem.Init());
-  Buffer<uint8_t> asm_mem = Buffer<uint8_t>(cols, rows, 8, 8, 17, 8);
-  ASSERT_TRUE(asm_mem.Init());
-
-  // When level >= 100, the filter behaves the same as the level = INT_MAX
-  // When level < 20, it behaves the same as the level = 0
-  for (int level = 0; level < 100; level++) {
-    c_mem.SetPadding(10);
-    asm_mem.SetPadding(10);
-    SetCols(c_mem.TopLeftPixel(), rows, cols, c_mem.stride());
-    SetCols(asm_mem.TopLeftPixel(), rows, cols, asm_mem.stride());
-
-    vpx_mbpost_proc_across_ip_c(c_mem.TopLeftPixel(), c_mem.stride(), rows,
-                                cols, q2mbl(level));
-    ASM_REGISTER_STATE_CHECK(GetParam()(
-        asm_mem.TopLeftPixel(), asm_mem.stride(), rows, cols, q2mbl(level)));
-
-    ASSERT_TRUE(asm_mem.CheckValues(c_mem));
-  }
-}
-
-class VpxMbPostProcDownTest
-    : public ::testing::TestWithParam<VpxMbPostProcDownFunc> {
- public:
-  virtual void TearDown() { libvpx_test::ClearSystemState(); }
-
- protected:
-  void SetRows(unsigned char *src_c, int rows, int cols, int src_width) {
-    for (int r = 0; r < rows; r++) {
-      memset(src_c, r, cols);
-      src_c += src_width;
-    }
-  }
-
-  void RunComparison(const unsigned char *expected_output, unsigned char *src_c,
-                     int rows, int cols, int src_pitch) {
-    for (int r = 0; r < rows; r++) {
-      for (int c = 0; c < cols; c++) {
-        ASSERT_EQ(expected_output[r * rows + c], src_c[c])
-            << "at (" << r << ", " << c << ")";
-      }
-      src_c += src_pitch;
-    }
-  }
-
-  void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width,
-                      int filter_level, const unsigned char *expected_output) {
-    ASM_REGISTER_STATE_CHECK(
-        GetParam()(s, src_width, rows, cols, filter_level));
-    RunComparison(expected_output, s, rows, cols, src_width);
-  }
-};
-
-TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
-  ASSERT_TRUE(src_c.Init());
-  src_c.SetPadding(10);
-
-  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
-
-  static const unsigned char kExpectedOutput[rows * cols] = {
-    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
-    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  3,  3,  3,  3,  3,  3,
-    3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  3,  4,  4,  3,  4,  4,  3,  3,  3,
-    4,  4,  3,  4,  4,  3,  3,  4,  5,  4,  4,  4,  4,  4,  4,  4,  5,  4,  4,
-    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
-    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
-    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  8,  9,  9,  8,  8,  8,  9,
-    9,  8,  9,  9,  8,  8,  8,  9,  9,  10, 10, 9,  9,  9,  10, 10, 9,  10, 10,
-    9,  9,  9,  10, 10, 10, 11, 10, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11,
-    10, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 11, 12,
-    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12,
-    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
-    13, 13, 13, 13, 14, 13, 13, 13, 13
-  };
-
-  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), INT_MAX,
-                 kExpectedOutput);
-
-  src_c.SetPadding(10);
-  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
-  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(100),
-                 kExpectedOutput);
-}
-
-TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
-  ASSERT_TRUE(src_c.Init());
-  src_c.SetPadding(10);
-
-  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
-
-  static const unsigned char kExpectedOutput[rows * cols] = {
-    2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  1,  1,  2,  2,  2,  2,  2,  2,  2,
-    2,  3,  2,  2,  2,  2,  2,  2,  2,  3,  2,  2,  2,  2,  2,  2,  2,  2,  2,
-    2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  3,  3,  3,  3,  3,  3,  3,  3,  3,
-    3,  3,  3,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
-    4,  4,  4,  4,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
-    5,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  7,  7,
-    7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,
-    8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  9,  9,  9,  9,  9,  9,  9,  9,
-    9,  9,  9,  9,  9,  9,  9,  9,  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
-    10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
-    11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13,
-    13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 13, 12,
-    13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13,
-    13, 13, 13, 13, 14, 13, 13, 13, 13
-  };
-
-  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(70),
-                 kExpectedOutput);
-}
-
-TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) {
-  const int rows = 16;
-  const int cols = 16;
-
-  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
-  ASSERT_TRUE(src_c.Init());
-  src_c.SetPadding(10);
-
-  SetRows(src_c.TopLeftPixel(), rows, cols, src_c.stride());
-
-  unsigned char *expected_output = new unsigned char[rows * cols];
-  ASSERT_TRUE(expected_output != NULL);
-  SetRows(expected_output, rows, cols, cols);
-
-  RunFilterLevel(src_c.TopLeftPixel(), rows, cols, src_c.stride(), q2mbl(0),
-                 expected_output);
-
-  delete[] expected_output;
-}
-
-TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) {
-  const int rows = 16;
-  const int cols = 16;
-
-  ACMRandom rnd;
-  rnd.Reset(ACMRandom::DeterministicSeed());
-
-  Buffer<uint8_t> src_c = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
-  ASSERT_TRUE(src_c.Init());
-  Buffer<uint8_t> src_asm = Buffer<uint8_t>(cols, rows, 8, 8, 8, 17);
-  ASSERT_TRUE(src_asm.Init());
-
-  for (int level = 0; level < 100; level++) {
-    src_c.SetPadding(10);
-    src_asm.SetPadding(10);
-    src_c.Set(&rnd, &ACMRandom::Rand8);
-    src_asm.CopyFrom(src_c);
-
-    vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
-                           q2mbl(level));
-    ASM_REGISTER_STATE_CHECK(GetParam()(
-        src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
-    ASSERT_TRUE(src_asm.CheckValues(src_c));
-
-    src_c.SetPadding(10);
-    src_asm.SetPadding(10);
-    src_c.Set(&rnd, &ACMRandom::Rand8Extremes);
-    src_asm.CopyFrom(src_c);
-
-    vpx_mbpost_proc_down_c(src_c.TopLeftPixel(), src_c.stride(), rows, cols,
-                           q2mbl(level));
-    ASM_REGISTER_STATE_CHECK(GetParam()(
-        src_asm.TopLeftPixel(), src_asm.stride(), rows, cols, q2mbl(level)));
-    ASSERT_TRUE(src_asm.CheckValues(src_c));
-  }
-}
-
 INSTANTIATE_TEST_CASE_P(
-    C, VpxPostProcDownAndAcrossMbRowTest,
+    C, VPxPostProcessingFilterTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_c));

-INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcAcrossIpTest,
-                        ::testing::Values(vpx_mbpost_proc_across_ip_c));
-
-INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcDownTest,
-                        ::testing::Values(vpx_mbpost_proc_down_c));
-
 #if HAVE_SSE2
 INSTANTIATE_TEST_CASE_P(
-    SSE2, VpxPostProcDownAndAcrossMbRowTest,
+    SSE2, VPxPostProcessingFilterTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_sse2));
-
-INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcAcrossIpTest,
-                        ::testing::Values(vpx_mbpost_proc_across_ip_sse2));
-
-INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcDownTest,
-                        ::testing::Values(vpx_mbpost_proc_down_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, VpxPostProcDownAndAcrossMbRowTest,
-    ::testing::Values(vpx_post_proc_down_and_across_mb_row_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcAcrossIpTest,
-                        ::testing::Values(vpx_mbpost_proc_across_ip_neon));
-
-INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcDownTest,
-                        ::testing::Values(vpx_mbpost_proc_down_neon));
-#endif  // HAVE_NEON
+#endif

 #if HAVE_MSA
 INSTANTIATE_TEST_CASE_P(
-    MSA, VpxPostProcDownAndAcrossMbRowTest,
+    MSA, VPxPostProcessingFilterTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_msa));
-
-INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcAcrossIpTest,
-                        ::testing::Values(vpx_mbpost_proc_across_ip_msa));
-
-INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
-                        ::testing::Values(vpx_mbpost_proc_down_msa));
-#endif  // HAVE_MSA
+#endif

 }  // namespace
--- a/test/predict_test.cc
+++ b/test/predict_test.cc
@@ -1,385 +0,0 @@
-/*
- *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp8_rtcd.h"
-#include "./vpx_config.h"
-#include "test/acm_random.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/util.h"
-#include "vpx/vpx_integer.h"
-#include "vpx_mem/vpx_mem.h"
-
-namespace {
-
-using libvpx_test::ACMRandom;
-using std::tr1::make_tuple;
-
-typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
-                            int xoffset, int yoffset, uint8_t *dst_ptr,
-                            int dst_pitch);
-
-typedef std::tr1::tuple<int, int, PredictFunc> PredictParam;
-
-class PredictTestBase : public ::testing::TestWithParam<PredictParam> {
- public:
-  PredictTestBase()
-      : width_(GET_PARAM(0)), height_(GET_PARAM(1)), predict_(GET_PARAM(2)),
-        src_(NULL), padded_dst_(NULL), dst_(NULL), dst_c_(NULL) {}
-
-  virtual void SetUp() {
-    src_ = new uint8_t[kSrcSize];
-    ASSERT_TRUE(src_ != NULL);
-
-    // padded_dst_ provides a buffer of kBorderSize around the destination
-    // memory to facilitate detecting out of bounds writes.
-    dst_stride_ = kBorderSize + width_ + kBorderSize;
-    padded_dst_size_ = dst_stride_ * (kBorderSize + height_ + kBorderSize);
-    padded_dst_ =
-        reinterpret_cast<uint8_t *>(vpx_memalign(16, padded_dst_size_));
-    ASSERT_TRUE(padded_dst_ != NULL);
-    dst_ = padded_dst_ + (kBorderSize * dst_stride_) + kBorderSize;
-
-    dst_c_ = new uint8_t[16 * 16];
-    ASSERT_TRUE(dst_c_ != NULL);
-
-    memset(src_, 0, kSrcSize);
-    memset(padded_dst_, 128, padded_dst_size_);
-    memset(dst_c_, 0, 16 * 16);
-  }
-
-  virtual void TearDown() {
-    delete[] src_;
-    src_ = NULL;
-    vpx_free(padded_dst_);
-    padded_dst_ = NULL;
-    dst_ = NULL;
-    delete[] dst_c_;
-    dst_c_ = NULL;
-    libvpx_test::ClearSystemState();
-  }
-
- protected:
-  // Make reference arrays big enough for 16x16 functions. Six-tap filters need
-  // 5 extra pixels outside of the macroblock.
-  static const int kSrcStride = 21;
-  static const int kSrcSize = kSrcStride * kSrcStride;
-  static const int kBorderSize = 16;
-
-  int width_;
-  int height_;
-  PredictFunc predict_;
-  uint8_t *src_;
-  uint8_t *padded_dst_;
-  uint8_t *dst_;
-  int padded_dst_size_;
-  uint8_t *dst_c_;
-  int dst_stride_;
-
-  bool CompareBuffers(const uint8_t *a, int a_stride, const uint8_t *b,
-                      int b_stride) const {
-    for (int height = 0; height < height_; ++height) {
-      EXPECT_EQ(0, memcmp(a + height * a_stride, b + height * b_stride,
-                          sizeof(*a) * width_))
-          << "Row " << height << " does not match.";
-    }
-
-    return !HasFailure();
-  }
-
-  // Given a block of memory 'a' with size 'a_size', determine if all regions
-  // excepting block 'b' described by 'b_stride', 'b_height', and 'b_width'
-  // match pixel value 'c'.
-  bool CheckBorder(const uint8_t *a, int a_size, const uint8_t *b, int b_width,
-                   int b_height, int b_stride, uint8_t c) const {
-    const uint8_t *a_end = a + a_size;
-    const int b_size = (b_stride * b_height) + b_width;
-    const uint8_t *b_end = b + b_size;
-    const int left_border = (b_stride - b_width) / 2;
-    const int right_border = left_border + ((b_stride - b_width) % 2);
-
-    EXPECT_GE(b - left_border, a) << "'b' does not start within 'a'";
-    EXPECT_LE(b_end + right_border, a_end) << "'b' does not end within 'a'";
-
-    // Top border.
-    for (int pixel = 0; pixel < b - a - left_border; ++pixel) {
-      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in top border.";
-    }
-
-    // Left border.
-    for (int height = 0; height < b_height; ++height) {
-      for (int width = left_border; width > 0; --width) {
-        EXPECT_EQ(c, b[height * b_stride - width])
-            << "Mismatch at row " << height << " column " << left_border - width
-            << " in left border.";
-      }
-    }
-
-    // Right border.
-    for (int height = 0; height < b_height; ++height) {
-      for (int width = b_width; width < b_width + right_border; ++width) {
-        EXPECT_EQ(c, b[height * b_stride + width])
-            << "Mismatch at row " << height << " column " << width - b_width
-            << " in right border.";
-      }
-    }
-
-    // Bottom border.
-    for (int pixel = static_cast<int>(b - a + b_size); pixel < a_size;
-         ++pixel) {
-      EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in bottom border.";
-    }
-
-    return !HasFailure();
-  }
-
-  void TestWithRandomData(PredictFunc reference) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-    // Run tests for almost all possible offsets.
-    for (int xoffset = 0; xoffset < 8; ++xoffset) {
-      for (int yoffset = 0; yoffset < 8; ++yoffset) {
-        if (xoffset == 0 && yoffset == 0) {
-          // This represents a copy which is not required to be handled by this
-          // module.
-          continue;
-        }
-
-        for (int i = 0; i < kSrcSize; ++i) {
-          src_[i] = rnd.Rand8();
-        }
-        reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset,
-                  dst_c_, 16);
-
-        ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2], kSrcStride,
-                                          xoffset, yoffset, dst_, dst_stride_));
-
-        ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_, dst_stride_));
-        ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_, width_,
-                                height_, dst_stride_, 128));
-      }
-    }
-  }
-
-  void TestWithUnalignedDst(PredictFunc reference) {
-    ACMRandom rnd(ACMRandom::DeterministicSeed());
-
-    // Only the 4x4 need to be able to handle unaligned writes.
-    if (width_ == 4 && height_ == 4) {
-      for (int xoffset = 0; xoffset < 8; ++xoffset) {
-        for (int yoffset = 0; yoffset < 8; ++yoffset) {
-          if (xoffset == 0 && yoffset == 0) {
-            continue;
-          }
-          for (int i = 0; i < kSrcSize; ++i) {
-            src_[i] = rnd.Rand8();
-          }
-          reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset,
-                    dst_c_, 16);
-
-          for (int i = 1; i < 4; ++i) {
-            memset(padded_dst_, 128, padded_dst_size_);
-
-            ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2],
-                                              kSrcStride, xoffset, yoffset,
-                                              dst_ + i, dst_stride_ + i));
-
-            ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_ + i, dst_stride_ + i));
-            ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_ + i,
-                                    width_, height_, dst_stride_ + i, 128));
-          }
-        }
-      }
-    }
-  }
-};
-
-class SixtapPredictTest : public PredictTestBase {};
-
-TEST_P(SixtapPredictTest, TestWithRandomData) {
-  TestWithRandomData(vp8_sixtap_predict16x16_c);
-}
-TEST_P(SixtapPredictTest, TestWithUnalignedDst) {
-  TestWithUnalignedDst(vp8_sixtap_predict16x16_c);
-}
-
-TEST_P(SixtapPredictTest, TestWithPresetData) {
-  // Test input
-  static const uint8_t kTestData[kSrcSize] = {
-    184, 4,   191, 82,  92,  41,  0,   1,   226, 236, 172, 20,  182, 42,  226,
-    177, 79,  94,  77,  179, 203, 206, 198, 22,  192, 19,  75,  17,  192, 44,
-    233, 120, 48,  168, 203, 141, 210, 203, 143, 180, 184, 59,  201, 110, 102,
-    171, 32,  182, 10,  109, 105, 213, 60,  47,  236, 253, 67,  55,  14,  3,
-    99,  247, 124, 148, 159, 71,  34,  114, 19,  177, 38,  203, 237, 239, 58,
-    83,  155, 91,  10,  166, 201, 115, 124, 5,   163, 104, 2,   231, 160, 16,
-    234, 4,   8,   103, 153, 167, 174, 187, 26,  193, 109, 64,  141, 90,  48,
-    200, 174, 204, 36,  184, 114, 237, 43,  238, 242, 207, 86,  245, 182, 247,
-    6,   161, 251, 14,  8,   148, 182, 182, 79,  208, 120, 188, 17,  6,   23,
-    65,  206, 197, 13,  242, 126, 128, 224, 170, 110, 211, 121, 197, 200, 47,
-    188, 207, 208, 184, 221, 216, 76,  148, 143, 156, 100, 8,   89,  117, 14,
-    112, 183, 221, 54,  197, 208, 180, 69,  176, 94,  180, 131, 215, 121, 76,
-    7,   54,  28,  216, 238, 249, 176, 58,  142, 64,  215, 242, 72,  49,  104,
-    87,  161, 32,  52,  216, 230, 4,   141, 44,  181, 235, 224, 57,  195, 89,
-    134, 203, 144, 162, 163, 126, 156, 84,  185, 42,  148, 145, 29,  221, 194,
-    134, 52,  100, 166, 105, 60,  140, 110, 201, 184, 35,  181, 153, 93,  121,
-    243, 227, 68,  131, 134, 232, 2,   35,  60,  187, 77,  209, 76,  106, 174,
-    15,  241, 227, 115, 151, 77,  175, 36,  187, 121, 221, 223, 47,  118, 61,
-    168, 105, 32,  237, 236, 167, 213, 238, 202, 17,  170, 24,  226, 247, 131,
-    145, 6,   116, 117, 121, 11,  194, 41,  48,  126, 162, 13,  93,  209, 131,
-    154, 122, 237, 187, 103, 217, 99,  60,  200, 45,  78,  115, 69,  49,  106,
-    200, 194, 112, 60,  56,  234, 72,  251, 19,  120, 121, 182, 134, 215, 135,
-    10,  114, 2,   247, 46,  105, 209, 145, 165, 153, 191, 243, 12,  5,   36,
-    119, 206, 231, 231, 11,  32,  209, 83,  27,  229, 204, 149, 155, 83,  109,
-    35,  93,  223, 37,  84,  14,  142, 37,  160, 52,  191, 96,  40,  204, 101,
-    77,  67,  52,  53,  43,  63,  85,  253, 147, 113, 226, 96,  6,   125, 179,
-    115, 161, 17,  83,  198, 101, 98,  85,  139, 3,   137, 75,  99,  178, 23,
-    201, 255, 91,  253, 52,  134, 60,  138, 131, 208, 251, 101, 48,  2,   227,
-    228, 118, 132, 245, 202, 75,  91,  44,  160, 231, 47,  41,  50,  147, 220,
-    74,  92,  219, 165, 89,  16
-  };
-
-  // Expected results for xoffset = 2 and yoffset = 2.
-  static const int kExpectedDstStride = 16;
-  static const uint8_t kExpectedDst[256] = {
-    117, 102, 74,  135, 42,  98,  175, 206, 70,  73,  222, 197, 50,  24,  39,
-    49,  38,  105, 90,  47,  169, 40,  171, 215, 200, 73,  109, 141, 53,  85,
-    177, 164, 79,  208, 124, 89,  212, 18,  81,  145, 151, 164, 217, 153, 91,
-    154, 102, 102, 159, 75,  164, 152, 136, 51,  213, 219, 186, 116, 193, 224,
-    186, 36,  231, 208, 84,  211, 155, 167, 35,  59,  42,  76,  216, 149, 73,
-    201, 78,  149, 184, 100, 96,  196, 189, 198, 188, 235, 195, 117, 129, 120,
-    129, 49,  25,  133, 113, 69,  221, 114, 70,  143, 99,  157, 108, 189, 140,
-    78,  6,   55,  65,  240, 255, 245, 184, 72,  90,  100, 116, 131, 39,  60,
-    234, 167, 33,  160, 88,  185, 200, 157, 159, 176, 127, 151, 138, 102, 168,
-    106, 170, 86,  82,  219, 189, 76,  33,  115, 197, 106, 96,  198, 136, 97,
-    141, 237, 151, 98,  137, 191, 185, 2,   57,  95,  142, 91,  255, 185, 97,
-    137, 76,  162, 94,  173, 131, 193, 161, 81,  106, 72,  135, 222, 234, 137,
-    66,  137, 106, 243, 210, 147, 95,  15,  137, 110, 85,  66,  16,  96,  167,
-    147, 150, 173, 203, 140, 118, 196, 84,  147, 160, 19,  95,  101, 123, 74,
-    132, 202, 82,  166, 12,  131, 166, 189, 170, 159, 85,  79,  66,  57,  152,
-    132, 203, 194, 0,   1,   56,  146, 180, 224, 156, 28,  83,  181, 79,  76,
-    80,  46,  160, 175, 59,  106, 43,  87,  75,  136, 85,  189, 46,  71,  200,
-    90
-  };
-
-  ASM_REGISTER_STATE_CHECK(
-      predict_(const_cast<uint8_t *>(kTestData) + kSrcStride * 2 + 2,
-               kSrcStride, 2, 2, dst_, dst_stride_));
-
-  ASSERT_TRUE(
-      CompareBuffers(kExpectedDst, kExpectedDstStride, dst_, dst_stride_));
-}
-
-INSTANTIATE_TEST_CASE_P(
-    C, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_neon),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_neon)));
-#endif
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(
-    MMX, SixtapPredictTest,
-    ::testing::Values(make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
-#endif
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
-#endif
-
-#if HAVE_MMI
-INSTANTIATE_TEST_CASE_P(
-    MMI, SixtapPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_mmi),
-                      make_tuple(8, 8, &vp8_sixtap_predict8x8_mmi),
-                      make_tuple(8, 4, &vp8_sixtap_predict8x4_mmi),
-                      make_tuple(4, 4, &vp8_sixtap_predict4x4_mmi)));
-#endif
-
-class BilinearPredictTest : public PredictTestBase {};
-
-TEST_P(BilinearPredictTest, TestWithRandomData) {
-  TestWithRandomData(vp8_bilinear_predict16x16_c);
-}
-TEST_P(BilinearPredictTest, TestWithUnalignedDst) {
-  TestWithUnalignedDst(vp8_bilinear_predict16x16_c);
-}
-
-INSTANTIATE_TEST_CASE_P(
-    C, BilinearPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_c),
-                      make_tuple(8, 8, &vp8_bilinear_predict8x8_c),
-                      make_tuple(8, 4, &vp8_bilinear_predict8x4_c),
-                      make_tuple(4, 4, &vp8_bilinear_predict4x4_c)));
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, BilinearPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_neon),
-                      make_tuple(8, 8, &vp8_bilinear_predict8x8_neon),
-                      make_tuple(8, 4, &vp8_bilinear_predict8x4_neon),
-                      make_tuple(4, 4, &vp8_bilinear_predict4x4_neon)));
-#endif
-#if HAVE_MMX
-INSTANTIATE_TEST_CASE_P(
-    MMX, BilinearPredictTest,
-    ::testing::Values(make_tuple(8, 4, &vp8_bilinear_predict8x4_mmx),
-                      make_tuple(4, 4, &vp8_bilinear_predict4x4_mmx)));
-#endif
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, BilinearPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_sse2),
-                      make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2)));
-#endif
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, BilinearPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),
-                      make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));
-#endif
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(
-    MSA, BilinearPredictTest,
-    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_msa),
-                      make_tuple(8, 8, &vp8_bilinear_predict8x8_msa),
-                      make_tuple(8, 4, &vp8_bilinear_predict8x4_msa),
-                      make_tuple(4, 4, &vp8_bilinear_predict4x4_msa)));
-#endif
-}  // namespace
--- a/test/quantize_test.cc
+++ b/test/quantize_test.cc
@@ -200,12 +200,4 @@ INSTANTIATE_TEST_CASE_P(
        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
 #endif  // HAVE_MSA
-
-#if HAVE_MMI
-INSTANTIATE_TEST_CASE_P(
-    MMI, QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vp8_fast_quantize_b_mmi, &vp8_fast_quantize_b_c),
-        make_tuple(&vp8_regular_quantize_b_mmi, &vp8_regular_quantize_b_c)));
-#endif  // HAVE_MMI
 }  // namespace
--- a/test/register_state_check.h
+++ b/test/register_state_check.h
@@ -32,9 +32,7 @@

 #undef NOMINMAX
 #define NOMINMAX
-#ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
-#endif
 #include <windows.h>
 #include <winnt.h>

@@ -113,8 +111,8 @@ class RegisterStateCheck {
    int64_t post_store[8];
    vpx_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
-      EXPECT_EQ(pre_store_[i], post_store[i])
-          << "d" << i + 8 << " has been modified";
+      EXPECT_EQ(pre_store_[i], post_store[i]) << "d" << i + 8
+                                              << " has been modified";
    }
  }

--- a/test/resize_test.cc
+++ b/test/resize_test.cc
@@ -298,10 +298,10 @@ TEST_P(ResizeTest, TestExternalResizeWorks) {
    unsigned int expected_h;
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, 0);
-    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
+                                   << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
+                                   << " had unexpected height";
  }
 }

@@ -513,10 +513,10 @@ TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
    unsigned int expected_h;
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, 1);
-    EXPECT_EQ(expected_w, info->w)
-        << "Frame " << frame << " had unexpected width";
-    EXPECT_EQ(expected_h, info->h)
-        << "Frame " << frame << " had unexpected height";
+    EXPECT_EQ(expected_w, info->w) << "Frame " << frame
+                                   << " had unexpected width";
+    EXPECT_EQ(expected_h, info->h) << "Frame " << frame
+                                   << " had unexpected height";
    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
  }
 }
--- a/test/sad_test.cc
+++ b/test/sad_test.cc
@@ -644,50 +644,19 @@ INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests));
 #if HAVE_NEON
 const SadMxNParam neon_tests[] = {
  SadMxNParam(64, 64, &vpx_sad64x64_neon),
-  SadMxNParam(64, 32, &vpx_sad64x32_neon),
  SadMxNParam(32, 32, &vpx_sad32x32_neon),
-  SadMxNParam(16, 32, &vpx_sad16x32_neon),
  SadMxNParam(16, 16, &vpx_sad16x16_neon),
  SadMxNParam(16, 8, &vpx_sad16x8_neon),
  SadMxNParam(8, 16, &vpx_sad8x16_neon),
  SadMxNParam(8, 8, &vpx_sad8x8_neon),
-  SadMxNParam(8, 4, &vpx_sad8x4_neon),
-  SadMxNParam(4, 8, &vpx_sad4x8_neon),
  SadMxNParam(4, 4, &vpx_sad4x4_neon),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests));

-const SadMxNAvgParam avg_neon_tests[] = {
-  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_neon),
-  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_neon),
-  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_neon),
-  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_neon),
-  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_neon),
-  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_neon),
-  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_neon),
-  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_neon),
-  SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_neon),
-  SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_neon),
-  SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_neon),
-  SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_neon),
-  SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_neon),
-};
-INSTANTIATE_TEST_CASE_P(NEON, SADavgTest, ::testing::ValuesIn(avg_neon_tests));
-
 const SadMxNx4Param x4d_neon_tests[] = {
  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_neon),
-  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_neon),
-  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_neon),
  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_neon),
-  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_neon),
-  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_neon),
  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_neon),
-  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_neon),
-  SadMxNx4Param(8, 16, &vpx_sad8x16x4d_neon),
-  SadMxNx4Param(8, 8, &vpx_sad8x8x4d_neon),
-  SadMxNx4Param(8, 4, &vpx_sad8x4x4d_neon),
-  SadMxNx4Param(4, 8, &vpx_sad4x8x4d_neon),
-  SadMxNx4Param(4, 4, &vpx_sad4x4x4d_neon),
 };
 INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests));
 #endif  // HAVE_NEON
@@ -896,14 +865,6 @@ const SadMxNx4Param x4d_avx2_tests[] = {
 INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests));
 #endif  // HAVE_AVX2

-#if HAVE_AVX512
-const SadMxNx4Param x4d_avx512_tests[] = {
-  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_avx512),
-};
-INSTANTIATE_TEST_CASE_P(AVX512, SADx4Test,
-                        ::testing::ValuesIn(x4d_avx512_tests));
-#endif  // HAVE_AVX512
-
 //------------------------------------------------------------------------------
 // MIPS functions
 #if HAVE_MSA
@@ -959,98 +920,4 @@ const SadMxNx4Param x4d_msa_tests[] = {
 INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests));
 #endif  // HAVE_MSA

-//------------------------------------------------------------------------------
-// VSX functions
-#if HAVE_VSX
-const SadMxNParam vsx_tests[] = {
-  SadMxNParam(64, 64, &vpx_sad64x64_vsx),
-  SadMxNParam(64, 32, &vpx_sad64x32_vsx),
-  SadMxNParam(32, 64, &vpx_sad32x64_vsx),
-  SadMxNParam(32, 32, &vpx_sad32x32_vsx),
-  SadMxNParam(32, 16, &vpx_sad32x16_vsx),
-  SadMxNParam(16, 32, &vpx_sad16x32_vsx),
-  SadMxNParam(16, 16, &vpx_sad16x16_vsx),
-  SadMxNParam(16, 8, &vpx_sad16x8_vsx),
-};
-INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests));
-
-const SadMxNAvgParam avg_vsx_tests[] = {
-  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_vsx),
-  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_vsx),
-  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_vsx),
-  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_vsx),
-  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_vsx),
-  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_vsx),
-  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_vsx),
-  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_vsx),
-};
-INSTANTIATE_TEST_CASE_P(VSX, SADavgTest, ::testing::ValuesIn(avg_vsx_tests));
-
-const SadMxNx4Param x4d_vsx_tests[] = {
-  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_vsx),
-  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_vsx),
-  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_vsx),
-  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_vsx),
-  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_vsx),
-  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_vsx),
-  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_vsx),
-  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_vsx),
-};
-INSTANTIATE_TEST_CASE_P(VSX, SADx4Test, ::testing::ValuesIn(x4d_vsx_tests));
-#endif  // HAVE_VSX
-
-//------------------------------------------------------------------------------
-// Loongson functions
-#if HAVE_MMI
-const SadMxNParam mmi_tests[] = {
-  SadMxNParam(64, 64, &vpx_sad64x64_mmi),
-  SadMxNParam(64, 32, &vpx_sad64x32_mmi),
-  SadMxNParam(32, 64, &vpx_sad32x64_mmi),
-  SadMxNParam(32, 32, &vpx_sad32x32_mmi),
-  SadMxNParam(32, 16, &vpx_sad32x16_mmi),
-  SadMxNParam(16, 32, &vpx_sad16x32_mmi),
-  SadMxNParam(16, 16, &vpx_sad16x16_mmi),
-  SadMxNParam(16, 8, &vpx_sad16x8_mmi),
-  SadMxNParam(8, 16, &vpx_sad8x16_mmi),
-  SadMxNParam(8, 8, &vpx_sad8x8_mmi),
-  SadMxNParam(8, 4, &vpx_sad8x4_mmi),
-  SadMxNParam(4, 8, &vpx_sad4x8_mmi),
-  SadMxNParam(4, 4, &vpx_sad4x4_mmi),
-};
-INSTANTIATE_TEST_CASE_P(MMI, SADTest, ::testing::ValuesIn(mmi_tests));
-
-const SadMxNAvgParam avg_mmi_tests[] = {
-  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_mmi),
-  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_mmi),
-  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_mmi),
-  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_mmi),
-  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_mmi),
-  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_mmi),
-  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_mmi),
-  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_mmi),
-  SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_mmi),
-  SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_mmi),
-  SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_mmi),
-  SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_mmi),
-  SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_mmi),
-};
-INSTANTIATE_TEST_CASE_P(MMI, SADavgTest, ::testing::ValuesIn(avg_mmi_tests));
-
-const SadMxNx4Param x4d_mmi_tests[] = {
-  SadMxNx4Param(64, 64, &vpx_sad64x64x4d_mmi),
-  SadMxNx4Param(64, 32, &vpx_sad64x32x4d_mmi),
-  SadMxNx4Param(32, 64, &vpx_sad32x64x4d_mmi),
-  SadMxNx4Param(32, 32, &vpx_sad32x32x4d_mmi),
-  SadMxNx4Param(32, 16, &vpx_sad32x16x4d_mmi),
-  SadMxNx4Param(16, 32, &vpx_sad16x32x4d_mmi),
-  SadMxNx4Param(16, 16, &vpx_sad16x16x4d_mmi),
-  SadMxNx4Param(16, 8, &vpx_sad16x8x4d_mmi),
-  SadMxNx4Param(8, 16, &vpx_sad8x16x4d_mmi),
-  SadMxNx4Param(8, 8, &vpx_sad8x8x4d_mmi),
-  SadMxNx4Param(8, 4, &vpx_sad8x4x4d_mmi),
-  SadMxNx4Param(4, 8, &vpx_sad4x8x4d_mmi),
-  SadMxNx4Param(4, 4, &vpx_sad4x4x4d_mmi),
-};
-INSTANTIATE_TEST_CASE_P(MMI, SADx4Test, ::testing::ValuesIn(x4d_mmi_tests));
-#endif  // HAVE_MMI
 }  // namespace
--- a/test/set_roi.cc
+++ b/test/set_roi.cc
@@ -146,6 +146,14 @@ TEST(VP8RoiMapTest, ParameterCheck) {
      if (deltas_valid != roi_retval) break;
    }

+    // Test that we report an error if cyclic refresh is enabled.
+    cpi.cyclic_refresh_mode_enabled = 1;
+    roi_retval =
+        vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols,
+                       delta_q, delta_lf, threshold);
+    EXPECT_EQ(-1, roi_retval) << "cyclic refresh check error";
+    cpi.cyclic_refresh_mode_enabled = 0;
+
    // Test invalid number of rows or colums.
    roi_retval =
        vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1,
--- a/test/sixtap_predict_test.cc
+++ b/test/sixtap_predict_test.cc
@@ -0,0 +1,231 @@
+/*
+ *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <math.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "third_party/googletest/src/include/gtest/gtest.h"
+
+#include "./vpx_config.h"
+#include "./vp8_rtcd.h"
+#include "test/acm_random.h"
+#include "test/clear_system_state.h"
+#include "test/register_state_check.h"
+#include "test/util.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+typedef void (*SixtapPredictFunc)(uint8_t *src_ptr, int src_pixels_per_line,
+                                  int xoffset, int yoffset, uint8_t *dst_ptr,
+                                  int dst_pitch);
+
+typedef std::tr1::tuple<int, int, SixtapPredictFunc> SixtapPredictParam;
+
+class SixtapPredictTest : public ::testing::TestWithParam<SixtapPredictParam> {
+ public:
+  static void SetUpTestCase() {
+    src_ = reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kSrcSize));
+    dst_ = reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kDstSize));
+    dst_c_ =
+        reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, kDstSize));
+  }
+
+  static void TearDownTestCase() {
+    vpx_free(src_);
+    src_ = NULL;
+    vpx_free(dst_);
+    dst_ = NULL;
+    vpx_free(dst_c_);
+    dst_c_ = NULL;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }
+
+ protected:
+  // Make test arrays big enough for 16x16 functions. Six-tap filters
+  // need 5 extra pixels outside of the macroblock.
+  static const int kSrcStride = 21;
+  static const int kDstStride = 16;
+  static const int kDataAlignment = 16;
+  static const int kSrcSize = kSrcStride * kSrcStride + 1;
+  static const int kDstSize = kDstStride * kDstStride;
+
+  virtual void SetUp() {
+    width_ = GET_PARAM(0);
+    height_ = GET_PARAM(1);
+    sixtap_predict_ = GET_PARAM(2);
+    memset(src_, 0, kSrcSize);
+    memset(dst_, 0, kDstSize);
+    memset(dst_c_, 0, kDstSize);
+  }
+
+  int width_;
+  int height_;
+  SixtapPredictFunc sixtap_predict_;
+  // src_ holds the macroblock to be filtered; it is allocated 1 byte larger
+  // than needed so that unaligned access can be tested. Results are stored in
+  // dst_ and dst_c_ (the latter holds the C reference code result).
+  static uint8_t *src_;
+  static uint8_t *dst_;
+  static uint8_t *dst_c_;
+};
+
+uint8_t *SixtapPredictTest::src_ = NULL;
+uint8_t *SixtapPredictTest::dst_ = NULL;
+uint8_t *SixtapPredictTest::dst_c_ = NULL;
+
+TEST_P(SixtapPredictTest, TestWithPresetData) {
+  // Test input
+  static const uint8_t test_data[kSrcSize] = {
+    216, 184, 4,   191, 82,  92,  41,  0,   1,   226, 236, 172, 20,  182, 42,
+    226, 177, 79,  94,  77,  179, 203, 206, 198, 22,  192, 19,  75,  17,  192,
+    44,  233, 120, 48,  168, 203, 141, 210, 203, 143, 180, 184, 59,  201, 110,
+    102, 171, 32,  182, 10,  109, 105, 213, 60,  47,  236, 253, 67,  55,  14,
+    3,   99,  247, 124, 148, 159, 71,  34,  114, 19,  177, 38,  203, 237, 239,
+    58,  83,  155, 91,  10,  166, 201, 115, 124, 5,   163, 104, 2,   231, 160,
+    16,  234, 4,   8,   103, 153, 167, 174, 187, 26,  193, 109, 64,  141, 90,
+    48,  200, 174, 204, 36,  184, 114, 237, 43,  238, 242, 207, 86,  245, 182,
+    247, 6,   161, 251, 14,  8,   148, 182, 182, 79,  208, 120, 188, 17,  6,
+    23,  65,  206, 197, 13,  242, 126, 128, 224, 170, 110, 211, 121, 197, 200,
+    47,  188, 207, 208, 184, 221, 216, 76,  148, 143, 156, 100, 8,   89,  117,
+    14,  112, 183, 221, 54,  197, 208, 180, 69,  176, 94,  180, 131, 215, 121,
+    76,  7,   54,  28,  216, 238, 249, 176, 58,  142, 64,  215, 242, 72,  49,
+    104, 87,  161, 32,  52,  216, 230, 4,   141, 44,  181, 235, 224, 57,  195,
+    89,  134, 203, 144, 162, 163, 126, 156, 84,  185, 42,  148, 145, 29,  221,
+    194, 134, 52,  100, 166, 105, 60,  140, 110, 201, 184, 35,  181, 153, 93,
+    121, 243, 227, 68,  131, 134, 232, 2,   35,  60,  187, 77,  209, 76,  106,
+    174, 15,  241, 227, 115, 151, 77,  175, 36,  187, 121, 221, 223, 47,  118,
+    61,  168, 105, 32,  237, 236, 167, 213, 238, 202, 17,  170, 24,  226, 247,
+    131, 145, 6,   116, 117, 121, 11,  194, 41,  48,  126, 162, 13,  93,  209,
+    131, 154, 122, 237, 187, 103, 217, 99,  60,  200, 45,  78,  115, 69,  49,
+    106, 200, 194, 112, 60,  56,  234, 72,  251, 19,  120, 121, 182, 134, 215,
+    135, 10,  114, 2,   247, 46,  105, 209, 145, 165, 153, 191, 243, 12,  5,
+    36,  119, 206, 231, 231, 11,  32,  209, 83,  27,  229, 204, 149, 155, 83,
+    109, 35,  93,  223, 37,  84,  14,  142, 37,  160, 52,  191, 96,  40,  204,
+    101, 77,  67,  52,  53,  43,  63,  85,  253, 147, 113, 226, 96,  6,   125,
+    179, 115, 161, 17,  83,  198, 101, 98,  85,  139, 3,   137, 75,  99,  178,
+    23,  201, 255, 91,  253, 52,  134, 60,  138, 131, 208, 251, 101, 48,  2,
+    227, 228, 118, 132, 245, 202, 75,  91,  44,  160, 231, 47,  41,  50,  147,
+    220, 74,  92,  219, 165, 89,  16
+  };
+
+  // Expected result
+  static const uint8_t expected_dst[kDstSize] = {
+    117, 102, 74,  135, 42,  98,  175, 206, 70,  73,  222, 197, 50,  24,  39,
+    49,  38,  105, 90,  47,  169, 40,  171, 215, 200, 73,  109, 141, 53,  85,
+    177, 164, 79,  208, 124, 89,  212, 18,  81,  145, 151, 164, 217, 153, 91,
+    154, 102, 102, 159, 75,  164, 152, 136, 51,  213, 219, 186, 116, 193, 224,
+    186, 36,  231, 208, 84,  211, 155, 167, 35,  59,  42,  76,  216, 149, 73,
+    201, 78,  149, 184, 100, 96,  196, 189, 198, 188, 235, 195, 117, 129, 120,
+    129, 49,  25,  133, 113, 69,  221, 114, 70,  143, 99,  157, 108, 189, 140,
+    78,  6,   55,  65,  240, 255, 245, 184, 72,  90,  100, 116, 131, 39,  60,
+    234, 167, 33,  160, 88,  185, 200, 157, 159, 176, 127, 151, 138, 102, 168,
+    106, 170, 86,  82,  219, 189, 76,  33,  115, 197, 106, 96,  198, 136, 97,
+    141, 237, 151, 98,  137, 191, 185, 2,   57,  95,  142, 91,  255, 185, 97,
+    137, 76,  162, 94,  173, 131, 193, 161, 81,  106, 72,  135, 222, 234, 137,
+    66,  137, 106, 243, 210, 147, 95,  15,  137, 110, 85,  66,  16,  96,  167,
+    147, 150, 173, 203, 140, 118, 196, 84,  147, 160, 19,  95,  101, 123, 74,
+    132, 202, 82,  166, 12,  131, 166, 189, 170, 159, 85,  79,  66,  57,  152,
+    132, 203, 194, 0,   1,   56,  146, 180, 224, 156, 28,  83,  181, 79,  76,
+    80,  46,  160, 175, 59,  106, 43,  87,  75,  136, 85,  189, 46,  71,  200,
+    90
+  };
+
+  uint8_t *src = const_cast<uint8_t *>(test_data);
+
+  ASM_REGISTER_STATE_CHECK(sixtap_predict_(&src[kSrcStride * 2 + 2 + 1],
+                                           kSrcStride, 2, 2, dst_, kDstStride));
+
+  for (int i = 0; i < height_; ++i) {
+    for (int j = 0; j < width_; ++j)
+      ASSERT_EQ(expected_dst[i * kDstStride + j], dst_[i * kDstStride + j])
+          << "i==" << (i * width_ + j);
+  }
+}
+
+using libvpx_test::ACMRandom;
+
+TEST_P(SixtapPredictTest, TestWithRandomData) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  for (int i = 0; i < kSrcSize; ++i) src_[i] = rnd.Rand8();
+
+  // Run tests for all possible offsets.
+  for (int xoffset = 0; xoffset < 8; ++xoffset) {
+    for (int yoffset = 0; yoffset < 8; ++yoffset) {
+      // Call c reference function.
+      // Move start point to next pixel to test if the function reads
+      // unaligned data correctly.
+      vp8_sixtap_predict16x16_c(&src_[kSrcStride * 2 + 2 + 1], kSrcStride,
+                                xoffset, yoffset, dst_c_, kDstStride);
+
+      // Run test.
+      ASM_REGISTER_STATE_CHECK(sixtap_predict_(&src_[kSrcStride * 2 + 2 + 1],
+                                               kSrcStride, xoffset, yoffset,
+                                               dst_, kDstStride));
+
+      for (int i = 0; i < height_; ++i) {
+        for (int j = 0; j < width_; ++j)
+          ASSERT_EQ(dst_c_[i * kDstStride + j], dst_[i * kDstStride + j])
+              << "i==" << (i * width_ + j);
+      }
+    }
+  }
+}
+
+using std::tr1::make_tuple;
+
+INSTANTIATE_TEST_CASE_P(
+    C, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_c),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_c),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_c)));
+#if HAVE_NEON
+INSTANTIATE_TEST_CASE_P(
+    NEON, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_neon),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_neon)));
+#endif
+#if HAVE_MMX
+INSTANTIATE_TEST_CASE_P(
+    MMX, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_mmx),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_mmx),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_mmx),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx)));
+#endif
+#if HAVE_SSE2
+INSTANTIATE_TEST_CASE_P(
+    SSE2, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
+#endif
+#if HAVE_SSSE3
+INSTANTIATE_TEST_CASE_P(
+    SSSE3, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
+#endif
+#if HAVE_MSA
+INSTANTIATE_TEST_CASE_P(
+    MSA, SixtapPredictTest,
+    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
+                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
+                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
+                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
+#endif
+}  // namespace
--- a/test/stress.sh
+++ b/test/stress.sh
@@ -1,169 +0,0 @@
-#!/bin/sh
-##
-##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
-##
-##  Use of this source code is governed by a BSD-style license
-##  that can be found in the LICENSE file in the root of the source
-##  tree. An additional intellectual property rights grant can be found
-##  in the file PATENTS.  All contributing project authors may
-##  be found in the AUTHORS file in the root of the source tree.
-##
-##  This file performs a stress test. It runs (STRESS_ONEPASS_MAX_JOBS,
-##  default=5) one, (STRESS_TWOPASS_MAX_JOBS, default=5) two pass &
-##  (STRESS_RT_MAX_JOBS, default=5) encodes and (STRESS_<codec>_DECODE_MAX_JOBS,
-##  default=30) decodes in parallel.
-
-. $(dirname $0)/tools_common.sh
-
-YUV="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.yuv"
-VP8="${LIBVPX_TEST_DATA_PATH}/tos_vp8.webm"
-VP9="${LIBVPX_TEST_DATA_PATH}/vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm"
-DATA_URL="http://downloads.webmproject.org/test_data/libvpx/"
-SHA1_FILE="$(dirname $0)/test-data.sha1"
-
-# Set sha1sum to proper sha program (sha1sum, shasum, sha1). This code is
-# cribbed from libs.mk.
-[ -x "$(which sha1sum)" ] && sha1sum=sha1sum
-[ -x "$(which shasum)" ] && sha1sum=shasum
-[ -x "$(which sha1)" ] && sha1sum=sha1
-
-# Download a file from the url and check its sha1sum.
-download_and_check_file() {
-  # Get the file from the file path.
-  local readonly root="${1#${LIBVPX_TEST_DATA_PATH}/}"
-
-  # Download the file using curl. Trap to insure non partial file.
-  (trap "rm -f $1" INT TERM \
-    && eval "curl --retry 1 -L -o $1 ${DATA_URL}${root} ${devnull}")
-
-  # Check the sha1 sum of the file.
-  if [ -n "${sha1sum}" ]; then
-    set -e
-    grep ${root} ${SHA1_FILE} \
-      | (cd ${LIBVPX_TEST_DATA_PATH}; ${sha1sum} -c);
-  fi
-}
-
-# Environment check: Make sure input is available.
-stress_verify_environment() {
-  if [ ! -e "${SHA1_FILE}" ] ; then
-    echo "Missing ${SHA1_FILE}"
-    return 1
-  fi
-  for file in "${YUV}" "${VP8}" "${VP9}"; do
-    if [ ! -e "${file}" ] ; then
-      download_and_check_file "${file}"
-    fi
-  done
-  if [ ! -e "${YUV}" ] || [ ! -e "${VP8}" ] || [ ! -e "${VP9}" ] ; then
-    elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
-    return 1
-  fi
-  if [ -z "$(vpx_tool_path vpxenc)" ]; then
-    elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent."
-    return 1
-  fi
-  if [ -z "$(vpx_tool_path vpxdec)" ]; then
-    elog "vpxdec not found. It must exist in LIBVPX_BIN_PATH or its parent."
-    return 1
-  fi
-}
-
-# This function runs tests on libvpx that run multiple encodes and decodes
-# in parallel in hopes of catching synchronization and/or threading issues.
-stress() {
-  local readonly decoder="$(vpx_tool_path vpxdec)"
-  local readonly encoder="$(vpx_tool_path vpxenc)"
-  local readonly codec="$1"
-  local readonly webm="$2"
-  local readonly decode_count="$3"
-  local readonly threads="$4"
-  local readonly enc_args="$5"
-  local pids=""
-  local rt_max_jobs=${STRESS_RT_MAX_JOBS:-5}
-  local onepass_max_jobs=${STRESS_ONEPASS_MAX_JOBS:-5}
-  local twopass_max_jobs=${STRESS_TWOPASS_MAX_JOBS:-5}
-
-  # Enable job control, so we can run multiple processes.
-  set -m
-
-  # Start $onepass_max_jobs encode jobs in parallel.
-  for i in $(seq ${onepass_max_jobs}); do
-    bitrate=$(($i * 20 + 300))
-    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
-      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=1" \
-      "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.1pass.webm" \
-      "${enc_args}" ${devnull} &
-    pids="${pids} $!"
-  done
-
-  # Start $twopass_max_jobs encode jobs in parallel.
-  for i in $(seq ${twopass_max_jobs}); do
-    bitrate=$(($i * 20 + 300))
-    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
-      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=2" \
-      "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.2pass.webm" \
-      "${enc_args}" ${devnull} &
-    pids="${pids} $!"
-  done
-
-  # Start $rt_max_jobs rt encode jobs in parallel.
-  for i in $(seq ${rt_max_jobs}); do
-    bitrate=$(($i * 20 + 300))
-    eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \
-      "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal " \
-      "--target-bitrate=${bitrate} --lag-in-frames=0 --error-resilient=1" \
-      "--kf-min-dist=3000 --kf-max-dist=3000 --cpu-used=-6 --static-thresh=1" \
-      "--end-usage=cbr --min-q=2 --max-q=56 --undershoot-pct=100" \
-      "--overshoot-pct=15 --buf-sz=1000 --buf-initial-sz=500" \
-      "--buf-optimal-sz=600 --max-intra-rate=900 --resize-allowed=0" \
-      "--drop-frame=0 --passes=1 --rt --noise-sensitivity=4" \
-      "-o ${VPX_TEST_OUTPUT_DIR}/${i}.rt.webm" ${devnull} &
-    pids="${pids} $!"
-  done
-
-  # Start $decode_count decode jobs in parallel.
-  for i in $(seq "${decode_count}"); do
-    eval "${decoder}" "-t ${threads}" "${webm}" "--noblit" ${devnull} &
-    pids="${pids} $!"
-  done
-
-  # Wait for all parallel jobs to finish.
-  fail=0
-  for job in "${pids}"; do
-    wait $job || fail=$(($fail + 1))
-  done
-  return $fail
-}
-
-vp8_stress_test() {
-  local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40}
-  if [ "$(vp8_decode_available)" = "yes" -a \
-       "$(vp8_encode_available)" = "yes" ]; then
-    stress vp8 "${VP8}" "${vp8_max_jobs}" 4
-  fi
-}
-
-vp9_stress() {
-  local vp9_max_jobs=${STRESS_VP9_DECODE_MAX_JOBS:-25}
-
-  if [ "$(vp9_decode_available)" = "yes" -a \
-       "$(vp9_encode_available)" = "yes" ]; then
-    stress vp9 "${VP9}" "${vp9_max_jobs}" "$@"
-  fi
-}
-
-vp9_stress_test() {
-  for threads in 4 8 100; do
-    vp9_stress "$threads" "--row-mt=0"
-  done
-}
-
-vp9_stress_test_row_mt() {
-  for threads in 4 8 100; do
-    vp9_stress "$threads" "--row-mt=1"
-  done
-}
-
-run_tests stress_verify_environment \
-  "vp8_stress_test vp9_stress_test vp9_stress_test_row_mt"
--- a/test/sum_squares_test.cc
+++ b/test/sum_squares_test.cc
@@ -110,10 +110,4 @@ INSTANTIATE_TEST_CASE_P(
    ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c,
                                 &vpx_sum_squares_2d_i16_sse2)));
 #endif  // HAVE_SSE2
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, SumSquaresTest, ::testing::Values(make_tuple(
-                                                 &vpx_sum_squares_2d_i16_c,
-                                                 &vpx_sum_squares_2d_i16_msa)));
-#endif  // HAVE_MSA
 }  // namespace
--- a/test/svc_test.cc
+++ b/test/svc_test.cc
@@ -438,7 +438,7 @@ TEST_F(SvcTest, SetAutoAltRefOption) {
 // Test that decoder can handle an SVC frame as the first frame in a sequence.
 TEST_F(SvcTest, OnePassEncodeOneFrame) {
  codec_enc_.g_pass = VPX_RC_ONE_PASS;
-  vpx_fixed_buf output = vpx_fixed_buf();
+  vpx_fixed_buf output = { 0 };
  Pass2EncodeNFrames(NULL, 1, 2, &output);
  DecodeNFrames(&output, 1);
  FreeBitstreamBuffers(&output, 1);
--- a/test/temporal_filter_test.cc
+++ b/test/temporal_filter_test.cc
@@ -1,277 +0,0 @@
-/*
- *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <limits>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "test/acm_random.h"
-#include "test/buffer.h"
-#include "test/register_state_check.h"
-#include "vpx_ports/vpx_timer.h"
-
-namespace {
-
-using ::libvpx_test::ACMRandom;
-using ::libvpx_test::Buffer;
-
-typedef void (*TemporalFilterFunc)(const uint8_t *a, unsigned int stride,
-                                   const uint8_t *b, unsigned int w,
-                                   unsigned int h, int filter_strength,
-                                   int filter_weight, unsigned int *accumulator,
-                                   uint16_t *count);
-
-// Calculate the difference between 'a' and 'b', sum in blocks of 9, and apply
-// filter based on strength and weight. Store the resulting filter amount in
-// 'count' and apply it to 'b' and store it in 'accumulator'.
-void reference_filter(const Buffer<uint8_t> &a, const Buffer<uint8_t> &b, int w,
-                      int h, int filter_strength, int filter_weight,
-                      Buffer<unsigned int> *accumulator,
-                      Buffer<uint16_t> *count) {
-  Buffer<int> diff_sq = Buffer<int>(w, h, 0);
-  ASSERT_TRUE(diff_sq.Init());
-  diff_sq.Set(0);
-
-  int rounding = 0;
-  if (filter_strength > 0) {
-    rounding = 1 << (filter_strength - 1);
-  }
-
-  // Calculate all the differences. Avoids re-calculating a bunch of extra
-  // values.
-  for (int height = 0; height < h; ++height) {
-    for (int width = 0; width < w; ++width) {
-      int diff = a.TopLeftPixel()[height * a.stride() + width] -
-                 b.TopLeftPixel()[height * b.stride() + width];
-      diff_sq.TopLeftPixel()[height * diff_sq.stride() + width] = diff * diff;
-    }
-  }
-
-  // For any given point, sum the neighboring values and calculate the
-  // modifier.
-  for (int height = 0; height < h; ++height) {
-    for (int width = 0; width < w; ++width) {
-      // Determine how many values are being summed.
-      int summed_values = 9;
-
-      if (height == 0 || height == (h - 1)) {
-        summed_values -= 3;
-      }
-
-      if (width == 0 || width == (w - 1)) {
-        if (summed_values == 6) {  // corner
-          summed_values -= 2;
-        } else {
-          summed_values -= 3;
-        }
-      }
-
-      // Sum the diff_sq of the surrounding values.
-      int sum = 0;
-      for (int idy = -1; idy <= 1; ++idy) {
-        for (int idx = -1; idx <= 1; ++idx) {
-          const int y = height + idy;
-          const int x = width + idx;
-
-          // If inside the border.
-          if (y >= 0 && y < h && x >= 0 && x < w) {
-            sum += diff_sq.TopLeftPixel()[y * diff_sq.stride() + x];
-          }
-        }
-      }
-
-      sum *= 3;
-      sum /= summed_values;
-      sum += rounding;
-      sum >>= filter_strength;
-
-      // Clamp the value and invert it.
-      if (sum > 16) sum = 16;
-      sum = 16 - sum;
-
-      sum *= filter_weight;
-
-      count->TopLeftPixel()[height * count->stride() + width] += sum;
-      accumulator->TopLeftPixel()[height * accumulator->stride() + width] +=
-          sum * b.TopLeftPixel()[height * b.stride() + width];
-    }
-  }
-}
-
-class TemporalFilterTest : public ::testing::TestWithParam<TemporalFilterFunc> {
- public:
-  virtual void SetUp() {
-    filter_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  TemporalFilterFunc filter_func_;
-  ACMRandom rnd_;
-};
-
-TEST_P(TemporalFilterTest, SizeCombinations) {
-  // Depending on subsampling this function may be called with values of 8 or 16
-  // for width and height, in any combination.
-  Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
-  ASSERT_TRUE(a.Init());
-
-  const int filter_weight = 2;
-  const int filter_strength = 6;
-
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      // The difference between the buffers must be small to pass the threshold
-      // to apply the filter.
-      a.Set(&rnd_, 0, 7);
-      b.Set(&rnd_, 0, 7);
-
-      accum_ref.Set(rnd_.Rand8());
-      accum_chk.CopyFrom(accum_ref);
-      count_ref.Set(rnd_.Rand8());
-      count_chk.CopyFrom(count_ref);
-      reference_filter(a, b, width, height, filter_strength, filter_weight,
-                       &accum_ref, &count_ref);
-      ASM_REGISTER_STATE_CHECK(
-          filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
-                       height, filter_strength, filter_weight,
-                       accum_chk.TopLeftPixel(), count_chk.TopLeftPixel()));
-      EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
-      EXPECT_TRUE(count_chk.CheckValues(count_ref));
-      if (HasFailure()) {
-        printf("Width: %d Height: %d\n", width, height);
-        count_chk.PrintDifference(count_ref);
-        accum_chk.PrintDifference(accum_ref);
-        return;
-      }
-    }
-  }
-}
-
-TEST_P(TemporalFilterTest, CompareReferenceRandom) {
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      Buffer<uint8_t> a = Buffer<uint8_t>(width, height, 8);
-      ASSERT_TRUE(a.Init());
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      for (int filter_strength = 0; filter_strength <= 6; ++filter_strength) {
-        for (int filter_weight = 0; filter_weight <= 2; ++filter_weight) {
-          for (int repeat = 0; repeat < 100; ++repeat) {
-            if (repeat < 50) {
-              a.Set(&rnd_, 0, 7);
-              b.Set(&rnd_, 0, 7);
-            } else {
-              // Check large (but close) values as well.
-              a.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
-                    std::numeric_limits<uint8_t>::max());
-              b.Set(&rnd_, std::numeric_limits<uint8_t>::max() - 7,
-                    std::numeric_limits<uint8_t>::max());
-            }
-
-            accum_ref.Set(rnd_.Rand8());
-            accum_chk.CopyFrom(accum_ref);
-            count_ref.Set(rnd_.Rand8());
-            count_chk.CopyFrom(count_ref);
-            reference_filter(a, b, width, height, filter_strength,
-                             filter_weight, &accum_ref, &count_ref);
-            ASM_REGISTER_STATE_CHECK(filter_func_(
-                a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width, height,
-                filter_strength, filter_weight, accum_chk.TopLeftPixel(),
-                count_chk.TopLeftPixel()));
-            EXPECT_TRUE(accum_chk.CheckValues(accum_ref));
-            EXPECT_TRUE(count_chk.CheckValues(count_ref));
-            if (HasFailure()) {
-              printf("Weight: %d Strength: %d\n", filter_weight,
-                     filter_strength);
-              count_chk.PrintDifference(count_ref);
-              accum_chk.PrintDifference(accum_ref);
-              return;
-            }
-          }
-        }
-      }
-    }
-  }
-}
-
-TEST_P(TemporalFilterTest, DISABLED_Speed) {
-  Buffer<uint8_t> a = Buffer<uint8_t>(16, 16, 8);
-  ASSERT_TRUE(a.Init());
-
-  const int filter_weight = 2;
-  const int filter_strength = 6;
-
-  for (int width = 8; width <= 16; width += 8) {
-    for (int height = 8; height <= 16; height += 8) {
-      // The second buffer must not have any border.
-      Buffer<uint8_t> b = Buffer<uint8_t>(width, height, 0);
-      ASSERT_TRUE(b.Init());
-      Buffer<unsigned int> accum_ref = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_ref.Init());
-      Buffer<unsigned int> accum_chk = Buffer<unsigned int>(width, height, 0);
-      ASSERT_TRUE(accum_chk.Init());
-      Buffer<uint16_t> count_ref = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_ref.Init());
-      Buffer<uint16_t> count_chk = Buffer<uint16_t>(width, height, 0);
-      ASSERT_TRUE(count_chk.Init());
-
-      a.Set(&rnd_, 0, 7);
-      b.Set(&rnd_, 0, 7);
-
-      accum_chk.Set(0);
-      count_chk.Set(0);
-
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int i = 0; i < 10000; ++i) {
-        filter_func_(a.TopLeftPixel(), a.stride(), b.TopLeftPixel(), width,
-                     height, filter_strength, filter_weight,
-                     accum_chk.TopLeftPixel(), count_chk.TopLeftPixel());
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      printf("Temporal filter %dx%d time: %5d us\n", width, height,
-             elapsed_time);
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, TemporalFilterTest,
-                        ::testing::Values(&vp9_temporal_filter_apply_c));
-
-#if HAVE_SSE4_1
-INSTANTIATE_TEST_CASE_P(SSE4_1, TemporalFilterTest,
-                        ::testing::Values(&vp9_temporal_filter_apply_sse4_1));
-#endif  // HAVE_SSE4_1
-}  // namespace
--- a/test/test-data.mk
+++ b/test/test-data.mk
@@ -20,10 +20,8 @@ LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv

 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += noisy_clip_640_360.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
-LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv

 # Test vectors
 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
@@ -732,10 +730,6 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5
 endif  # CONFIG_VP9_HIGHBITDEPTH

 # Invalid files for testing libvpx error checking.
-LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm
@@ -768,23 +762,15 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s195
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm.res
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm.res

 ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
 # Encode / Decode test
@@ -819,6 +805,7 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += kirland_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcomoving_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcostationary_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
+LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
 LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
@@ -876,7 +863,3 @@ LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm
 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf.md5
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm
-LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm.md5
--- a/test/test-data.sha1
+++ b/test/test-data.sha1
@@ -6,8 +6,6 @@ b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
 c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
 456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
-efafb92b7567bc04c3f1432ea6c268c1c31affd5 *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf
-5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res
 fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
 d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
@@ -16,7 +14,6 @@ df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
 4935c62becc68c13642a03db1e6d3e2331c1c612 *invalid-vp90-03-v3.webm.res
 d637297561dd904eb2c97a9015deeb31c4a1e8d2 *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
 3a204bdbeaa3c6458b77bcebb8366d107267f55d *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
-9aa21d8b2cb9d39abe8a7bb6032dc66955fb4342 *noisy_clip_640_360.y4m
 a432f96ff0a787268e2f94a8092ab161a18d1b06 *park_joy_90p_10_420.y4m
 0b194cc312c3a2e84d156a221b0a5eb615dfddc5 *park_joy_90p_10_422.y4m
 ff0e0a21dc2adc95b8c1b37902713700655ced17 *park_joy_90p_10_444.y4m
@@ -837,20 +834,5 @@ f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.w
 7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
 7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm
 4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
-a000d568431d07379dd5a8ec066061c07e560b47 *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf
-1e75aad3433c5c21c194a7b53fc393970f0a8d7f *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res
-235182f9a1c5c8841552510dd4288487447bfc40 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf
-787f04f0483320d536894282f3358a4f8cac1cf9 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res
-91d3cefd0deb98f3b0caf3a2d900ec7a7605e53a *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf
-1e472baaf5f6113459f0399a38a5a5e68d17799d *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res
-70057835bf29d14e66699ce5f022df2551fb6b37 *invalid-crbug-629481.webm
-5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-crbug-629481.webm.res
-7602e00378161ca36ae93cc6ee12dd30b5ba1e1d *vp90-2-22-svc_1280x720_3.ivf
-02e53e3eefbf25ec0929047fe50876acdeb040bd *vp90-2-22-svc_1280x720_3.ivf.md5
-6fa3d3ac306a3d9ce1d610b78441dc00d2c2d4b9 *tos_vp8.webm
-e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm
-d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res
-fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf
-fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res
-17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm
-e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5
+a000d568431d07379dd5a8ec066061c07e560b47  invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf
+1e75aad3433c5c21c194a7b53fc393970f0a8d7f  invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res
--- a/test/test.mk
+++ b/test/test.mk
@@ -1,5 +1,4 @@
 LIBVPX_TEST_SRCS-yes += acm_random.h
-LIBVPX_TEST_SRCS-yes += buffer.h
 LIBVPX_TEST_SRCS-yes += clear_system_state.h
 LIBVPX_TEST_SRCS-yes += codec_factory.h
 LIBVPX_TEST_SRCS-yes += md5_helper.h
@@ -21,7 +20,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_DECODERS)    += ivf_video_source.h
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += ../y4minput.h ../y4minput.c
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += altref_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += aq_segment_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += alt_ref_aq_segment_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += datarate_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += encode_api_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS)    += error_resilience_test.cc
@@ -36,9 +34,10 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc

 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += decode_svc_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += invalid_file_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_frame_parallel_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc
@@ -47,7 +46,6 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_motion_vector_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc

 LIBVPX_TEST_SRCS-yes                   += decode_test_driver.cc
@@ -90,11 +88,6 @@ ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_VP9_ENCODER), yesyes)
 LIBVPX_TEST_SRCS-yes += encode_perf_test.cc
 endif

-## Multi-codec blackbox tests.
-ifeq ($(findstring yes,$(CONFIG_VP8_DECODER)$(CONFIG_VP9_DECODER)), yes)
-LIBVPX_TEST_SRCS-yes += invalid_file_test.cc
-endif
-
 ##
 ## WHITE BOX TESTS
 ##
@@ -121,9 +114,8 @@ LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc

 LIBVPX_TEST_SRCS-yes                   += idct_test.cc
-LIBVPX_TEST_SRCS-yes                   += predict_test.cc
+LIBVPX_TEST_SRCS-yes                   += sixtap_predict_test.cc
 LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.cc
-LIBVPX_TEST_SRCS-yes                   += vpx_scale_test.h

 ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_TEMPORAL_DENOISING),yesyes)
 LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp8_denoiser_sse2_test.cc
@@ -146,25 +138,19 @@ LIBVPX_TEST_SRCS-yes                   += vp9_encoder_parms_get_to_decoder.cc
 endif

 LIBVPX_TEST_SRCS-yes                   += convolve_test.cc
-LIBVPX_TEST_SRCS-yes                   += lpf_test.cc
+LIBVPX_TEST_SRCS-yes                   += lpf_8_test.cc
 LIBVPX_TEST_SRCS-yes                   += vp9_intrapred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += avg_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += comp_avg_pred_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_partial_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct4x4_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc
-ifneq ($(CONFIG_REALTIME_ONLY),yes)
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += temporal_filter_test.cc
-endif
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc
-LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_block_error_test.cc
+LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_error_block_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc

@@ -175,7 +161,7 @@ LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc
 endif

 ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes)
-LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc
+LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp9_denoiser_sse2_test.cc
 endif
 LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc

--- a/test/test_intra_pred_speed.cc
+++ b/test/test_intra_pred_speed.cc
@@ -29,8 +29,6 @@ namespace {
 typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride,
                            const uint8_t *above, const uint8_t *left);

-const int kBPS = 32;
-const int kTotalPixels = 32 * kBPS;
 const int kNumVp9IntraPredFuncs = 13;
 const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
  "DC_PRED",   "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED",
@@ -38,121 +36,107 @@ const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = {
  "D207_PRED", "D63_PRED",     "TM_PRED"
 };

-template <typename Pixel>
-struct IntraPredTestMem {
-  void Init(int block_size, int bd) {
-    libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
-    Pixel *const above = above_mem + 16;
-    const int mask = (1 << bd) - 1;
-    for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand16() & mask;
-    for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand16() & mask;
-    for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand16() & mask;
-
-    // some code assumes the top row has been extended:
-    // d45/d63 C-code, for instance, but not the assembly.
-    // TODO(jzern): this style of extension isn't strictly necessary.
-    ASSERT_LE(block_size, kBPS);
-    for (int i = block_size; i < 2 * kBPS; ++i) {
-      above[i] = above[block_size - 1];
-    }
-  }
-
-  DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]);
-  DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]);
-  DECLARE_ALIGNED(16, Pixel, left[kBPS]);
-  DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]);
-};
-
-typedef IntraPredTestMem<uint8_t> Vp9IntraPredTestMem;
-
-void CheckMd5Signature(const char name[], const char *const signatures[],
-                       const void *data, size_t data_size, int elapsed_time,
-                       int idx) {
-  libvpx_test::MD5 md5;
-  md5.Add(reinterpret_cast<const uint8_t *>(data), data_size);
-  printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, kVp9IntraPredNames[idx],
-         elapsed_time, md5.Get());
-  EXPECT_STREQ(signatures[idx], md5.Get());
-}
-
 void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs,
-                   const char *const signatures[], int block_size) {
-  const int kNumTests = static_cast<int>(
-      2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs));
-  Vp9IntraPredTestMem intra_pred_test_mem;
-  const uint8_t *const above = intra_pred_test_mem.above_mem + 16;
+                   const char *const pred_func_names[], int num_funcs,
+                   const char *const signatures[], int block_size,
+                   int num_pixels_per_test) {
+  libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed());
+  const int kBPS = 32;
+  const int kTotalPixels = 32 * kBPS;
+  DECLARE_ALIGNED(16, uint8_t, src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, ref_src[kTotalPixels]);
+  DECLARE_ALIGNED(16, uint8_t, left[kBPS]);
+  DECLARE_ALIGNED(16, uint8_t, above_mem[2 * kBPS + 16]);
+  uint8_t *const above = above_mem + 16;
+  for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand8();
+  for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand8();
+  for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand8();
+  const int kNumTests = static_cast<int>(2.e10 / num_pixels_per_test);

-  intra_pred_test_mem.Init(block_size, 8);
+  // some code assumes the top row has been extended:
+  // d45/d63 C-code, for instance, but not the assembly.
+  // TODO(jzern): this style of extension isn't strictly necessary.
+  ASSERT_LE(block_size, kBPS);
+  memset(above + block_size, above[block_size - 1], 2 * kBPS - block_size);

-  for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) {
+  for (int k = 0; k < num_funcs; ++k) {
    if (pred_funcs[k] == NULL) continue;
-    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
-           sizeof(intra_pred_test_mem.src));
+    memcpy(src, ref_src, sizeof(src));
    vpx_usec_timer timer;
    vpx_usec_timer_start(&timer);
    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
-      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
-                    intra_pred_test_mem.left);
+      pred_funcs[k](src, kBPS, above, left);
    }
    libvpx_test::ClearSystemState();
    vpx_usec_timer_mark(&timer);
    const int elapsed_time =
        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
-    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
-                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
+    libvpx_test::MD5 md5;
+    md5.Add(src, sizeof(src));
+    printf("Mode %s[%12s]: %5d ms     MD5: %s\n", name, pred_func_names[k],
+           elapsed_time, md5.Get());
+    EXPECT_STREQ(signatures[k], md5.Get());
  }
 }

 void TestIntraPred4(VpxPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "e7ed7353c3383fff942e500e9bfe82fe", "2a4a26fcc6ce005eadc08354d196c8a9",
-    "269d92eff86f315d9c38fe7640d85b15", "ae2960eea9f71ee3dabe08b282ec1773",
-    "6c1abcc44e90148998b51acd11144e9c", "f7bb3186e1ef8a2b326037ff898cad8e",
-    "364c1f3fb2f445f935aec2a70a67eaa4", "141624072a4a56773f68fadbdd07c4a7",
-    "7be49b08687a5f24df3a2c612fca3876", "459bb5d9fd5b238348179c9a22108cd6",
-    "73edb8831bf1bdfce21ae8eaa43b1234", "2e2457f2009c701a355a8b25eb74fcda",
-    "52ae4e8bdbe41494c1f43051d4dd7f0b"
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "4334156168b34ab599d9b5b30f522fe9", "bc4649d5ba47c7ff178d92e475960fb0",
+    "8d316e5933326dcac24e1064794b5d12", "a27270fed024eafd762c95de85f4da51",
+    "c33dff000d4256c2b8f3bf9e9bab14d2", "44d8cddc2ad8f79b8ed3306051722b4f",
+    "eb54839b2bad6699d8946f01ec041cd0", "ecb0d56ae5f677ea45127ce9d5c058e4",
+    "0b7936841f6813da818275944895b574", "9117972ef64f91a58ff73e1731c81db2",
+    "c56d5e8c729e46825f46dd5d3b5d508a", "c0889e2039bcf7bcb5d2f33cdca69adc",
+    "309a618577b27c648f9c5ee45252bc8f",
  };
-  TestIntraPred("Intra4", pred_funcs, kSignatures, 4);
+  TestIntraPred("Intra4", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 4, 4 * 4 * kNumVp9IntraFuncs);
 }

 void TestIntraPred8(VpxPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "d8bbae5d6547cfc17e4f5f44c8730e88", "373bab6d931868d41a601d9d88ce9ac3",
-    "6fdd5ff4ff79656c14747598ca9e3706", "d9661c2811d6a73674f40ffb2b841847",
-    "7c722d10b19ccff0b8c171868e747385", "f81dd986eb2b50f750d3a7da716b7e27",
-    "d500f2c8fc78f46a4c74e4dcf51f14fb", "0e3523f9cab2142dd37fd07ec0760bce",
-    "79ac4efe907f0a0f1885d43066cfedee", "19ecf2432ac305057de3b6578474eec6",
-    "4f985b61acc6dd5d2d2585fa89ea2e2d", "f1bb25a9060dd262f405f15a38f5f674",
-    "209ea00801584829e9a0f7be7d4a74ba"
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "7694ddeeefed887faf9d339d18850928", "7d726b1213591b99f736be6dec65065b",
+    "19c5711281357a485591aaf9c96c0a67", "ba6b66877a089e71cd938e3b8c40caac",
+    "802440c93317e0f8ba93fab02ef74265", "9e09a47a15deb0b9d8372824f9805080",
+    "b7c2d8c662268c0c427da412d7b0311d", "78339c1c60bb1d67d248ab8c4da08b7f",
+    "5c97d70f7d47de1882a6cd86c165c8a9", "8182bf60688b42205acd95e59e967157",
+    "08323400005a297f16d7e57e7fe1eaac", "95f7bfc262329a5849eda66d8f7c68ce",
+    "815b75c8e0d91cc1ae766dc5d3e445a3",
  };
-  TestIntraPred("Intra8", pred_funcs, kSignatures, 8);
+  TestIntraPred("Intra8", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 8, 8 * 8 * kNumVp9IntraFuncs);
 }

 void TestIntraPred16(VpxPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "50971c07ce26977d30298538fffec619", "527a6b9e0dc5b21b98cf276305432bef",
-    "7eff2868f80ebc2c43a4f367281d80f7", "67cd60512b54964ef6aff1bd4816d922",
-    "48371c87dc95c08a33b2048f89cf6468", "b0acf2872ee411d7530af6d2625a7084",
-    "f32aafed4d8d3776ed58bcb6188756d5", "dae208f3dca583529cff49b73f7c4183",
-    "7af66a2f4c8e0b4908e40f047e60c47c", "125e3ab6ab9bc961f183ec366a7afa88",
-    "6b90f25b23983c35386b9fd704427622", "f8d6b11d710edc136a7c62c917435f93",
-    "ed308f18614a362917f411c218aee532"
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "b40dbb555d5d16a043dc361e6694fe53", "fb08118cee3b6405d64c1fd68be878c6",
+    "6c190f341475c837cc38c2e566b64875", "db5c34ccbe2c7f595d9b08b0dc2c698c",
+    "a62cbfd153a1f0b9fed13e62b8408a7a", "143df5b4c89335e281103f610f5052e4",
+    "d87feb124107cdf2cfb147655aa0bb3c", "7841fae7d4d47b519322e6a03eeed9dc",
+    "f6ebed3f71cbcf8d6d0516ce87e11093", "3cc480297dbfeed01a1c2d78dd03d0c5",
+    "b9f69fa6532b372c545397dcb78ef311", "a8fe1c70432f09d0c20c67bdb6432c4d",
+    "b8a41aa968ec108af447af4217cba91b",
  };
-  TestIntraPred("Intra16", pred_funcs, kSignatures, 16);
+  TestIntraPred("Intra16", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 16, 16 * 16 * kNumVp9IntraFuncs);
 }

 void TestIntraPred32(VpxPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "a0a618c900e65ae521ccc8af789729f2", "985aaa7c72b4a6c2fb431d32100cf13a",
-    "10662d09febc3ca13ee4e700120daeb5", "b3b01379ba08916ef6b1b35f7d9ad51c",
-    "9f4261755795af97e34679c333ec7004", "bc2c9da91ad97ef0d1610fb0a9041657",
-    "75c79b1362ad18abfcdb1aa0aacfc21d", "4039bb7da0f6860090d3c57b5c85468f",
-    "b29fff7b61804e68383e3a609b33da58", "e1aa5e49067fd8dba66c2eb8d07b7a89",
-    "4e042822909c1c06d3b10a88281df1eb", "72eb9d9e0e67c93f4c66b70348e9fef7",
-    "a22d102bcb51ca798aac12ca4ae8f2e8"
+  static const int kNumVp9IntraFuncs = 13;
+  static const char *const kSignatures[kNumVp9IntraFuncs] = {
+    "558541656d84f9ae7896db655826febe", "b3587a1f9a01495fa38c8cd3c8e2a1bf",
+    "4c6501e64f25aacc55a2a16c7e8f0255", "b3b01379ba08916ef6b1b35f7d9ad51c",
+    "0f1eb38b6cbddb3d496199ef9f329071", "911c06efb9ed1c3b4c104b232b55812f",
+    "9225beb0ddfa7a1d24eaa1be430a6654", "0a6d584a44f8db9aa7ade2e2fdb9fc9e",
+    "b01c9076525216925f3456f034fb6eee", "d267e20ad9e5cd2915d1a47254d3d149",
+    "ed012a4a5da71f36c2393023184a0e59", "f162b51ed618d28b936974cff4391da5",
+    "9e1370c6d42e08d357d9612c93a71cfc",
  };
-  TestIntraPred("Intra32", pred_funcs, kSignatures, 32);
+  TestIntraPred("Intra32", pred_funcs, kVp9IntraPredNames, kNumVp9IntraFuncs,
+                kSignatures, 32, 32 * 32 * kNumVp9IntraFuncs);
 }

 }  // namespace
@@ -169,6 +153,7 @@ void TestIntraPred32(VpxPredFunc const *pred_funcs) {
  }

 // -----------------------------------------------------------------------------
+// 4x4

 INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
                vpx_dc_left_predictor_4x4_c, vpx_dc_top_predictor_4x4_c,
@@ -178,6 +163,47 @@ INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c,
                vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c,
                vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c)

+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
+                vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
+                vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
+                vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
+                NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
+                vpx_tm_predictor_4x4_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL,
+                vpx_d63_predictor_4x4_ssse3, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
+                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
+                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
+                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
+                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
+                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
+                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
+                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_4x4_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 8x8
+
 INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
                vpx_dc_left_predictor_8x8_c, vpx_dc_top_predictor_8x8_c,
                vpx_dc_128_predictor_8x8_c, vpx_v_predictor_8x8_c,
@@ -186,6 +212,46 @@ INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c,
                vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c,
                vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c)

+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
+                vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
+                vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
+                vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_d153_predictor_8x8_ssse3,
+                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
+                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
+                NULL, NULL, vpx_tm_predictor_8x8_c)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
+                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
+                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
+                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon)
+
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
+                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
+                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
+                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_8x8_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 16x16
+
 INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
                vpx_dc_left_predictor_16x16_c, vpx_dc_top_predictor_16x16_c,
                vpx_dc_128_predictor_16x16_c, vpx_v_predictor_16x16_c,
@@ -194,6 +260,48 @@ INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c,
                vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c,
                vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c)

+#if HAVE_SSE2
+INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
+                vpx_dc_left_predictor_16x16_sse2,
+                vpx_dc_top_predictor_16x16_sse2,
+                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
+                vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_sse2)
+#endif  // HAVE_SSE2
+
+#if HAVE_SSSE3
+INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_d45_predictor_16x16_ssse3, NULL, NULL,
+                vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3,
+                vpx_d63_predictor_16x16_ssse3, NULL)
+#endif  // HAVE_SSSE3
+
+#if HAVE_DSPR2
+INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
+                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
+                NULL, NULL, NULL, NULL)
+#endif  // HAVE_DSPR2
+
+#if HAVE_NEON
+INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
+                vpx_dc_left_predictor_16x16_neon,
+                vpx_dc_top_predictor_16x16_neon,
+                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
+                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, NULL,
+                NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon)
+#endif  // HAVE_NEON
+
+#if HAVE_MSA
+INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
+                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
+                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
+                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
+                vpx_tm_predictor_16x16_msa)
+#endif  // HAVE_MSA
+
+// -----------------------------------------------------------------------------
+// 32x32
+
 INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
                vpx_dc_left_predictor_32x32_c, vpx_dc_top_predictor_32x32_c,
                vpx_dc_128_predictor_32x32_c, vpx_v_predictor_32x32_c,
@@ -203,26 +311,6 @@ INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c,
                vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c)

 #if HAVE_SSE2
-INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2,
-                vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2,
-                vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2,
-                vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL,
-                NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL,
-                vpx_tm_predictor_4x4_sse2)
-
-INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2,
-                vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2,
-                vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2,
-                vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL,
-                NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2)
-
-INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2,
-                vpx_dc_left_predictor_16x16_sse2,
-                vpx_dc_top_predictor_16x16_sse2,
-                vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2,
-                vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_sse2)
-
 INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
                vpx_dc_left_predictor_32x32_sse2,
                vpx_dc_top_predictor_32x32_sse2,
@@ -232,79 +320,22 @@ INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2,
 #endif  // HAVE_SSE2

 #if HAVE_SSSE3
-INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL,
-                vpx_d63_predictor_4x4_ssse3, NULL)
-INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_d153_predictor_8x8_ssse3,
-                vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL)
-INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_d45_predictor_16x16_ssse3, NULL, NULL,
-                vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3,
-                vpx_d63_predictor_16x16_ssse3, NULL)
 INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL,
                vpx_d45_predictor_32x32_ssse3, NULL, NULL,
                vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3,
                vpx_d63_predictor_32x32_ssse3, NULL)
 #endif  // HAVE_SSSE3

-#if HAVE_DSPR2
-INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL,
-                NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_tm_predictor_4x4_dspr2)
-INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL,
-                NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL,
-                NULL, NULL, vpx_tm_predictor_8x8_c)
-INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL,
-                NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL,
-                NULL, NULL, NULL, NULL)
-#endif  // HAVE_DSPR2
-
 #if HAVE_NEON
-INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon,
-                vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon,
-                vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon,
-                vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon,
-                vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_4x4_neon)
-INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon,
-                vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon,
-                vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon,
-                vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon,
-                vpx_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_8x8_neon)
-INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon,
-                vpx_dc_left_predictor_16x16_neon,
-                vpx_dc_top_predictor_16x16_neon,
-                vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon,
-                vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon,
-                vpx_d135_predictor_16x16_neon, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_neon)
 INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon,
                vpx_dc_left_predictor_32x32_neon,
                vpx_dc_top_predictor_32x32_neon,
                vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon,
-                vpx_h_predictor_32x32_neon, vpx_d45_predictor_32x32_neon,
-                vpx_d135_predictor_32x32_neon, NULL, NULL, NULL, NULL,
+                vpx_h_predictor_32x32_neon, NULL, NULL, NULL, NULL, NULL, NULL,
                vpx_tm_predictor_32x32_neon)
 #endif  // HAVE_NEON

 #if HAVE_MSA
-INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa,
-                vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa,
-                vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa,
-                vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_4x4_msa)
-INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa,
-                vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa,
-                vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa,
-                vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_8x8_msa)
-INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa,
-                vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa,
-                vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa,
-                vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_16x16_msa)
 INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
                vpx_dc_left_predictor_32x32_msa, vpx_dc_top_predictor_32x32_msa,
                vpx_dc_128_predictor_32x32_msa, vpx_v_predictor_32x32_msa,
@@ -312,275 +343,4 @@ INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa,
                vpx_tm_predictor_32x32_msa)
 #endif  // HAVE_MSA

-#if HAVE_VSX
-INTRA_PRED_TEST(VSX, TestIntraPred4, NULL, NULL, NULL, NULL, NULL,
-                vpx_h_predictor_4x4_vsx, NULL, NULL, NULL, NULL, NULL, NULL,
-                vpx_tm_predictor_4x4_vsx)
-
-INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL, NULL,
-                NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx, NULL,
-                NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx,
-                vpx_tm_predictor_8x8_vsx)
-
-INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx,
-                vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx,
-                vpx_dc_128_predictor_16x16_vsx, vpx_v_predictor_16x16_vsx,
-                vpx_h_predictor_16x16_vsx, vpx_d45_predictor_16x16_vsx, NULL,
-                NULL, NULL, NULL, vpx_d63_predictor_16x16_vsx,
-                vpx_tm_predictor_16x16_vsx)
-
-INTRA_PRED_TEST(VSX, TestIntraPred32, vpx_dc_predictor_32x32_vsx,
-                vpx_dc_left_predictor_32x32_vsx, vpx_dc_top_predictor_32x32_vsx,
-                vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx,
-                vpx_h_predictor_32x32_vsx, vpx_d45_predictor_32x32_vsx, NULL,
-                NULL, NULL, NULL, vpx_d63_predictor_32x32_vsx,
-                vpx_tm_predictor_32x32_vsx)
-#endif  // HAVE_VSX
-
-// -----------------------------------------------------------------------------
-
-#if CONFIG_VP9_HIGHBITDEPTH
-namespace {
-
-typedef void (*VpxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride,
-                                  const uint16_t *above, const uint16_t *left,
-                                  int bd);
-
-typedef IntraPredTestMem<uint16_t> Vp9HighbdIntraPredTestMem;
-
-void TestHighbdIntraPred(const char name[], VpxHighbdPredFunc const *pred_funcs,
-                         const char *const signatures[], int block_size) {
-  const int kNumTests = static_cast<int>(
-      2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs));
-  Vp9HighbdIntraPredTestMem intra_pred_test_mem;
-  const uint16_t *const above = intra_pred_test_mem.above_mem + 16;
-
-  intra_pred_test_mem.Init(block_size, 12);
-
-  for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) {
-    if (pred_funcs[k] == NULL) continue;
-    memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src,
-           sizeof(intra_pred_test_mem.src));
-    vpx_usec_timer timer;
-    vpx_usec_timer_start(&timer);
-    for (int num_tests = 0; num_tests < kNumTests; ++num_tests) {
-      pred_funcs[k](intra_pred_test_mem.src, kBPS, above,
-                    intra_pred_test_mem.left, 12);
-    }
-    libvpx_test::ClearSystemState();
-    vpx_usec_timer_mark(&timer);
-    const int elapsed_time =
-        static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
-    CheckMd5Signature(name, signatures, intra_pred_test_mem.src,
-                      sizeof(intra_pred_test_mem.src), elapsed_time, k);
-  }
-}
-
-void TestHighbdIntraPred4(VpxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "11f74af6c5737df472f3275cbde062fa", "51bea056b6447c93f6eb8f6b7e8f6f71",
-    "27e97f946766331795886f4de04c5594", "53ab15974b049111fb596c5168ec7e3f",
-    "f0b640bb176fbe4584cf3d32a9b0320a", "729783ca909e03afd4b47111c80d967b",
-    "fbf1c30793d9f32812e4d9f905d53530", "293fc903254a33754133314c6cdba81f",
-    "f8074d704233e73dfd35b458c6092374", "aa6363d08544a1ec4da33d7a0be5640d",
-    "462abcfdfa3d087bb33c9a88f2aec491", "863eab65d22550dd44a2397277c1ec71",
-    "23d61df1574d0fa308f9731811047c4b"
-  };
-  TestHighbdIntraPred("Intra4", pred_funcs, kSignatures, 4);
-}
-
-void TestHighbdIntraPred8(VpxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "03da8829fe94663047fd108c5fcaa71d", "ecdb37b8120a2d3a4c706b016bd1bfd7",
-    "1d4543ed8d2b9368cb96898095fe8a75", "f791c9a67b913cbd82d9da8ecede30e2",
-    "065c70646f4dbaff913282f55a45a441", "51f87123616662ef7c35691497dfd0ba",
-    "2a5b0131ef4716f098ee65e6df01e3dd", "9ffe186a6bc7db95275f1bbddd6f7aba",
-    "a3258a2eae2e2bd55cb8f71351b22998", "8d909f0a2066e39b3216092c6289ece4",
-    "d183abb30b9f24c886a0517e991b22c7", "702a42fe4c7d665dc561b2aeeb60f311",
-    "7b5dbbbe7ae3a4ac2948731600bde5d6"
-  };
-  TestHighbdIntraPred("Intra8", pred_funcs, kSignatures, 8);
-}
-
-void TestHighbdIntraPred16(VpxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "e33cb3f56a878e2fddb1b2fc51cdd275", "c7bff6f04b6052c8ab335d726dbbd52d",
-    "d0b0b47b654a9bcc5c6008110a44589b", "78f5da7b10b2b9ab39f114a33b6254e9",
-    "c78e31d23831abb40d6271a318fdd6f3", "90d1347f4ec9198a0320daecb6ff90b8",
-    "d2c623746cbb64a0c9e29c10f2c57041", "cf28bd387b81ad3e5f1a1c779a4b70a0",
-    "24c304330431ddeaf630f6ce94af2eac", "91a329798036bf64e8e00a87b131b8b1",
-    "d39111f22885307f920796a42084c872", "e2e702f7250ece98dd8f3f2854c31eeb",
-    "e2fb05b01eb8b88549e85641d8ce5b59"
-  };
-  TestHighbdIntraPred("Intra16", pred_funcs, kSignatures, 16);
-}
-
-void TestHighbdIntraPred32(VpxHighbdPredFunc const *pred_funcs) {
-  static const char *const kSignatures[kNumVp9IntraPredFuncs] = {
-    "a3e8056ba7e36628cce4917cd956fedd", "cc7d3024fe8748b512407edee045377e",
-    "2aab0a0f330a1d3e19b8ecb8f06387a3", "a547bc3fb7b06910bf3973122a426661",
-    "26f712514da95042f93d6e8dc8e431dc", "bb08c6e16177081daa3d936538dbc2e3",
-    "8f031af3e2650e89620d8d2c3a843d8b", "42867c8553285e94ee8e4df7abafbda8",
-    "6496bdee96100667833f546e1be3d640", "2ebfa25bf981377e682e580208504300",
-    "3e8ae52fd1f607f348aa4cb436c71ab7", "3d4efe797ca82193613696753ea624c4",
-    "cb8aab6d372278f3131e8d99efde02d9"
-  };
-  TestHighbdIntraPred("Intra32", pred_funcs, kSignatures, 32);
-}
-
-}  // namespace
-
-// Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors
-// to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4.
-#define HIGHBD_INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128,  \
-                               v, h, d45, d135, d117, d153, d207, d63, tm)    \
-  TEST(arch, test_func) {                                                     \
-    static const VpxHighbdPredFunc vpx_intra_pred[] = {                       \
-      dc, dc_left, dc_top, dc_128, v, h, d45, d135, d117, d153, d207, d63, tm \
-    };                                                                        \
-    test_func(vpx_intra_pred);                                                \
-  }
-
-// -----------------------------------------------------------------------------
-
-HIGHBD_INTRA_PRED_TEST(
-    C, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_c,
-    vpx_highbd_dc_left_predictor_4x4_c, vpx_highbd_dc_top_predictor_4x4_c,
-    vpx_highbd_dc_128_predictor_4x4_c, vpx_highbd_v_predictor_4x4_c,
-    vpx_highbd_h_predictor_4x4_c, vpx_highbd_d45_predictor_4x4_c,
-    vpx_highbd_d135_predictor_4x4_c, vpx_highbd_d117_predictor_4x4_c,
-    vpx_highbd_d153_predictor_4x4_c, vpx_highbd_d207_predictor_4x4_c,
-    vpx_highbd_d63_predictor_4x4_c, vpx_highbd_tm_predictor_4x4_c)
-
-HIGHBD_INTRA_PRED_TEST(
-    C, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_c,
-    vpx_highbd_dc_left_predictor_8x8_c, vpx_highbd_dc_top_predictor_8x8_c,
-    vpx_highbd_dc_128_predictor_8x8_c, vpx_highbd_v_predictor_8x8_c,
-    vpx_highbd_h_predictor_8x8_c, vpx_highbd_d45_predictor_8x8_c,
-    vpx_highbd_d135_predictor_8x8_c, vpx_highbd_d117_predictor_8x8_c,
-    vpx_highbd_d153_predictor_8x8_c, vpx_highbd_d207_predictor_8x8_c,
-    vpx_highbd_d63_predictor_8x8_c, vpx_highbd_tm_predictor_8x8_c)
-
-HIGHBD_INTRA_PRED_TEST(
-    C, TestHighbdIntraPred16, vpx_highbd_dc_predictor_16x16_c,
-    vpx_highbd_dc_left_predictor_16x16_c, vpx_highbd_dc_top_predictor_16x16_c,
-    vpx_highbd_dc_128_predictor_16x16_c, vpx_highbd_v_predictor_16x16_c,
-    vpx_highbd_h_predictor_16x16_c, vpx_highbd_d45_predictor_16x16_c,
-    vpx_highbd_d135_predictor_16x16_c, vpx_highbd_d117_predictor_16x16_c,
-    vpx_highbd_d153_predictor_16x16_c, vpx_highbd_d207_predictor_16x16_c,
-    vpx_highbd_d63_predictor_16x16_c, vpx_highbd_tm_predictor_16x16_c)
-
-HIGHBD_INTRA_PRED_TEST(
-    C, TestHighbdIntraPred32, vpx_highbd_dc_predictor_32x32_c,
-    vpx_highbd_dc_left_predictor_32x32_c, vpx_highbd_dc_top_predictor_32x32_c,
-    vpx_highbd_dc_128_predictor_32x32_c, vpx_highbd_v_predictor_32x32_c,
-    vpx_highbd_h_predictor_32x32_c, vpx_highbd_d45_predictor_32x32_c,
-    vpx_highbd_d135_predictor_32x32_c, vpx_highbd_d117_predictor_32x32_c,
-    vpx_highbd_d153_predictor_32x32_c, vpx_highbd_d207_predictor_32x32_c,
-    vpx_highbd_d63_predictor_32x32_c, vpx_highbd_tm_predictor_32x32_c)
-
-#if HAVE_SSE2
-HIGHBD_INTRA_PRED_TEST(
-    SSE2, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_sse2,
-    vpx_highbd_dc_left_predictor_4x4_sse2, vpx_highbd_dc_top_predictor_4x4_sse2,
-    vpx_highbd_dc_128_predictor_4x4_sse2, vpx_highbd_v_predictor_4x4_sse2,
-    vpx_highbd_h_predictor_4x4_sse2, NULL, vpx_highbd_d135_predictor_4x4_sse2,
-    vpx_highbd_d117_predictor_4x4_sse2, vpx_highbd_d153_predictor_4x4_sse2,
-    vpx_highbd_d207_predictor_4x4_sse2, vpx_highbd_d63_predictor_4x4_sse2,
-    vpx_highbd_tm_predictor_4x4_c)
-
-HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8,
-                       vpx_highbd_dc_predictor_8x8_sse2,
-                       vpx_highbd_dc_left_predictor_8x8_sse2,
-                       vpx_highbd_dc_top_predictor_8x8_sse2,
-                       vpx_highbd_dc_128_predictor_8x8_sse2,
-                       vpx_highbd_v_predictor_8x8_sse2,
-                       vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL,
-                       NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2)
-
-HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16,
-                       vpx_highbd_dc_predictor_16x16_sse2,
-                       vpx_highbd_dc_left_predictor_16x16_sse2,
-                       vpx_highbd_dc_top_predictor_16x16_sse2,
-                       vpx_highbd_dc_128_predictor_16x16_sse2,
-                       vpx_highbd_v_predictor_16x16_sse2,
-                       vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2)
-
-HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32,
-                       vpx_highbd_dc_predictor_32x32_sse2,
-                       vpx_highbd_dc_left_predictor_32x32_sse2,
-                       vpx_highbd_dc_top_predictor_32x32_sse2,
-                       vpx_highbd_dc_128_predictor_32x32_sse2,
-                       vpx_highbd_v_predictor_32x32_sse2,
-                       vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL,
-                       NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2)
-#endif  // HAVE_SSE2
-
-#if HAVE_SSSE3
-HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred4, NULL, NULL, NULL, NULL,
-                       NULL, NULL, vpx_highbd_d45_predictor_4x4_ssse3, NULL,
-                       NULL, NULL, NULL, NULL, NULL)
-HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred8, NULL, NULL, NULL, NULL,
-                       NULL, NULL, vpx_highbd_d45_predictor_8x8_ssse3,
-                       vpx_highbd_d135_predictor_8x8_ssse3,
-                       vpx_highbd_d117_predictor_8x8_ssse3,
-                       vpx_highbd_d153_predictor_8x8_ssse3,
-                       vpx_highbd_d207_predictor_8x8_ssse3,
-                       vpx_highbd_d63_predictor_8x8_ssse3, NULL)
-HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred16, NULL, NULL, NULL, NULL,
-                       NULL, NULL, vpx_highbd_d45_predictor_16x16_ssse3,
-                       vpx_highbd_d135_predictor_16x16_ssse3,
-                       vpx_highbd_d117_predictor_16x16_ssse3,
-                       vpx_highbd_d153_predictor_16x16_ssse3,
-                       vpx_highbd_d207_predictor_16x16_ssse3,
-                       vpx_highbd_d63_predictor_16x16_ssse3, NULL)
-HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred32, NULL, NULL, NULL, NULL,
-                       NULL, NULL, vpx_highbd_d45_predictor_32x32_ssse3,
-                       vpx_highbd_d135_predictor_32x32_ssse3,
-                       vpx_highbd_d117_predictor_32x32_ssse3,
-                       vpx_highbd_d153_predictor_32x32_ssse3,
-                       vpx_highbd_d207_predictor_32x32_ssse3,
-                       vpx_highbd_d63_predictor_32x32_ssse3, NULL)
-#endif  // HAVE_SSSE3
-
-#if HAVE_NEON
-HIGHBD_INTRA_PRED_TEST(
-    NEON, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_neon,
-    vpx_highbd_dc_left_predictor_4x4_neon, vpx_highbd_dc_top_predictor_4x4_neon,
-    vpx_highbd_dc_128_predictor_4x4_neon, vpx_highbd_v_predictor_4x4_neon,
-    vpx_highbd_h_predictor_4x4_neon, vpx_highbd_d45_predictor_4x4_neon,
-    vpx_highbd_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL,
-    vpx_highbd_tm_predictor_4x4_neon)
-HIGHBD_INTRA_PRED_TEST(
-    NEON, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_neon,
-    vpx_highbd_dc_left_predictor_8x8_neon, vpx_highbd_dc_top_predictor_8x8_neon,
-    vpx_highbd_dc_128_predictor_8x8_neon, vpx_highbd_v_predictor_8x8_neon,
-    vpx_highbd_h_predictor_8x8_neon, vpx_highbd_d45_predictor_8x8_neon,
-    vpx_highbd_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL,
-    vpx_highbd_tm_predictor_8x8_neon)
-HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred16,
-                       vpx_highbd_dc_predictor_16x16_neon,
-                       vpx_highbd_dc_left_predictor_16x16_neon,
-                       vpx_highbd_dc_top_predictor_16x16_neon,
-                       vpx_highbd_dc_128_predictor_16x16_neon,
-                       vpx_highbd_v_predictor_16x16_neon,
-                       vpx_highbd_h_predictor_16x16_neon,
-                       vpx_highbd_d45_predictor_16x16_neon,
-                       vpx_highbd_d135_predictor_16x16_neon, NULL, NULL, NULL,
-                       NULL, vpx_highbd_tm_predictor_16x16_neon)
-HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred32,
-                       vpx_highbd_dc_predictor_32x32_neon,
-                       vpx_highbd_dc_left_predictor_32x32_neon,
-                       vpx_highbd_dc_top_predictor_32x32_neon,
-                       vpx_highbd_dc_128_predictor_32x32_neon,
-                       vpx_highbd_v_predictor_32x32_neon,
-                       vpx_highbd_h_predictor_32x32_neon,
-                       vpx_highbd_d45_predictor_32x32_neon,
-                       vpx_highbd_d135_predictor_32x32_neon, NULL, NULL, NULL,
-                       NULL, vpx_highbd_tm_predictor_32x32_neon)
-#endif  // HAVE_NEON
-
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-
 #include "test/test_libvpx.cc"
--- a/test/test_libvpx.cc
+++ b/test/test_libvpx.cc
@@ -53,9 +53,6 @@ int main(int argc, char **argv) {
  }
  if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter(":AVX.*:AVX/*");
  if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*");
-  if (!(simd_caps & HAS_AVX512)) {
-    append_negative_gtest_filter(":AVX512.*:AVX512/*");
-  }
 #endif  // ARCH_X86 || ARCH_X86_64

 #if !CONFIG_SHARED
--- a/test/test_vector_test.cc
+++ b/test/test_vector_test.cc
@@ -28,10 +28,13 @@

 namespace {

-const int kThreads = 0;
-const int kFileName = 1;
+enum DecodeMode { kSerialMode, kFrameParallelMode };

-typedef std::tr1::tuple<int, const char *> DecodeParam;
+const int kDecodeMode = 0;
+const int kThreads = 1;
+const int kFileName = 2;
+
+typedef std::tr1::tuple<int, int, const char *> DecodeParam;

 class TestVectorTest : public ::libvpx_test::DecoderTest,
                       public ::libvpx_test::CodecTestWithParam<DecodeParam> {
@@ -50,8 +53,8 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,

  void OpenMD5File(const std::string &md5_file_name_) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_);
-    ASSERT_TRUE(md5_file_ != NULL)
-        << "Md5 file open failed. Filename: " << md5_file_name_;
+    ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: "
+                                   << md5_file_name_;
  }

  virtual void DecompressedFrameHook(const vpx_image_t &img,
@@ -89,14 +92,29 @@ class TestVectorTest : public ::libvpx_test::DecoderTest,
 TEST_P(TestVectorTest, MD5Match) {
  const DecodeParam input = GET_PARAM(1);
  const std::string filename = std::tr1::get<kFileName>(input);
+  const int threads = std::tr1::get<kThreads>(input);
+  const int mode = std::tr1::get<kDecodeMode>(input);
  vpx_codec_flags_t flags = 0;
  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
  char str[256];

-  cfg.threads = std::tr1::get<kThreads>(input);
+  if (mode == kFrameParallelMode) {
+    flags |= VPX_CODEC_USE_FRAME_THREADING;
+#if CONFIG_VP9_DECODER
+    // TODO(hkuang): Fix frame parallel decode bug. See issue 1086.
+    if (resize_clips_.find(filename) != resize_clips_.end()) {
+      printf("Skipping the test file: %s, due to frame parallel decode bug.\n",
+             filename.c_str());
+      return;
+    }
+#endif
+  }

-  snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d",
-           filename.c_str(), cfg.threads);
+  cfg.threads = threads;
+
+  snprintf(str, sizeof(str) / sizeof(str[0]) - 1,
+           "file: %s  mode: %s threads: %d", filename.c_str(),
+           mode == 0 ? "Serial" : "Parallel", threads);
  SCOPED_TRACE(str);

  // Open compressed video file.
@@ -127,44 +145,38 @@ TEST_P(TestVectorTest, MD5Match) {
  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg));
 }

+// Test VP8 decode in serial mode with single thread.
+// NOTE: VP8 only supports serial mode.
 #if CONFIG_VP8_DECODER
 VP8_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
        ::testing::Values(1),  // Single thread.
        ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
                            libvpx_test::kVP8TestVectors +
                                libvpx_test::kNumVP8TestVectors)));
-
-// Test VP8 decode in with different numbers of threads.
-INSTANTIATE_TEST_CASE_P(
-    VP8MultiThreaded, TestVectorTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)),
-        ::testing::Combine(
-            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
-            ::testing::ValuesIn(libvpx_test::kVP8TestVectors,
-                                libvpx_test::kVP8TestVectors +
-                                    libvpx_test::kNumVP8TestVectors))));
-
 #endif  // CONFIG_VP8_DECODER

+// Test VP9 decode in serial mode with single thread.
 #if CONFIG_VP9_DECODER
 VP9_INSTANTIATE_TEST_CASE(
    TestVectorTest,
    ::testing::Combine(
+        ::testing::Values(0),  // Serial Mode.
        ::testing::Values(1),  // Single thread.
        ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                            libvpx_test::kVP9TestVectors +
                                libvpx_test::kNumVP9TestVectors)));

+// Test VP9 decode in frame parallel mode with different numbers of threads.
 INSTANTIATE_TEST_CASE_P(
-    VP9MultiThreaded, TestVectorTest,
+    VP9MultiThreadedFrameParallel, TestVectorTest,
    ::testing::Combine(
        ::testing::Values(
            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
        ::testing::Combine(
+            ::testing::Values(1),    // Frame Parallel mode.
            ::testing::Range(2, 9),  // With 2 ~ 8 threads.
            ::testing::ValuesIn(libvpx_test::kVP9TestVectors,
                                libvpx_test::kVP9TestVectors +
--- a/test/test_vectors.cc
+++ b/test/test_vectors.cc
@@ -371,12 +371,9 @@ const char *const kVP9TestVectors[] = {
 #endif  // CONFIG_VP9_HIGHBITDEPTH
  "vp90-2-20-big_superframe-01.webm",
  "vp90-2-20-big_superframe-02.webm",
-  "vp90-2-22-svc_1280x720_1.webm",
  RESIZE_TEST_VECTORS
 };
-const char *const kVP9TestVectorsSvc[] = { "vp90-2-22-svc_1280x720_3.ivf" };
 const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors);
-const int kNumVP9TestVectorsSvc = NELEMENTS(kVP9TestVectorsSvc);
 const char *const kVP9TestVectorsResize[] = { RESIZE_TEST_VECTORS };
 const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize);
 #undef RESIZE_TEST_VECTORS
--- a/test/test_vectors.h
+++ b/test/test_vectors.h
@@ -23,8 +23,6 @@ extern const char *const kVP8TestVectors[];
 #if CONFIG_VP9_DECODER
 extern const int kNumVP9TestVectors;
 extern const char *const kVP9TestVectors[];
-extern const int kNumVP9TestVectorsSvc;
-extern const char *const kVP9TestVectorsSvc[];
 extern const int kNumVP9TestVectorsResize;
 extern const char *const kVP9TestVectorsResize[];
 #endif  // CONFIG_VP9_DECODER
--- a/test/twopass_encoder.sh
+++ b/test/twopass_encoder.sh
@@ -54,10 +54,7 @@ twopass_encoder_vp9() {
  fi
 }

+twopass_encoder_tests="twopass_encoder_vp8
+                       twopass_encoder_vp9"

-if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" != "yes" ]; then
-  twopass_encoder_tests="twopass_encoder_vp8
-                         twopass_encoder_vp9"
-
-  run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
-fi
+run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}"
--- a/test/variance_test.cc
+++ b/test/variance_test.cc
--- a/test/video_source.h
+++ b/test/video_source.h
@@ -13,9 +13,7 @@
 #if defined(_WIN32)
 #undef NOMINMAX
 #define NOMINMAX
-#ifndef WIN32_LEAN_AND_MEAN
 #define WIN32_LEAN_AND_MEAN
-#endif
 #include <windows.h>
 #endif
 #include <cstdio>
--- a/test/vp8_fdct4x4_test.cc
+++ b/test/vp8_fdct4x4_test.cc
@@ -17,16 +17,12 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vpx_config.h"
 #include "./vp8_rtcd.h"
 #include "test/acm_random.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/mem.h"

 namespace {

-typedef void (*FdctFunc)(int16_t *a, int16_t *b, int a_stride);
-
 const int cospi8sqrt2minus1 = 20091;
 const int sinpi8sqrt2 = 35468;

@@ -72,21 +68,10 @@ void reference_idct4x4(const int16_t *input, int16_t *output) {

 using libvpx_test::ACMRandom;

-class FdctTest : public ::testing::TestWithParam<FdctFunc> {
- public:
-  virtual void SetUp() {
-    fdct_func_ = GetParam();
-    rnd_.Reset(ACMRandom::DeterministicSeed());
-  }
-
- protected:
-  FdctFunc fdct_func_;
-  ACMRandom rnd_;
-};
-
-TEST_P(FdctTest, SignBiasCheck) {
+TEST(VP8FdctTest, SignBiasCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int16_t test_input_block[16];
-  DECLARE_ALIGNED(16, int16_t, test_output_block[16]);
+  int16_t test_output_block[16];
  const int pitch = 8;
  int count_sign_block[16][2];
  const int count_test_block = 1000000;
@@ -96,10 +81,10 @@ TEST_P(FdctTest, SignBiasCheck) {
  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = rnd_.Rand8() - rnd_.Rand8();
+      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
    }

-    fdct_func_(test_input_block, test_output_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0) {
@@ -125,10 +110,10 @@ TEST_P(FdctTest, SignBiasCheck) {
  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-15, 15].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = (rnd_.Rand8() >> 4) - (rnd_.Rand8() >> 4);
+      test_input_block[j] = (rnd.Rand8() >> 4) - (rnd.Rand8() >> 4);
    }

-    fdct_func_(test_input_block, test_output_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_output_block, pitch);

    for (int j = 0; j < 16; ++j) {
      if (test_output_block[j] < 0) {
@@ -150,22 +135,23 @@ TEST_P(FdctTest, SignBiasCheck) {
      << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]";
 };

-TEST_P(FdctTest, RoundTripErrorCheck) {
+TEST(VP8FdctTest, RoundTripErrorCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
  int max_error = 0;
  double total_error = 0;
  const int count_test_block = 1000000;
  for (int i = 0; i < count_test_block; ++i) {
    int16_t test_input_block[16];
+    int16_t test_temp_block[16];
    int16_t test_output_block[16];
-    DECLARE_ALIGNED(16, int16_t, test_temp_block[16]);

    // Initialize a test block with input range [-255, 255].
    for (int j = 0; j < 16; ++j) {
-      test_input_block[j] = rnd_.Rand8() - rnd_.Rand8();
+      test_input_block[j] = rnd.Rand8() - rnd.Rand8();
    }

    const int pitch = 8;
-    fdct_func_(test_input_block, test_temp_block, pitch);
+    vp8_short_fdct4x4_c(test_input_block, test_temp_block, pitch);
    reference_idct4x4(test_temp_block, test_output_block);

    for (int j = 0; j < 16; ++j) {
@@ -183,24 +169,4 @@ TEST_P(FdctTest, RoundTripErrorCheck) {
      << "Error: FDCT/IDCT has average roundtrip error > 1 per block";
 };

-INSTANTIATE_TEST_CASE_P(C, FdctTest, ::testing::Values(vp8_short_fdct4x4_c));
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, FdctTest,
-                        ::testing::Values(vp8_short_fdct4x4_neon));
-#endif  // HAVE_NEON
-
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(SSE2, FdctTest,
-                        ::testing::Values(vp8_short_fdct4x4_sse2));
-#endif  // HAVE_SSE2
-
-#if HAVE_MSA
-INSTANTIATE_TEST_CASE_P(MSA, FdctTest,
-                        ::testing::Values(vp8_short_fdct4x4_msa));
-#endif  // HAVE_MSA
-#if HAVE_MMI
-INSTANTIATE_TEST_CASE_P(MMI, FdctTest,
-                        ::testing::Values(vp8_short_fdct4x4_mmi));
-#endif  // HAVE_MMI
 }  // namespace
--- a/test/vp9_denoiser_sse2_test.cc
+++ b/test/vp9_denoiser_sse2_test.cc
@@ -29,21 +29,11 @@ using libvpx_test::ACMRandom;
 namespace {

 const int kNumPixels = 64 * 64;
-
-typedef int (*Vp9DenoiserFilterFunc)(const uint8_t *sig, int sig_stride,
-                                     const uint8_t *mc_avg, int mc_avg_stride,
-                                     uint8_t *avg, int avg_stride,
-                                     int increase_denoising, BLOCK_SIZE bs,
-                                     int motion_magnitude);
-typedef std::tr1::tuple<Vp9DenoiserFilterFunc, BLOCK_SIZE> VP9DenoiserTestParam;
-
-class VP9DenoiserTest
-    : public ::testing::Test,
-      public ::testing::WithParamInterface<VP9DenoiserTestParam> {
+class VP9DenoiserTest : public ::testing::TestWithParam<BLOCK_SIZE> {
 public:
  virtual ~VP9DenoiserTest() {}

-  virtual void SetUp() { bs_ = GET_PARAM(1); }
+  virtual void SetUp() { bs_ = GetParam(); }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

@@ -86,9 +76,9 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
                                                   64, avg_block_c, 64, 0, bs_,
                                                   motion_magnitude_random));

-    ASM_REGISTER_STATE_CHECK(GET_PARAM(0)(sig_block, 64, mc_avg_block, 64,
-                                          avg_block_sse2, 64, 0, bs_,
-                                          motion_magnitude_random));
+    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_sse2(
+        sig_block, 64, mc_avg_block, 64, avg_block_sse2, 64, 0, bs_,
+        motion_magnitude_random));

    // Test bitexactness.
    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
@@ -99,36 +89,10 @@ TEST_P(VP9DenoiserTest, BitexactCheck) {
  }
 }

-using std::tr1::make_tuple;
-
 // Test for all block size.
-#if HAVE_SSE2
-INSTANTIATE_TEST_CASE_P(
-    SSE2, VP9DenoiserTest,
-    ::testing::Values(make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X8),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X16),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X8),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X16),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X32),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X16),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X32),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X64),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X32),
-                      make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X64)));
-#endif  // HAVE_SSE2
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(
-    NEON, VP9DenoiserTest,
-    ::testing::Values(make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X8),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X16),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X8),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X16),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X32),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X16),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X32),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X64),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X32),
-                      make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X64)));
-#endif
+INSTANTIATE_TEST_CASE_P(SSE2, VP9DenoiserTest,
+                        ::testing::Values(BLOCK_8X8, BLOCK_8X16, BLOCK_16X8,
+                                          BLOCK_16X16, BLOCK_16X32, BLOCK_32X16,
+                                          BLOCK_32X32, BLOCK_32X64, BLOCK_64X32,
+                                          BLOCK_64X64));
 }  // namespace
--- a/test/vp9_encoder_parms_get_to_decoder.cc
+++ b/test/vp9_encoder_parms_get_to_decoder.cc
@@ -99,7 +99,9 @@ class VpxEncoderParmsGetToDecoder
    vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder();
    vpx_codec_alg_priv_t *const priv =
        reinterpret_cast<vpx_codec_alg_priv_t *>(vp9_decoder->priv);
-    VP9_COMMON *const common = &priv->pbi->common;
+    FrameWorkerData *const worker_data =
+        reinterpret_cast<FrameWorkerData *>(priv->frame_workers[0].data1);
+    VP9_COMMON *const common = &worker_data->pbi->common;

    if (encode_parms.lossless) {
      EXPECT_EQ(0, common->base_qindex);
--- a/test/vp9_error_block_test.cc
+++ b/test/vp9_error_block_test.cc
@@ -23,36 +23,36 @@
 #include "vp9/common/vp9_entropy.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_dsp/vpx_dsp_common.h"

 using libvpx_test::ACMRandom;

 namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
 const int kNumIterations = 1000;

-typedef int64_t (*HBDBlockErrorFunc)(const tran_low_t *coeff,
-                                     const tran_low_t *dqcoeff,
-                                     intptr_t block_size, int64_t *ssz,
-                                     int bps);
-
-typedef std::tr1::tuple<HBDBlockErrorFunc, HBDBlockErrorFunc, vpx_bit_depth_t>
-    BlockErrorParam;
-
-typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff,
+typedef int64_t (*ErrorBlockFunc)(const tran_low_t *coeff,
                                  const tran_low_t *dqcoeff,
-                                  intptr_t block_size, int64_t *ssz);
+                                  intptr_t block_size, int64_t *ssz, int bps);

-template <BlockErrorFunc fn>
-int64_t BlockError8BitWrapper(const tran_low_t *coeff,
-                              const tran_low_t *dqcoeff, intptr_t block_size,
-                              int64_t *ssz, int bps) {
-  EXPECT_EQ(bps, 8);
+typedef std::tr1::tuple<ErrorBlockFunc, ErrorBlockFunc, vpx_bit_depth_t>
+    ErrorBlockParam;
+
+// wrapper for 8-bit block error functions without a 'bps' param.
+typedef int64_t (*HighBdBlockError8bit)(const tran_low_t *coeff,
+                                        const tran_low_t *dqcoeff,
+                                        intptr_t block_size, int64_t *ssz);
+template <HighBdBlockError8bit fn>
+int64_t HighBdBlockError8bitWrapper(const tran_low_t *coeff,
+                                    const tran_low_t *dqcoeff,
+                                    intptr_t block_size, int64_t *ssz,
+                                    int bps) {
+  EXPECT_EQ(8, bps);
  return fn(coeff, dqcoeff, block_size, ssz);
 }

-class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {
+class ErrorBlockTest : public ::testing::TestWithParam<ErrorBlockParam> {
 public:
-  virtual ~BlockErrorTest() {}
+  virtual ~ErrorBlockTest() {}
  virtual void SetUp() {
    error_block_op_ = GET_PARAM(0);
    ref_error_block_op_ = GET_PARAM(1);
@@ -63,11 +63,11 @@ class BlockErrorTest : public ::testing::TestWithParam<BlockErrorParam> {

 protected:
  vpx_bit_depth_t bit_depth_;
-  HBDBlockErrorFunc error_block_op_;
-  HBDBlockErrorFunc ref_error_block_op_;
+  ErrorBlockFunc error_block_op_;
+  ErrorBlockFunc ref_error_block_op_;
 };

-TEST_P(BlockErrorTest, OperationCheck) {
+TEST_P(ErrorBlockTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
@@ -110,7 +110,7 @@ TEST_P(BlockErrorTest, OperationCheck) {
      << "First failed at test case " << first_failure;
 }

-TEST_P(BlockErrorTest, ExtremeValues) {
+TEST_P(ErrorBlockTest, ExtremeValues) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, tran_low_t, coeff[4096]);
  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]);
@@ -171,28 +171,29 @@ TEST_P(BlockErrorTest, ExtremeValues) {
 using std::tr1::make_tuple;

 #if HAVE_SSE2
-const BlockErrorParam sse2_block_error_tests[] = {
-#if CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-             VPX_BITS_10),
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-             VPX_BITS_12),
-  make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
-             VPX_BITS_8),
-#endif  // CONFIG_VP9_HIGHBITDEPTH
-  make_tuple(&BlockError8BitWrapper<vp9_block_error_sse2>,
-             &BlockError8BitWrapper<vp9_block_error_c>, VPX_BITS_8)
-};
-
-INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest,
-                        ::testing::ValuesIn(sse2_block_error_tests));
+INSTANTIATE_TEST_CASE_P(
+    SSE2, ErrorBlockTest,
+    ::testing::Values(
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+                   VPX_BITS_10),
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+                   VPX_BITS_12),
+        make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c,
+                   VPX_BITS_8),
+        make_tuple(
+            &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_sse2>,
+            &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_c>,
+            VPX_BITS_8)));
 #endif  // HAVE_SSE2

-#if HAVE_AVX2
+#if HAVE_AVX
 INSTANTIATE_TEST_CASE_P(
-    AVX2, BlockErrorTest,
-    ::testing::Values(make_tuple(&BlockError8BitWrapper<vp9_block_error_avx2>,
-                                 &BlockError8BitWrapper<vp9_block_error_c>,
-                                 VPX_BITS_8)));
-#endif  // HAVE_AVX2
+    AVX, ErrorBlockTest,
+    ::testing::Values(make_tuple(
+        &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_avx>,
+        &HighBdBlockError8bitWrapper<vp9_highbd_block_error_8bit_c>,
+        VPX_BITS_8)));
+#endif  // HAVE_AVX
+
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/vp9_ethread_test.cc
+++ b/test/vp9_ethread_test.cc
@@ -16,221 +16,17 @@
 #include "test/md5_helper.h"
 #include "test/util.h"
 #include "test/y4m_video_source.h"
-#include "vp9/encoder/vp9_firstpass.h"

 namespace {
-// FIRSTPASS_STATS struct:
-// {
-//   25 double members;
-//   1 int64_t member;
-// }
-// Whenever FIRSTPASS_STATS struct is modified, the following constants need to
-// be revisited.
-const int kDbl = 25;
-const int kInt = 1;
-const size_t kFirstPassStatsSz = kDbl * sizeof(double) + kInt * sizeof(int64_t);
-
-class VPxFirstPassEncoderThreadTest
+class VPxEncoderThreadTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
- protected:
-  VPxFirstPassEncoderThreadTest()
-      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(0),
-        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) {
-    init_flags_ = VPX_CODEC_USE_PSNR;
-
-    row_mt_mode_ = 1;
-    first_pass_only_ = true;
-    firstpass_stats_.buf = NULL;
-    firstpass_stats_.sz = 0;
-  }
-  virtual ~VPxFirstPassEncoderThreadTest() { free(firstpass_stats_.buf); }
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-
-    cfg_.rc_end_usage = VPX_VBR;
-    cfg_.rc_2pass_vbr_minsection_pct = 5;
-    cfg_.rc_2pass_vbr_maxsection_pct = 2000;
-    cfg_.rc_max_quantizer = 56;
-    cfg_.rc_min_quantizer = 0;
-  }
-
-  virtual void BeginPassHook(unsigned int /*pass*/) {
-    encoder_initialized_ = false;
-    abort_ = false;
-  }
-
-  virtual void EndPassHook() {
-    // For first pass stats test, only run first pass encoder.
-    if (first_pass_only_ && cfg_.g_pass == VPX_RC_FIRST_PASS)
-      abort_ |= first_pass_only_;
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (!encoder_initialized_) {
-      // Encode in 2-pass mode.
-      encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_);
-      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
-      encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-      encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-      encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-      encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
-
-      if (encoding_mode_ == ::libvpx_test::kTwoPassGood)
-        encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
-
-      encoder_initialized_ = true;
-    }
-  }
-
-  virtual void StatsPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    const uint8_t *const pkt_buf =
-        reinterpret_cast<uint8_t *>(pkt->data.twopass_stats.buf);
-    const size_t pkt_size = pkt->data.twopass_stats.sz;
-
-    // First pass stats size equals sizeof(FIRSTPASS_STATS)
-    EXPECT_EQ(pkt_size, kFirstPassStatsSz)
-        << "Error: First pass stats size doesn't equal kFirstPassStatsSz";
-
-    firstpass_stats_.buf =
-        realloc(firstpass_stats_.buf, firstpass_stats_.sz + pkt_size);
-    memcpy((uint8_t *)firstpass_stats_.buf + firstpass_stats_.sz, pkt_buf,
-           pkt_size);
-    firstpass_stats_.sz += pkt_size;
-  }
-
-  bool encoder_initialized_;
-  int tiles_;
-  ::libvpx_test::TestMode encoding_mode_;
-  int set_cpu_used_;
-  int row_mt_mode_;
-  bool first_pass_only_;
-  vpx_fixed_buf_t firstpass_stats_;
-};
-
-static void compare_fp_stats(vpx_fixed_buf_t *fp_stats, double factor) {
-  // fp_stats consists of 2 set of first pass encoding stats. These 2 set of
-  // stats are compared to check if the stats match or at least are very close.
-  FIRSTPASS_STATS *stats1 = reinterpret_cast<FIRSTPASS_STATS *>(fp_stats->buf);
-  int nframes_ = (int)(fp_stats->sz / sizeof(FIRSTPASS_STATS));
-  FIRSTPASS_STATS *stats2 = stats1 + nframes_ / 2;
-  int i, j;
-
-  // The total stats are also output and included in the first pass stats. Here
-  // ignore that in the comparison.
-  for (i = 0; i < (nframes_ / 2 - 1); ++i) {
-    const double *frame_stats1 = reinterpret_cast<double *>(stats1);
-    const double *frame_stats2 = reinterpret_cast<double *>(stats2);
-
-    for (j = 0; j < kDbl; ++j) {
-      ASSERT_LE(fabs(*frame_stats1 - *frame_stats2),
-                fabs(*frame_stats1) / factor)
-          << "First failure @ frame #" << i << " stat #" << j << " ("
-          << *frame_stats1 << " vs. " << *frame_stats2 << ")";
-      frame_stats1++;
-      frame_stats2++;
-    }
-
-    stats1++;
-    stats2++;
-  }
-
-  // Reset firstpass_stats_ to 0.
-  memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz);
-  fp_stats->sz = 0;
-}
-
-static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) {
-  // fp_stats consists of 2 set of first pass encoding stats. These 2 set of
-  // stats are compared to check if the stats match.
-  uint8_t *stats1 = reinterpret_cast<uint8_t *>(fp_stats->buf);
-  uint8_t *stats2 = stats1 + fp_stats->sz / 2;
-  ::libvpx_test::MD5 md5_row_mt_0, md5_row_mt_1;
-
-  md5_row_mt_0.Add(stats1, fp_stats->sz / 2);
-  const char *md5_row_mt_0_str = md5_row_mt_0.Get();
-
-  md5_row_mt_1.Add(stats2, fp_stats->sz / 2);
-  const char *md5_row_mt_1_str = md5_row_mt_1.Get();
-
-  // Check md5 match.
-  ASSERT_STREQ(md5_row_mt_0_str, md5_row_mt_1_str)
-      << "MD5 checksums don't match";
-
-  // Reset firstpass_stats_ to 0.
-  memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz);
-  fp_stats->sz = 0;
-}
-
-TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) {
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
-
-  first_pass_only_ = true;
-  cfg_.rc_target_bitrate = 1000;
-
-  // Test row_mt_mode: 0 vs 1 at single thread case(threads = 1, tiles_ = 0)
-  tiles_ = 0;
-  cfg_.g_threads = 1;
-
-  row_mt_mode_ = 0;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  row_mt_mode_ = 1;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  // Compare to check if using or not using row-mt generates close stats.
-  ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0));
-
-  // Test single thread vs multiple threads
-  row_mt_mode_ = 1;
-  tiles_ = 0;
-
-  cfg_.g_threads = 1;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  cfg_.g_threads = 4;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  // Compare to check if single-thread and multi-thread stats are close enough.
-  ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0));
-
-  // Bit exact test in row_mt mode.
-  // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact
-  // result.
-  row_mt_mode_ = 1;
-  tiles_ = 2;
-
-  cfg_.g_threads = 2;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  cfg_.g_threads = 8;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-
-  // Compare to check if stats match with row-mt=0/1.
-  compare_fp_stats_md5(&firstpass_stats_);
-}
-
-class VPxEncoderThreadTest
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith4Params<libvpx_test::TestMode, int,
-                                                 int, int> {
 protected:
  VPxEncoderThreadTest()
-      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false),
-        tiles_(GET_PARAM(3)), threads_(GET_PARAM(4)),
+      : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(2),
        encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) {
    init_flags_ = VPX_CODEC_USE_PSNR;
    md5_.clear();
-    row_mt_mode_ = 1;
-    psnr_ = 0.0;
-    nframes_ = 0;
  }
  virtual ~VPxEncoderThreadTest() {}

@@ -239,6 +35,7 @@ class VPxEncoderThreadTest
    SetMode(encoding_mode_);

    if (encoding_mode_ != ::libvpx_test::kRealTime) {
+      cfg_.g_lag_in_frames = 3;
      cfg_.rc_end_usage = VPX_VBR;
      cfg_.rc_2pass_vbr_minsection_pct = 5;
      cfg_.rc_2pass_vbr_maxsection_pct = 2000;
@@ -253,8 +50,6 @@ class VPxEncoderThreadTest

  virtual void BeginPassHook(unsigned int /*pass*/) {
    encoder_initialized_ = false;
-    psnr_ = 0.0;
-    nframes_ = 0;
  }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/,
@@ -268,22 +63,14 @@ class VPxEncoderThreadTest
        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-        encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0);
      } else {
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0);
        encoder->Control(VP9E_SET_AQ_MODE, 3);
      }
-      encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_);
-
      encoder_initialized_ = true;
    }
  }

-  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
-    psnr_ += pkt->data.psnr.psnr[0];
-    nframes_++;
-  }
-
  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t /*pts*/) {
    ::libvpx_test::MD5 md5_res;
@@ -302,127 +89,40 @@ class VPxEncoderThreadTest
    return true;
  }

-  double GetAveragePsnr() const { return nframes_ ? (psnr_ / nframes_) : 0.0; }
-
  bool encoder_initialized_;
  int tiles_;
-  int threads_;
  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
-  int row_mt_mode_;
-  double psnr_;
-  unsigned int nframes_;
  std::vector<std::string> md5_;
 };

 TEST_P(VPxEncoderThreadTest, EncoderResultTest) {
-  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
-  cfg_.rc_target_bitrate = 1000;
+  std::vector<std::string> single_thr_md5, multi_thr_md5;

-  // Part 1: Bit exact test for row_mt_mode_ = 0.
-  // This part keeps original unit tests done before row-mt code is checked in.
-  row_mt_mode_ = 0;
+  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20);
+
+  cfg_.rc_target_bitrate = 1000;

  // Encode using single thread.
  cfg_.g_threads = 1;
  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const std::vector<std::string> single_thr_md5 = md5_;
+  single_thr_md5 = md5_;
  md5_.clear();

  // Encode using multiple threads.
-  cfg_.g_threads = threads_;
+  cfg_.g_threads = 4;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const std::vector<std::string> multi_thr_md5 = md5_;
+  multi_thr_md5 = md5_;
  md5_.clear();

  // Compare to check if two vectors are equal.
  ASSERT_EQ(single_thr_md5, multi_thr_md5);
-
-  // Part 2: row_mt_mode_ = 0 vs row_mt_mode_ = 1 single thread bit exact test.
-  row_mt_mode_ = 1;
-
-  // Encode using single thread
-  cfg_.g_threads = 1;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  std::vector<std::string> row_mt_single_thr_md5 = md5_;
-  md5_.clear();
-
-  ASSERT_EQ(single_thr_md5, row_mt_single_thr_md5);
-
-  // Part 3: Bit exact test with row-mt on
-  // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact
-  // result.
-  row_mt_mode_ = 1;
-  row_mt_single_thr_md5.clear();
-
-  // Encode using 2 threads.
-  cfg_.g_threads = 2;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  row_mt_single_thr_md5 = md5_;
-  md5_.clear();
-
-  // Encode using multiple threads.
-  cfg_.g_threads = threads_;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const std::vector<std::string> row_mt_multi_thr_md5 = md5_;
-  md5_.clear();
-
-  // Compare to check if two vectors are equal.
-  ASSERT_EQ(row_mt_single_thr_md5, row_mt_multi_thr_md5);
-
-  // Part 4: PSNR test with bit_match_mode_ = 0
-  row_mt_mode_ = 1;
-
-  // Encode using single thread.
-  cfg_.g_threads = 1;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double single_thr_psnr = GetAveragePsnr();
-
-  // Encode using multiple threads.
-  cfg_.g_threads = threads_;
-  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
-  const double multi_thr_psnr = GetAveragePsnr();
-
-  EXPECT_NEAR(single_thr_psnr, multi_thr_psnr, 0.1);
 }

-INSTANTIATE_TEST_CASE_P(
-    VP9, VPxFirstPassEncoderThreadTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-        ::testing::Values(::libvpx_test::kTwoPassGood),
-        ::testing::Range(0, 4)));  // cpu_used
-
-// Split this into two instantiations so that we can distinguish
-// between very slow runs ( ie cpu_speed 0 ) vs ones that can be
-// run nightly by adding Large to the title.
-INSTANTIATE_TEST_CASE_P(
-    VP9, VPxEncoderThreadTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-        ::testing::Values(::libvpx_test::kTwoPassGood,
-                          ::libvpx_test::kOnePassGood,
-                          ::libvpx_test::kRealTime),
-        ::testing::Range(3, 9),    // cpu_used
-        ::testing::Range(0, 3),    // tile_columns
-        ::testing::Range(2, 5)));  // threads
-
-INSTANTIATE_TEST_CASE_P(
-    VP9Large, VPxEncoderThreadTest,
-    ::testing::Combine(
-        ::testing::Values(
-            static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)),
-        ::testing::Values(::libvpx_test::kTwoPassGood,
-                          ::libvpx_test::kOnePassGood,
-                          ::libvpx_test::kRealTime),
-        ::testing::Range(0, 3),    // cpu_used
-        ::testing::Range(0, 3),    // tile_columns
-        ::testing::Range(2, 5)));  // threads
-
+VP9_INSTANTIATE_TEST_CASE(VPxEncoderThreadTest,
+                          ::testing::Values(::libvpx_test::kTwoPassGood,
+                                            ::libvpx_test::kOnePassGood,
+                                            ::libvpx_test::kRealTime),
+                          ::testing::Range(1, 9));
 }  // namespace
--- a/test/vp9_frame_parallel_test.cc
+++ b/test/vp9_frame_parallel_test.cc
@@ -0,0 +1,217 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <cstdio>
+#include <cstdlib>
+#include <string>
+#include "third_party/googletest/src/include/gtest/gtest.h"
+#include "./vpx_config.h"
+#include "test/codec_factory.h"
+#include "test/decode_test_driver.h"
+#include "test/ivf_video_source.h"
+#include "test/md5_helper.h"
+#include "test/util.h"
+#if CONFIG_WEBM_IO
+#include "test/webm_video_source.h"
+#endif
+#include "vpx_mem/vpx_mem.h"
+
+namespace {
+
+using std::string;
+
+#if CONFIG_WEBM_IO
+
+struct PauseFileList {
+  const char *name;
+  // md5 sum for decoded frames which does not include skipped frames.
+  const char *expected_md5;
+  const int pause_frame_num;
+};
+
+// Decodes |filename| with |num_threads|. Pauses at input frame |pause_num|,
+// seeks to the next key frame and then continues decoding until the end.
+// Returns the md5 of the decoded frames, which excludes the skipped frames.
+string DecodeFileWithPause(const string &filename, int num_threads,
+                           int pause_num) {
+  libvpx_test::WebMVideoSource video(filename);
+  video.Init();
+  int in_frames = 0;
+  int out_frames = 0;
+
+  vpx_codec_dec_cfg_t cfg = { 0 };
+  cfg.threads = num_threads;
+  vpx_codec_flags_t flags = 0;
+  flags |= VPX_CODEC_USE_FRAME_THREADING;
+  libvpx_test::VP9Decoder decoder(cfg, flags);
+
+  libvpx_test::MD5 md5;
+  video.Begin();
+
+  do {
+    ++in_frames;
+    const vpx_codec_err_t res =
+        decoder.DecodeFrame(video.cxdata(), video.frame_size());
+    if (res != VPX_CODEC_OK) {
+      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+      break;
+    }
+
+    // Pause at specified frame number.
+    if (in_frames == pause_num) {
+      // Flush the decoder and then seek to next key frame.
+      decoder.DecodeFrame(NULL, 0);
+      video.SeekToNextKeyFrame();
+    } else {
+      video.Next();
+    }
+
+    // Flush the decoder at the end of the video.
+    if (!video.cxdata()) decoder.DecodeFrame(NULL, 0);
+
+    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next())) {
+      ++out_frames;
+      md5.Add(img);
+    }
+  } while (video.cxdata() != NULL);
+
+  EXPECT_EQ(in_frames, out_frames)
+      << "Input frame count does not match output frame count";
+
+  return string(md5.Get());
+}
+
+void DecodeFilesWithPause(const PauseFileList files[]) {
+  for (const PauseFileList *iter = files; iter->name != NULL; ++iter) {
+    SCOPED_TRACE(iter->name);
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_EQ(iter->expected_md5,
+                DecodeFileWithPause(iter->name, t, iter->pause_frame_num))
+          << "threads = " << t;
+    }
+  }
+}
+
+TEST(VP9MultiThreadedFrameParallel, PauseSeekResume) {
+  // vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
+  // one key frame for every ten frames.
+  static const PauseFileList files[] = {
+    { "vp90-2-07-frame_parallel-1.webm", "6ea7c3875d67252e7caf2bc6e75b36b1",
+      6 },
+    { "vp90-2-07-frame_parallel-1.webm", "4bb634160c7356a8d7d4299b6dc83a45",
+      12 },
+    { "vp90-2-07-frame_parallel-1.webm", "89772591e6ef461f9fa754f916c78ed8",
+      26 },
+    { NULL, NULL, 0 },
+  };
+  DecodeFilesWithPause(files);
+}
+
+struct FileList {
+  const char *name;
+  // md5 sum for decoded frames which does not include corrupted frames.
+  const char *expected_md5;
+  // Expected number of decoded frames which does not include corrupted frames.
+  const int expected_frame_count;
+};
+
+// Decodes |filename| with |num_threads|, expecting |expected_frame_count|
+// output frames. Returns the md5 of the decoded (non-corrupted) frames.
+string DecodeFile(const string &filename, int num_threads,
+                  int expected_frame_count) {
+  libvpx_test::WebMVideoSource video(filename);
+  video.Init();
+
+  vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
+  cfg.threads = num_threads;
+  const vpx_codec_flags_t flags = VPX_CODEC_USE_FRAME_THREADING;
+  libvpx_test::VP9Decoder decoder(cfg, flags);
+
+  libvpx_test::MD5 md5;
+  video.Begin();
+
+  int out_frames = 0;
+  do {
+    const vpx_codec_err_t res =
+        decoder.DecodeFrame(video.cxdata(), video.frame_size());
+    // TODO(hkuang): frame parallel mode should return an error on corruption.
+    if (res != VPX_CODEC_OK) {
+      EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
+      break;
+    }
+
+    video.Next();
+
+    // Flush the decoder at the end of the video.
+    if (!video.cxdata()) decoder.DecodeFrame(NULL, 0);
+
+    libvpx_test::DxDataIterator dec_iter = decoder.GetDxData();
+    const vpx_image_t *img;
+
+    // Get decompressed data
+    while ((img = dec_iter.Next())) {
+      ++out_frames;
+      md5.Add(img);
+    }
+  } while (video.cxdata() != NULL);
+
+  EXPECT_EQ(expected_frame_count, out_frames)
+      << "Input frame count does not match expected output frame count";
+
+  return string(md5.Get());
+}
+
+void DecodeFiles(const FileList files[]) {
+  for (const FileList *iter = files; iter->name != NULL; ++iter) {
+    SCOPED_TRACE(iter->name);
+    for (int t = 2; t <= 8; ++t) {
+      EXPECT_EQ(iter->expected_md5,
+                DecodeFile(iter->name, t, iter->expected_frame_count))
+          << "threads = " << t;
+    }
+  }
+}
+
+TEST(VP9MultiThreadedFrameParallel, InvalidFileTest) {
+  static const FileList files[] = {
+    // invalid-vp90-2-07-frame_parallel-1.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 11th frame has corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-1.webm",
+      "0549d0f45f60deaef8eb708e6c0eb6cb", 30 },
+    // invalid-vp90-2-07-frame_parallel-2.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 1st and 31st frames have
+    // corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-2.webm",
+      "6a1f3cf6f9e7a364212fadb9580d525e", 20 },
+    // invalid-vp90-2-07-frame_parallel-3.webm is a 40 frame video file with
+    // one key frame for every ten frames. The 5th and 13th frames have
+    // corrupted data.
+    { "invalid-vp90-2-07-frame_parallel-3.webm",
+      "8256544308de926b0681e04685b98677", 27 },
+    { NULL, NULL, 0 },
+  };
+  DecodeFiles(files);
+}
+
+TEST(VP9MultiThreadedFrameParallel, ValidFileTest) {
+  static const FileList files[] = {
+#if CONFIG_VP9_HIGHBITDEPTH
+    { "vp92-2-20-10bit-yuv420.webm", "a16b99df180c584e8db2ffeda987d293", 10 },
+#endif
+    { NULL, NULL, 0 },
+  };
+  DecodeFiles(files);
+}
+#endif  // CONFIG_WEBM_IO
+}  // namespace
--- a/test/vp9_intrapred_test.cc
+++ b/test/vp9_intrapred_test.cc
--- a/test/vp9_motion_vector_test.cc
+++ b/test/vp9_motion_vector_test.cc
@@ -1,97 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "test/codec_factory.h"
-#include "test/encode_test_driver.h"
-#include "test/util.h"
-#include "test/yuv_video_source.h"
-
-namespace {
-#define MAX_EXTREME_MV 1
-#define MIN_EXTREME_MV 2
-
-// Encoding modes
-const libvpx_test::TestMode kEncodingModeVectors[] = {
-  ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood,
-  ::libvpx_test::kRealTime,
-};
-
-// Encoding speeds
-const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6 };
-
-// MV test modes: 1 - always use maximum MV; 2 - always use minimum MV.
-const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV };
-
-class MotionVectorTestLarge
-    : public ::libvpx_test::EncoderTest,
-      public ::libvpx_test::CodecTestWith3Params<libvpx_test::TestMode, int,
-                                                 int> {
- protected:
-  MotionVectorTestLarge()
-      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
-        cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {}
-
-  virtual ~MotionVectorTestLarge() {}
-
-  virtual void SetUp() {
-    InitializeConfig();
-    SetMode(encoding_mode_);
-    if (encoding_mode_ != ::libvpx_test::kRealTime) {
-      cfg_.g_lag_in_frames = 3;
-      cfg_.rc_end_usage = VPX_VBR;
-    } else {
-      cfg_.g_lag_in_frames = 0;
-      cfg_.rc_end_usage = VPX_CBR;
-      cfg_.rc_buf_sz = 1000;
-      cfg_.rc_buf_initial_sz = 500;
-      cfg_.rc_buf_optimal_sz = 600;
-    }
-  }
-
-  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
-                                  ::libvpx_test::Encoder *encoder) {
-    if (video->frame() == 1) {
-      encoder->Control(VP8E_SET_CPUUSED, cpu_used_);
-      encoder->Control(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_);
-      if (encoding_mode_ != ::libvpx_test::kRealTime) {
-        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
-        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
-        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
-        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
-      }
-    }
-  }
-
-  libvpx_test::TestMode encoding_mode_;
-  int cpu_used_;
-  int mv_test_mode_;
-};
-
-TEST_P(MotionVectorTestLarge, OverallTest) {
-  cfg_.rc_target_bitrate = 24000;
-  cfg_.g_profile = 0;
-  init_flags_ = VPX_CODEC_USE_PSNR;
-
-  testing::internal::scoped_ptr<libvpx_test::VideoSource> video;
-  video.reset(new libvpx_test::YUVVideoSource(
-      "niklas_640_480_30.yuv", VPX_IMG_FMT_I420, 3840, 2160,  // 2048, 1080,
-      30, 1, 0, 5));
-
-  ASSERT_TRUE(video.get() != NULL);
-  ASSERT_NO_FATAL_FAILURE(RunLoop(video.get()));
-}
-
-VP9_INSTANTIATE_TEST_CASE(MotionVectorTestLarge,
-                          ::testing::ValuesIn(kEncodingModeVectors),
-                          ::testing::ValuesIn(kCpuUsedVectors),
-                          ::testing::ValuesIn(kMVTestModes));
-}  // namespace
--- a/test/vp9_quantize_test.cc
+++ b/test/vp9_quantize_test.cc
@@ -14,11 +14,9 @@

 #include "third_party/googletest/src/include/gtest/gtest.h"

-#include "./vp9_rtcd.h"
 #include "./vpx_config.h"
 #include "./vpx_dsp_rtcd.h"
 #include "test/acm_random.h"
-#include "test/buffer.h"
 #include "test/clear_system_state.h"
 #include "test/register_state_check.h"
 #include "test/util.h"
@@ -26,12 +24,11 @@
 #include "vp9/common/vp9_scan.h"
 #include "vpx/vpx_codec.h"
 #include "vpx/vpx_integer.h"
-#include "vpx_ports/vpx_timer.h"

 using libvpx_test::ACMRandom;
-using libvpx_test::Buffer;

 namespace {
+#if CONFIG_VP9_HIGHBITDEPTH
 const int number_of_iterations = 100;

 typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
@@ -41,494 +38,307 @@ typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count,
                             tran_low_t *dqcoeff, const int16_t *dequant,
                             uint16_t *eob, const int16_t *scan,
                             const int16_t *iscan);
-typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t,
-                        int /*max_size*/, bool /*is_fp*/>
+typedef std::tr1::tuple<QuantizeFunc, QuantizeFunc, vpx_bit_depth_t>
    QuantizeParam;

-// Wrapper for FP version which does not use zbin or quant_shift.
-typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count,
-                               int skip_block, const int16_t *round,
-                               const int16_t *quant, tran_low_t *qcoeff,
-                               tran_low_t *dqcoeff, const int16_t *dequant,
-                               uint16_t *eob, const int16_t *scan,
-                               const int16_t *iscan);
-
-template <QuantizeFPFunc fn>
-void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block,
-                    const int16_t *zbin, const int16_t *round,
-                    const int16_t *quant, const int16_t *quant_shift,
-                    tran_low_t *qcoeff, tran_low_t *dqcoeff,
-                    const int16_t *dequant, uint16_t *eob, const int16_t *scan,
-                    const int16_t *iscan) {
-  (void)zbin;
-  (void)quant_shift;
-
-  fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob,
-     scan, iscan);
-}
-
-class VP9QuantizeBase {
+class VP9QuantizeTest : public ::testing::TestWithParam<QuantizeParam> {
 public:
-  VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp)
-      : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp) {
-    max_value_ = (1 << bit_depth_) - 1;
-    zbin_ptr_ =
-        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*zbin_ptr_)));
-    round_fp_ptr_ = reinterpret_cast<int16_t *>(
-        vpx_memalign(16, 8 * sizeof(*round_fp_ptr_)));
-    quant_fp_ptr_ = reinterpret_cast<int16_t *>(
-        vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_)));
-    round_ptr_ =
-        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*round_ptr_)));
-    quant_ptr_ =
-        reinterpret_cast<int16_t *>(vpx_memalign(16, 8 * sizeof(*quant_ptr_)));
-    quant_shift_ptr_ = reinterpret_cast<int16_t *>(
-        vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_)));
-    dequant_ptr_ = reinterpret_cast<int16_t *>(
-        vpx_memalign(16, 8 * sizeof(*dequant_ptr_)));
+  virtual ~VP9QuantizeTest() {}
+  virtual void SetUp() {
+    quantize_op_ = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
  }

-  ~VP9QuantizeBase() {
-    vpx_free(zbin_ptr_);
-    vpx_free(round_fp_ptr_);
-    vpx_free(quant_fp_ptr_);
-    vpx_free(round_ptr_);
-    vpx_free(quant_ptr_);
-    vpx_free(quant_shift_ptr_);
-    vpx_free(dequant_ptr_);
-    zbin_ptr_ = NULL;
-    round_fp_ptr_ = NULL;
-    quant_fp_ptr_ = NULL;
-    round_ptr_ = NULL;
-    quant_ptr_ = NULL;
-    quant_shift_ptr_ = NULL;
-    dequant_ptr_ = NULL;
-    libvpx_test::ClearSystemState();
-  }
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  int16_t *zbin_ptr_;
-  int16_t *round_fp_ptr_;
-  int16_t *quant_fp_ptr_;
-  int16_t *round_ptr_;
-  int16_t *quant_ptr_;
-  int16_t *quant_shift_ptr_;
-  int16_t *dequant_ptr_;
-  const vpx_bit_depth_t bit_depth_;
-  int max_value_;
-  const int max_size_;
-  const bool is_fp_;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
 };

-class VP9QuantizeTest : public VP9QuantizeBase,
-                        public ::testing::TestWithParam<QuantizeParam> {
+class VP9Quantize32Test : public ::testing::TestWithParam<QuantizeParam> {
 public:
-  VP9QuantizeTest()
-      : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)),
-        quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {}
+  virtual ~VP9Quantize32Test() {}
+  virtual void SetUp() {
+    quantize_op_ = GET_PARAM(0);
+    ref_quantize_op_ = GET_PARAM(1);
+    bit_depth_ = GET_PARAM(2);
+    mask_ = (1 << bit_depth_) - 1;
+  }
+
+  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
-  const QuantizeFunc quantize_op_;
-  const QuantizeFunc ref_quantize_op_;
+  vpx_bit_depth_t bit_depth_;
+  int mask_;
+  QuantizeFunc quantize_op_;
+  QuantizeFunc ref_quantize_op_;
 };

-// This quantizer compares the AC coefficients to the quantization step size to
-// determine if further multiplication operations are needed.
-// Based on vp9_quantize_fp_sse2().
-void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
-                      int skip_block, const int16_t *round_ptr,
-                      const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
-                      tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
-                      uint16_t *eob_ptr, const int16_t *scan,
-                      const int16_t *iscan) {
-  int i, eob = -1;
-  const int thr = dequant_ptr[1] >> 1;
-  (void)iscan;
-  (void)skip_block;
-  assert(!skip_block);
-
-  // Quantization pass: All coefficients with index >= zero_flag are
-  // skippable. Note: zero_flag can be zero.
-  for (i = 0; i < n_coeffs; i += 16) {
-    int y;
-    int nzflag_cnt = 0;
-    int abs_coeff[16];
-    int coeff_sign[16];
-
-    // count nzflag for each row (16 tran_low_t)
-    for (y = 0; y < 16; ++y) {
-      const int rc = i + y;
-      const int coeff = coeff_ptr[rc];
-      coeff_sign[y] = (coeff >> 31);
-      abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y];
-      // The first 16 are skipped in the sse2 code.  Do the same here to match.
-      if (i >= 16 && (abs_coeff[y] <= thr)) {
-        nzflag_cnt++;
-      }
-    }
-
-    for (y = 0; y < 16; ++y) {
-      const int rc = i + y;
-      // If all of the AC coeffs in a row has magnitude less than the
-      // quantization step_size/2, quantize to zero.
-      if (nzflag_cnt < 16) {
-        int tmp =
-            clamp(abs_coeff[y] + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
-        tmp = (tmp * quant_ptr[rc != 0]) >> 16;
-        qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y];
-        dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
-      } else {
-        qcoeff_ptr[rc] = 0;
-        dqcoeff_ptr[rc] = 0;
-      }
-    }
-  }
-
-  // Scan for eob.
-  for (i = 0; i < n_coeffs; i++) {
-    // Use the scan order to find the correct eob.
-    const int rc = scan[i];
-    if (qcoeff_ptr[rc]) {
-      eob = i;
-    }
-  }
-  *eob_ptr = eob + 1;
-}
-
-void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round,
-                          int16_t *quant, int16_t *quant_shift,
-                          int16_t *dequant, int16_t *round_fp,
-                          int16_t *quant_fp) {
-  // Max when q == 0.  Otherwise, it is 48 for Y and 42 for U/V.
-  const int max_qrounding_factor_fp = 64;
-
-  for (int j = 0; j < 2; j++) {
-    // The range is 4 to 1828 in the VP9 tables.
-    const int qlookup = rnd->RandRange(1825) + 4;
-    round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7;
-    quant_fp[j] = (1 << 16) / qlookup;
-
-    // Values determined by deconstructing vp9_init_quantizer().
-    // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y
-    // values or U/V values of any bit depth. This is because y_delta is not
-    // factored into the vp9_ac_quant() call.
-    zbin[j] = rnd->RandRange(1200);
-
-    // round may be up to 685 for Y values or 914 for U/V.
-    round[j] = rnd->RandRange(914);
-    // quant ranges from 1 to -32703
-    quant[j] = static_cast<int>(rnd->RandRange(32704)) - 32703;
-    // quant_shift goes up to 1 << 16.
-    quant_shift[j] = rnd->RandRange(16384);
-    // dequant maxes out at 1828 for all cases.
-    dequant[j] = rnd->RandRange(1828);
-  }
-  for (int j = 2; j < 8; j++) {
-    zbin[j] = zbin[1];
-    round_fp[j] = round_fp[1];
-    quant_fp[j] = quant_fp[1];
-    round[j] = round[1];
-    quant[j] = quant[1];
-    quant_shift[j] = quant_shift[1];
-    dequant[j] = dequant[1];
-  }
-}
-
 TEST_P(VP9QuantizeTest, OperationCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
-  Buffer<tran_low_t> ref_qcoeff =
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(ref_qcoeff.Init());
-  Buffer<tran_low_t> ref_dqcoeff =
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
-
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
  for (int i = 0; i < number_of_iterations; ++i) {
-    // Test skip block for the first three iterations to catch all the different
-    // sizes.
-    const int skip_block = 0;
-    TX_SIZE sz;
-    if (max_size_ == 16) {
-      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    } else {
-      sz = TX_32X32;
-    }
-    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
+    const int skip_block = i == 0;
+    const TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    const TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    const int count = (4 << sz) * (4 << sz);
-    coeff.Set(&rnd, -max_value_, max_value_);
-    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
-                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
-                         quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
-    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
-                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
-
-    ASM_REGISTER_STATE_CHECK(quantize_op_(
-        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
-        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
-
-    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
-    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
-
-    EXPECT_EQ(eob, ref_eob);
-
-    if (HasFailure()) {
-      printf("Failure on iteration %d.\n", i);
-      qcoeff.PrintDifference(ref_qcoeff);
-      dqcoeff.PrintDifference(ref_dqcoeff);
-      return;
+    const int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
+                     scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(
+        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+        scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
+
+TEST_P(VP9Quantize32Test, OperationCheck) {
+  ACMRandom rnd(ACMRandom::DeterministicSeed());
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    const int skip_block = i == 0;
+    const TX_SIZE sz = TX_32X32;
+    const TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    const int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = rnd.Rand16() & mask_;
+    }
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }
+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
+                     scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(
+        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+        scan_order->scan, scan_order->iscan));
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
 }

 TEST_P(VP9QuantizeTest, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
-  Buffer<tran_low_t> ref_qcoeff =
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(ref_qcoeff.Init());
-  Buffer<tran_low_t> ref_dqcoeff =
-      Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(ref_dqcoeff.Init());
-  uint16_t eob, ref_eob;
-
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[256]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
  for (int i = 0; i < number_of_iterations; ++i) {
-    const int skip_block = 0;
-    TX_SIZE sz;
-    if (max_size_ == 16) {
-      sz = static_cast<TX_SIZE>(i % 3);  // TX_4X4, TX_8X8 TX_16X16
-    } else {
-      sz = TX_32X32;
-    }
-    const TX_TYPE tx_type = static_cast<TX_TYPE>((i >> 2) % 3);
+    int skip_block = i == 0;
+    TX_SIZE sz = (TX_SIZE)(i % 3);  // TX_4X4, TX_8X8 TX_16X16
+    TX_TYPE tx_type = (TX_TYPE)((i >> 2) % 3);
    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-    int count = (4 << sz) * (4 << sz);
+    int count = (4 << sz) * (4 << sz);  // 16, 64, 256
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
    // Two random entries
-    coeff.Set(0);
-    coeff.TopLeftPixel()[rnd(count)] =
-        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
-    coeff.TopLeftPixel()[rnd(count)] =
-        static_cast<int>(rnd.RandRange(max_value_ * 2)) - max_value_;
-    GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
-                         quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
-                         quant_fp_ptr_);
-    int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-    int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
-    ref_quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(),
-                     ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob,
-                     scan_order->scan, scan_order->iscan);
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
+    }

+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
+                     scan_order->scan, scan_order->iscan);
    ASM_REGISTER_STATE_CHECK(quantize_op_(
-        coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr, q_ptr,
-        quant_shift_ptr_, qcoeff.TopLeftPixel(), dqcoeff.TopLeftPixel(),
-        dequant_ptr_, &eob, scan_order->scan, scan_order->iscan));
+        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+        scan_order->scan, scan_order->iscan));

-    EXPECT_TRUE(qcoeff.CheckValues(ref_qcoeff));
-    EXPECT_TRUE(dqcoeff.CheckValues(ref_dqcoeff));
-
-    EXPECT_EQ(eob, ref_eob);
-
-    if (HasFailure()) {
-      printf("Failure on iteration %d.\n", i);
-      qcoeff.PrintDifference(ref_qcoeff);
-      dqcoeff.PrintDifference(ref_dqcoeff);
-      return;
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
 }

-TEST_P(VP9QuantizeTest, DISABLED_Speed) {
+TEST_P(VP9Quantize32Test, EOBCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
-  Buffer<tran_low_t> coeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 16);
-  ASSERT_TRUE(coeff.Init());
-  Buffer<tran_low_t> qcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(qcoeff.Init());
-  Buffer<tran_low_t> dqcoeff = Buffer<tran_low_t>(max_size_, max_size_, 0, 32);
-  ASSERT_TRUE(dqcoeff.Init());
-  uint16_t eob;
-  TX_SIZE starting_sz, ending_sz;
-
-  if (max_size_ == 16) {
-    starting_sz = TX_4X4;
-    ending_sz = TX_16X16;
-  } else {
-    starting_sz = TX_32X32;
-    ending_sz = TX_32X32;
-  }
-
-  for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) {
-    // zbin > coeff, zbin < coeff.
-    for (int i = 0; i < 2; ++i) {
-      const int skip_block = 0;
-      // TX_TYPE defines the scan order. That is not relevant to the speed test.
-      // Pick the first one.
-      const TX_TYPE tx_type = DCT_DCT;
-      const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
-      const int count = (4 << sz) * (4 << sz);
-
-      GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_,
-                           quant_shift_ptr_, dequant_ptr_, round_fp_ptr_,
-                           quant_fp_ptr_);
-      int16_t *r_ptr = (is_fp_) ? round_fp_ptr_ : round_ptr_;
-      int16_t *q_ptr = (is_fp_) ? quant_fp_ptr_ : quant_ptr_;
-
-      if (i == 0) {
-        // When |coeff values| are less than zbin the results are 0.
-        int threshold = 100;
-        if (max_size_ == 32) {
-          // For 32x32, the threshold is halved. Double it to keep the values
-          // from clearing it.
-          threshold = 200;
-        }
-        for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold;
-        coeff.Set(&rnd, -99, 99);
-      } else if (i == 1) {
-        for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50;
-        coeff.Set(&rnd, -500, 500);
-      }
-
-      vpx_usec_timer timer;
-      vpx_usec_timer_start(&timer);
-      for (int j = 0; j < 100000000 / count; ++j) {
-        quantize_op_(coeff.TopLeftPixel(), count, skip_block, zbin_ptr_, r_ptr,
-                     q_ptr, quant_shift_ptr_, qcoeff.TopLeftPixel(),
-                     dqcoeff.TopLeftPixel(), dequant_ptr_, &eob,
-                     scan_order->scan, scan_order->iscan);
-      }
-      vpx_usec_timer_mark(&timer);
-      const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer));
-      if (i == 0) printf("Bypass calculations.\n");
-      if (i == 1) printf("Full calculations.\n");
-      printf("Quantize %dx%d time: %5d ms\n", 4 << sz, 4 << sz,
-             elapsed_time / 1000);
+  DECLARE_ALIGNED(16, tran_low_t, coeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, zbin_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, round_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_ptr[2]);
+  DECLARE_ALIGNED(16, int16_t, quant_shift_ptr[2]);
+  DECLARE_ALIGNED(16, tran_low_t, qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_qcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, tran_low_t, ref_dqcoeff_ptr[1024]);
+  DECLARE_ALIGNED(16, int16_t, dequant_ptr[2]);
+  DECLARE_ALIGNED(16, uint16_t, eob_ptr[1]);
+  DECLARE_ALIGNED(16, uint16_t, ref_eob_ptr[1]);
+  int err_count_total = 0;
+  int first_failure = -1;
+  for (int i = 0; i < number_of_iterations; ++i) {
+    int skip_block = i == 0;
+    TX_SIZE sz = TX_32X32;
+    TX_TYPE tx_type = (TX_TYPE)(i % 4);
+    const scan_order *scan_order = &vp9_scan_orders[sz][tx_type];
+    int count = (4 << sz) * (4 << sz);  // 1024
+    int err_count = 0;
+    *eob_ptr = rnd.Rand16();
+    *ref_eob_ptr = *eob_ptr;
+    for (int j = 0; j < count; j++) {
+      coeff_ptr[j] = 0;
+    }
+    // Two random entries
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    coeff_ptr[rnd(count)] = rnd.Rand16() & mask_;
+    for (int j = 0; j < 2; j++) {
+      zbin_ptr[j] = rnd.Rand16() & mask_;
+      round_ptr[j] = rnd.Rand16();
+      quant_ptr[j] = rnd.Rand16();
+      quant_shift_ptr[j] = rnd.Rand16();
+      dequant_ptr[j] = rnd.Rand16();
    }
-    printf("\n");
-  }
-}

+    ref_quantize_op_(coeff_ptr, count, skip_block, zbin_ptr, round_ptr,
+                     quant_ptr, quant_shift_ptr, ref_qcoeff_ptr,
+                     ref_dqcoeff_ptr, dequant_ptr, ref_eob_ptr,
+                     scan_order->scan, scan_order->iscan);
+    ASM_REGISTER_STATE_CHECK(quantize_op_(
+        coeff_ptr, count, skip_block, zbin_ptr, round_ptr, quant_ptr,
+        quant_shift_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr,
+        scan_order->scan, scan_order->iscan));
+
+    for (int j = 0; j < sz; ++j) {
+      err_count += (ref_qcoeff_ptr[j] != qcoeff_ptr[j]) |
+                   (ref_dqcoeff_ptr[j] != dqcoeff_ptr[j]);
+    }
+    err_count += (*ref_eob_ptr != *eob_ptr);
+    if (err_count && !err_count_total) {
+      first_failure = i;
+    }
+    err_count_total += err_count;
+  }
+  EXPECT_EQ(0, err_count_total)
+      << "Error: Quantization Test, C output doesn't match SSE2 output. "
+      << "First failed at test case " << first_failure;
+}
 using std::tr1::make_tuple;

 #if HAVE_SSE2
-#if CONFIG_VP9_HIGHBITDEPTH
-// TODO(johannkoenig): Fix vpx_quantize_b_sse2 in highbitdepth builds.
-// make_tuple(&vpx_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8),
 INSTANTIATE_TEST_CASE_P(
    SSE2, VP9QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
-                   VPX_BITS_8, 16, false),
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
-                   VPX_BITS_10, 16, false),
-        make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c,
-                   VPX_BITS_12, 16, false),
-        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false),
-        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false),
-        make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
-                   &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false)));
-
-#else
+    ::testing::Values(make_tuple(&vpx_highbd_quantize_b_sse2,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_quantize_b_sse2,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_quantize_b_sse2,
+                                 &vpx_highbd_quantize_b_c, VPX_BITS_12)));
 INSTANTIATE_TEST_CASE_P(
-    SSE2, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_sse2>,
-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
-                                 16, true)));
-#endif  // CONFIG_VP9_HIGHBITDEPTH
+    SSE2, VP9Quantize32Test,
+    ::testing::Values(make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8),
+                      make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10),
+                      make_tuple(&vpx_highbd_quantize_b_32x32_sse2,
+                                 &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12)));
 #endif  // HAVE_SSE2
-
-#if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
-#if ARCH_X86_64
-INSTANTIATE_TEST_CASE_P(
-    SSSE3, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_ssse3>,
-                                 &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8,
-                                 16, true)));
-#else
-INSTANTIATE_TEST_CASE_P(SSSE3, VP9QuantizeTest,
-                        ::testing::Values(make_tuple(&vpx_quantize_b_ssse3,
-                                                     &vpx_quantize_b_c,
-                                                     VPX_BITS_8, 16, false)));
-#endif
-
-#if ARCH_X86_64
-// TODO(johannkoenig): SSSE3 optimizations do not yet pass this test.
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_SSSE3, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_32x32_ssse3,
-                                 &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
-                                 false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_ssse3>,
-                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
-                                 VPX_BITS_8, 32, true)));
-#endif  // ARCH_X86_64
-#endif  // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH
-
-// TODO(johannkoenig): AVX optimizations do not yet pass the 32x32 test or
-// highbitdepth configurations.
-#if HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    AVX, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      // Even though SSSE3 and AVX do not match the reference
-                      // code, we can keep them in sync with each other.
-                      make_tuple(&vpx_quantize_b_32x32_avx,
-                                 &vpx_quantize_b_32x32_ssse3, VPX_BITS_8, 32,
-                                 false)));
-#endif  // HAVE_AVX && !CONFIG_VP9_HIGHBITDEPTH
-
-// TODO(webm:1448): dqcoeff is not handled correctly in HBD builds.
-#if HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
-INSTANTIATE_TEST_CASE_P(
-    NEON, VP9QuantizeTest,
-    ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c,
-                                 VPX_BITS_8, 16, false),
-                      make_tuple(&vpx_quantize_b_32x32_neon,
-                                 &vpx_quantize_b_32x32_c, VPX_BITS_8, 32,
-                                 false),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_neon>,
-                                 &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8,
-                                 16, true),
-                      make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_neon>,
-                                 &QuantFPWrapper<vp9_quantize_fp_32x32_c>,
-                                 VPX_BITS_8, 32, true)));
-#endif  // HAVE_NEON && !CONFIG_VP9_HIGHBITDEPTH
-
-// Only useful to compare "Speed" test results.
-INSTANTIATE_TEST_CASE_P(
-    DISABLED_C, VP9QuantizeTest,
-    ::testing::Values(
-        make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false),
-        make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8,
-                   32, false),
-        make_tuple(&QuantFPWrapper<vp9_quantize_fp_c>,
-                   &QuantFPWrapper<vp9_quantize_fp_c>, VPX_BITS_8, 16, true),
-        make_tuple(&QuantFPWrapper<quantize_fp_nz_c>,
-                   &QuantFPWrapper<quantize_fp_nz_c>, VPX_BITS_8, 16, true),
-        make_tuple(&QuantFPWrapper<vp9_quantize_fp_32x32_c>,
-                   &QuantFPWrapper<vp9_quantize_fp_32x32_c>, VPX_BITS_8, 32,
-                   true)));
+#endif  // CONFIG_VP9_HIGHBITDEPTH
 }  // namespace
--- a/test/vp9_scale_test.cc
+++ b/test/vp9_scale_test.cc
@@ -1,214 +0,0 @@
-/*
- *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
- *
- *  Use of this source code is governed by a BSD-style license
- *  that can be found in the LICENSE file in the root of the source
- *  tree. An additional intellectual property rights grant can be found
- *  in the file PATENTS.  All contributing project authors may
- *  be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <assert.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "third_party/googletest/src/include/gtest/gtest.h"
-
-#include "./vp9_rtcd.h"
-#include "./vpx_config.h"
-#include "./vpx_scale_rtcd.h"
-#include "test/clear_system_state.h"
-#include "test/register_state_check.h"
-#include "test/vpx_scale_test.h"
-#include "vpx_mem/vpx_mem.h"
-#include "vpx_ports/vpx_timer.h"
-#include "vpx_scale/yv12config.h"
-
-namespace libvpx_test {
-
-typedef void (*ScaleFrameFunc)(const YV12_BUFFER_CONFIG *src,
-                               YV12_BUFFER_CONFIG *dst,
-                               INTERP_FILTER filter_type, int phase_scaler);
-
-class ScaleTest : public VpxScaleBase,
-                  public ::testing::TestWithParam<ScaleFrameFunc> {
- public:
-  virtual ~ScaleTest() {}
-
- protected:
-  virtual void SetUp() { scale_fn_ = GetParam(); }
-
-  void ReferenceScaleFrame(INTERP_FILTER filter_type, int phase_scaler) {
-    vp9_scale_and_extend_frame_c(&img_, &ref_img_, filter_type, phase_scaler);
-  }
-
-  void ScaleFrame(INTERP_FILTER filter_type, int phase_scaler) {
-    ASM_REGISTER_STATE_CHECK(
-        scale_fn_(&img_, &dst_img_, filter_type, phase_scaler));
-  }
-
-  void RunTest() {
-    static const int kNumSizesToTest = 20;
-    static const int kNumScaleFactorsToTest = 4;
-    static const int kSizesToTest[] = {
-      2,  4,  6,  8,  10, 12, 14, 16, 18,  20,
-      22, 24, 26, 28, 30, 32, 34, 68, 128, 134
-    };
-    static const int kScaleFactors[] = { 1, 2, 3, 4 };
-    for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) {
-      for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) {
-        for (int h = 0; h < kNumSizesToTest; ++h) {
-          const int src_height = kSizesToTest[h];
-          for (int w = 0; w < kNumSizesToTest; ++w) {
-            const int src_width = kSizesToTest[w];
-            for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest;
-                 ++sf_up_idx) {
-              const int sf_up = kScaleFactors[sf_up_idx];
-              for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
-                   ++sf_down_idx) {
-                const int sf_down = kScaleFactors[sf_down_idx];
-                const int dst_width = src_width * sf_up / sf_down;
-                const int dst_height = src_height * sf_up / sf_down;
-                if (sf_up == sf_down && sf_up != 1) {
-                  continue;
-                }
-                // I420 frame width and height must be even.
-                if (!dst_width || !dst_height || dst_width & 1 ||
-                    dst_height & 1) {
-                  continue;
-                }
-                // vpx_convolve8_c() has restriction on the step which cannot
-                // exceed 64 (ratio 1 to 4).
-                if (src_width > 4 * dst_width || src_height > 4 * dst_height) {
-                  continue;
-                }
-                ASSERT_NO_FATAL_FAILURE(ResetScaleImages(
-                    src_width, src_height, dst_width, dst_height));
-                ReferenceScaleFrame(filter_type, phase_scaler);
-                ScaleFrame(filter_type, phase_scaler);
-                if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
-                           ref_img_.frame_size)) {
-                  printf(
-                      "filter_type = %d, phase_scaler = %d, src_width = %4d, "
-                      "src_height = %4d, dst_width = %4d, dst_height = %4d, "
-                      "scale factor = %d:%d\n",
-                      filter_type, phase_scaler, src_width, src_height,
-                      dst_width, dst_height, sf_down, sf_up);
-                  PrintDiff();
-                }
-                CompareImages(dst_img_);
-                DeallocScaleImages();
-              }
-            }
-          }
-        }
-      }
-    }
-  }
-
-  void PrintDiffComponent(const uint8_t *const ref, const uint8_t *const opt,
-                          const int stride, const int width, const int height,
-                          const int plane_idx) const {
-    for (int y = 0; y < height; y++) {
-      for (int x = 0; x < width; x++) {
-        if (ref[y * stride + x] != opt[y * stride + x]) {
-          printf("Plane %d pixel[%d][%d] diff:%6d (ref),%6d (opt)\n", plane_idx,
-                 y, x, ref[y * stride + x], opt[y * stride + x]);
-          break;
-        }
-      }
-    }
-  }
-
-  void PrintDiff() const {
-    assert(ref_img_.y_stride == dst_img_.y_stride);
-    assert(ref_img_.y_width == dst_img_.y_width);
-    assert(ref_img_.y_height == dst_img_.y_height);
-    assert(ref_img_.uv_stride == dst_img_.uv_stride);
-    assert(ref_img_.uv_width == dst_img_.uv_width);
-    assert(ref_img_.uv_height == dst_img_.uv_height);
-
-    if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc,
-               ref_img_.frame_size)) {
-      PrintDiffComponent(ref_img_.y_buffer, dst_img_.y_buffer,
-                         ref_img_.y_stride, ref_img_.y_width, ref_img_.y_height,
-                         0);
-      PrintDiffComponent(ref_img_.u_buffer, dst_img_.u_buffer,
-                         ref_img_.uv_stride, ref_img_.uv_width,
-                         ref_img_.uv_height, 1);
-      PrintDiffComponent(ref_img_.v_buffer, dst_img_.v_buffer,
-                         ref_img_.uv_stride, ref_img_.uv_width,
-                         ref_img_.uv_height, 2);
-    }
-  }
-
-  ScaleFrameFunc scale_fn_;
-};
-
-TEST_P(ScaleTest, ScaleFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); }
-
-TEST_P(ScaleTest, DISABLED_Speed) {
-  static const int kCountSpeedTestBlock = 100;
-  static const int kNumScaleFactorsToTest = 4;
-  static const int kScaleFactors[] = { 1, 2, 3, 4 };
-  const int src_width = 1280;
-  const int src_height = 720;
-  for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) {
-    for (int phase_scaler = 0; phase_scaler < 2; ++phase_scaler) {
-      for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest; ++sf_up_idx) {
-        const int sf_up = kScaleFactors[sf_up_idx];
-        for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest;
-             ++sf_down_idx) {
-          const int sf_down = kScaleFactors[sf_down_idx];
-          const int dst_width = src_width * sf_up / sf_down;
-          const int dst_height = src_height * sf_up / sf_down;
-          if (sf_up == sf_down && sf_up != 1) {
-            continue;
-          }
-          // I420 frame width and height must be even.
-          if (dst_width & 1 || dst_height & 1) {
-            continue;
-          }
-          ASSERT_NO_FATAL_FAILURE(
-              ResetScaleImages(src_width, src_height, dst_width, dst_height));
-          ASM_REGISTER_STATE_CHECK(
-              ReferenceScaleFrame(filter_type, phase_scaler));
-
-          vpx_usec_timer timer;
-          vpx_usec_timer_start(&timer);
-          for (int i = 0; i < kCountSpeedTestBlock; ++i) {
-            ScaleFrame(filter_type, phase_scaler);
-          }
-          libvpx_test::ClearSystemState();
-          vpx_usec_timer_mark(&timer);
-          const int elapsed_time =
-              static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
-          CompareImages(dst_img_);
-          DeallocScaleImages();
-
-          printf(
-              "filter_type = %d, phase_scaler = %d, src_width = %4d, "
-              "src_height = %4d, dst_width = %4d, dst_height = %4d, "
-              "scale factor = %d:%d, scale time: %5d ms\n",
-              filter_type, phase_scaler, src_width, src_height, dst_width,
-              dst_height, sf_down, sf_up, elapsed_time);
-        }
-      }
-    }
-  }
-}
-
-INSTANTIATE_TEST_CASE_P(C, ScaleTest,
-                        ::testing::Values(vp9_scale_and_extend_frame_c));
-
-#if HAVE_SSSE3
-INSTANTIATE_TEST_CASE_P(SSSE3, ScaleTest,
-                        ::testing::Values(vp9_scale_and_extend_frame_ssse3));
-#endif  // HAVE_SSSE3
-
-#if HAVE_NEON
-INSTANTIATE_TEST_CASE_P(NEON, ScaleTest,
-                        ::testing::Values(vp9_scale_and_extend_frame_neon));
-#endif  // HAVE_NEON
-
-}  // namespace libvpx_test
--- a/test/vp9_skip_loopfilter_test.cc
+++ b/test/vp9_skip_loopfilter_test.cc
@@ -85,8 +85,8 @@ class SkipLoopFilterTest {
  // TODO(fgalligan): Move the MD5 testing code into another class.
  void OpenMd5File(const std::string &md5_file_name) {
    md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name);
-    ASSERT_TRUE(md5_file_ != NULL)
-        << "MD5 file open failed. Filename: " << md5_file_name;
+    ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: "
+                                   << md5_file_name;
  }

  // Reads the next line of the MD5 file.
--- a/Show More
+++ b/Show More